// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>
23
24
// Number of floats processed per call to the math kernel under test.
constexpr int kBlockSize = 1024;
26
27 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDNE__SSE_ADDSUB,positive_normal)28 TEST(ROUNDNE__SSE_ADDSUB, positive_normal) {
29 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
30 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
31 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
32 for (uint32_t i = 0; i < kBlockSize; i++) {
33 inputs[i] = uint32_as_float(n + i);
34 }
35 xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
36 for (uint32_t i = 0; i < kBlockSize; i++) {
37 const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
38 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
39 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
40 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
41 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
42 }
43 }
44 }
45
// Exhaustively test negative values above -2**24 (bit patterns [0x80000000, 0xCB800000)),
// where round-to-nearest-even is non-trivial; compare against std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
63
// Exhaustively test positive finite values >= 2**24 (bit patterns [0x4B800000, 0x7F800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
81
// Exhaustively test negative finite values <= -2**24 (bit patterns [0xCB800000, 0xFF800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
99
// Rounding +infinity must produce +infinity unchanged.
TEST(ROUNDNE__SSE_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
111
// Rounding -infinity must produce -infinity unchanged.
TEST(ROUNDNE__SSE_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
123
// Exhaustively test positive quiet NaNs (bit patterns [0x7FC00000, 0x80000000));
// the kernel must propagate the NaN payload exactly as std::nearbyint does.
TEST(ROUNDNE__SSE_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
141
// Exhaustively test negative quiet NaNs (positive qNaN patterns with the sign bit set);
// the kernel must propagate the NaN payload exactly as std::nearbyint does.
TEST(ROUNDNE__SSE_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
159
// Exhaustively test positive signaling NaNs (bit patterns (0x7F800000, 0x7FC00000));
// outputs are compared with bit 22 (the quiet bit) masked off, since the kernel may quiet the NaN.
TEST(ROUNDNE__SSE_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 to skip the infinity pattern at the range start.
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
177
// Exhaustively test negative signaling NaNs (positive sNaN patterns with the sign bit set);
// outputs are compared with bit 22 (the quiet bit) masked off, since the kernel may quiet the NaN.
TEST(ROUNDNE__SSE_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 to skip the infinity pattern at the range start.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
195
// Stricter variant of positive_snan: the kernel must quiet positive signaling NaNs
// bit-for-bit identically to std::nearbyint (no quiet-bit masking here).
TEST(ROUNDNE__SSE_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
213
TEST(ROUNDNE__SSE_ADDSUB,negative_snan_to_qnan)214 TEST(ROUNDNE__SSE_ADDSUB, negative_snan_to_qnan) {
215 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
216 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
217 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
218 for (uint32_t i = 0; i < kBlockSize; i++) {
219 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
220 }
221 xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
222 for (uint32_t i = 0; i < kBlockSize; i++) {
223 const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
224 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
225 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
226 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
227 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
228 }
229 }
230 }
231 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
232
233 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDNE__SSE2_CVT,positive_normal)234 TEST(ROUNDNE__SSE2_CVT, positive_normal) {
235 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
236 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
237 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
238 for (uint32_t i = 0; i < kBlockSize; i++) {
239 inputs[i] = uint32_as_float(n + i);
240 }
241 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
242 for (uint32_t i = 0; i < kBlockSize; i++) {
243 const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
244 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
245 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
246 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
247 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
248 }
249 }
250 }
251
// Exhaustively test negative values above -2**24 (bit patterns [0x80000000, 0xCB800000)),
// where round-to-nearest-even is non-trivial; compare against std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE2_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
269
// Exhaustively test positive finite values >= 2**24 (bit patterns [0x4B800000, 0x7F800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE2_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
287
// Exhaustively test negative finite values <= -2**24 (bit patterns [0xCB800000, 0xFF800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE2_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
305
// Rounding +infinity must produce +infinity unchanged.
TEST(ROUNDNE__SSE2_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
317
// Rounding -infinity must produce -infinity unchanged.
TEST(ROUNDNE__SSE2_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
329
// Exhaustively test positive quiet NaNs (bit patterns [0x7FC00000, 0x80000000));
// the kernel must propagate the NaN payload exactly as std::nearbyint does.
TEST(ROUNDNE__SSE2_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
347
// Exhaustively test negative quiet NaNs (positive qNaN patterns with the sign bit set);
// the kernel must propagate the NaN payload exactly as std::nearbyint does.
TEST(ROUNDNE__SSE2_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
365
// Exhaustively test positive signaling NaNs (bit patterns (0x7F800000, 0x7FC00000));
// outputs are compared with bit 22 (the quiet bit) masked off, since the kernel may quiet the NaN.
TEST(ROUNDNE__SSE2_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 to skip the infinity pattern at the range start.
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
383
// Exhaustively test negative signaling NaNs (positive sNaN patterns with the sign bit set);
// outputs are compared with bit 22 (the quiet bit) masked off, since the kernel may quiet the NaN.
TEST(ROUNDNE__SSE2_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 to skip the infinity pattern at the range start.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
401
// Stricter variant of positive_snan requiring bit-exact sNaN quieting; DISABLED because
// this variant is not expected to match std::nearbyint's quieting exactly.
TEST(ROUNDNE__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
419
TEST(ROUNDNE__SSE2_CVT,DISABLED_negative_snan_to_qnan)420 TEST(ROUNDNE__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
421 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
422 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
423 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
424 for (uint32_t i = 0; i < kBlockSize; i++) {
425 inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
426 }
427 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
428 for (uint32_t i = 0; i < kBlockSize; i++) {
429 const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
430 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
431 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
432 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
433 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
434 }
435 }
436 }
437 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
438
439 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(ROUNDNE__SSE41,positive_normal)440 TEST(ROUNDNE__SSE41, positive_normal) {
441 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
442 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
443 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
444 for (uint32_t i = 0; i < kBlockSize; i++) {
445 inputs[i] = uint32_as_float(n + i);
446 }
447 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
448 for (uint32_t i = 0; i < kBlockSize; i++) {
449 const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
450 ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
451 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
452 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
453 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
454 }
455 }
456 }
457
// Exhaustively test negative values above -2**24 (bit patterns [0x80000000, 0xCB800000)),
// where round-to-nearest-even is non-trivial; compare against std::nearbyint bit-for-bit.
TEST(ROUNDNE__SSE41, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
475
// Exhaustively test positive finite values >= 2**24 (bit patterns [0x4B800000, 0x7F800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE41, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
493
// Exhaustively test negative finite values <= -2**24 (bit patterns [0xCB800000, 0xFF800000)),
// which are already integral and must pass through unchanged.
TEST(ROUNDNE__SSE41, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
511
// Rounding +infinity must produce +infinity unchanged.
TEST(ROUNDNE__SSE41, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
523
// Rounding -infinity must produce -infinity unchanged.
TEST(ROUNDNE__SSE41, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, float_as_uint32(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[0]);
}
535
// Exhaustively test positive quiet NaNs (bit patterns [0x7FC00000, 0x80000000));
// the kernel must propagate the NaN payload exactly as std::nearbyint does.
TEST(ROUNDNE__SSE41, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[i]));
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
553
TEST(ROUNDNE__SSE41, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
571
TEST(ROUNDNE__SSE41, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamping up so 0x7F800000 (infinity) is excluded.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
589
TEST(ROUNDNE__SSE41, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_snan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
607
TEST(ROUNDNE__SSE41, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamped past infinity; here the comparison
  // is exact, so the kernel must match std::nearbyint bit-for-bit on sNaN inputs.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
625
TEST(ROUNDNE__SSE41, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same exact-match sweep as positive_snan_to_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
643 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
644
645 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDNE__NEON_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x00000000, 0x4B800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
663
TEST(ROUNDNE__NEON_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x80000000, 0xCB800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
681
TEST(ROUNDNE__NEON_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x4B800000, 0x7F800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
699
TEST(ROUNDNE__NEON_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0xCB800000, 0xFF800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
717
TEST(ROUNDNE__NEON_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Every lane holds +infinity; rounding must pass it through bit-exactly.
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
729
TEST(ROUNDNE__NEON_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Every lane holds -infinity; rounding must pass it through bit-exactly.
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
741
TEST(ROUNDNE__NEON_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x7FC00000, 0x80000000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
759
TEST(ROUNDNE__NEON_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
777
TEST(ROUNDNE__NEON_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamping up so 0x7F800000 (infinity) is excluded.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
795
TEST(ROUNDNE__NEON_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_snan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
813
TEST(ROUNDNE__NEON_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamped past infinity; here the comparison
  // is exact, so the kernel must match std::nearbyint bit-for-bit on sNaN inputs.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
831
TEST(ROUNDNE__NEON_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same exact-match sweep as positive_snan_to_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
849 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
850
851 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(ROUNDNE__NEONV8, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x00000000, 0x4B800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
869
TEST(ROUNDNE__NEONV8, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x80000000, 0xCB800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
887
TEST(ROUNDNE__NEONV8, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x4B800000, 0x7F800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
905
TEST(ROUNDNE__NEONV8, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0xCB800000, 0xFF800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
923
TEST(ROUNDNE__NEONV8, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Every lane holds +infinity; rounding must pass it through bit-exactly.
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
935
TEST(ROUNDNE__NEONV8, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Every lane holds -infinity; rounding must pass it through bit-exactly.
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t input_bits = float_as_uint32(inputs[0]);
  const uint32_t output_bits = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, output_bits)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
}
947
TEST(ROUNDNE__NEONV8, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x7FC00000, 0x80000000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
965
TEST(ROUNDNE__NEONV8, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
983
TEST(ROUNDNE__NEONV8, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamping up so 0x7F800000 (infinity) is excluded.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1001
TEST(ROUNDNE__NEONV8, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same sweep as positive_snan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      // Bit 22 is masked out: the comparison ignores whether the NaN was quieted.
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), output_bits & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1019
TEST(ROUNDNE__NEONV8, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep [0x7F800000, 0x7FC00000), clamped past infinity; here the comparison
  // is exact, so the kernel must match std::nearbyint bit-for-bit on sNaN inputs.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1037
TEST(ROUNDNE__NEONV8, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same exact-match sweep as positive_snan_to_qnan, with the sign bit forced on.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1055 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1056
1057 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x00000000, 0x4B800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1075
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep bit patterns [0x80000000, 0xCB800000) in kBlockSize chunks.
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t input_bits = float_as_uint32(inputs[idx]);
      const uint32_t output_bits = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, output_bits)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << input_bits
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << output_bits;
    }
  }
}
1093
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive float in [0x1.0p+24, +inf); all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1111
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative float in (-inf, -0x1.0p+24]; all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1129
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass +infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1141
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass -infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1153
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive quiet NaN bit pattern (0x7FC00000-0x7FFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1171
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative quiet NaN bit pattern (0xFFC00000-0xFFFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1189
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive signaling NaN patterns; the clamp to 0x7F800001 skips the
  // +infinity pattern at the start of the range. The comparison masks out the
  // quiet bit (0x00400000), which std::nearbyint may set when quieting the NaN.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), optimized_output & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1207
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative signaling NaN patterns; the clamp to 0x7F800001 skips the
  // infinity pattern at the start of the range. The comparison masks out the
  // quiet bit (0x00400000), which std::nearbyint may set when quieting the NaN.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), optimized_output & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1225
TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same positive signaling-NaN sweep as positive_snan, but with an exact
  // comparison: the output must match std::nearbyint bit-for-bit, quiet bit included.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1243
TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same negative signaling-NaN sweep as negative_snan, but with an exact
  // comparison: the output must match std::nearbyint bit-for-bit, quiet bit included.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1261 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1262
1263 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive float below 0x1.0p+24 (bit patterns 0x00000000-0x4B7FFFFF),
  // the range where round-to-nearest-even can actually change the value.
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1281
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative float above -0x1.0p+24 (bit patterns 0x80000000-0xCB7FFFFF),
  // the range where round-to-nearest-even can actually change the value.
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1299
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive float in [0x1.0p+24, +inf); all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1317
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative float in (-inf, -0x1.0p+24]; all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1335
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass +infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1347
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass -infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1359
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive quiet NaN bit pattern (0x7FC00000-0x7FFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1377
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative quiet NaN bit pattern (0xFFC00000-0xFFFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1395
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep positive signaling NaN patterns; the clamp to 0x7F800001 skips the
  // +infinity pattern at the start of the range. The comparison masks out the
  // quiet bit (0x00400000), which std::nearbyint may set when quieting the NaN.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), optimized_output & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1413
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep negative signaling NaN patterns; the clamp to 0x7F800001 skips the
  // infinity pattern at the start of the range. The comparison masks out the
  // quiet bit (0x00400000), which std::nearbyint may set when quieting the NaN.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), optimized_output & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1431
TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same positive signaling-NaN sweep as positive_snan, but with an exact
  // comparison: the output must match std::nearbyint bit-for-bit, quiet bit included.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1449
TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Same negative signaling-NaN sweep as negative_snan, but with an exact
  // comparison: the output must match std::nearbyint bit-for-bit, quiet bit included.
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(block_start + idx, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1467 #endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1468
TEST(ROUNDNE__SCALAR_ADDSUB, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive float below 0x1.0p+24 (bit patterns 0x00000000-0x4B7FFFFF),
  // the range where round-to-nearest-even can actually change the value.
  for (uint32_t block_start = UINT32_C(0x00000000); block_start < UINT32_C(0x4B800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1486
TEST(ROUNDNE__SCALAR_ADDSUB, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative float above -0x1.0p+24 (bit patterns 0x80000000-0xCB7FFFFF),
  // the range where round-to-nearest-even can actually change the value.
  for (uint32_t block_start = UINT32_C(0x80000000); block_start < UINT32_C(0xCB800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1504
TEST(ROUNDNE__SCALAR_ADDSUB, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive float in [0x1.0p+24, +inf); all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0x4B800000); block_start < UINT32_C(0x7F800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1522
TEST(ROUNDNE__SCALAR_ADDSUB, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative float in (-inf, -0x1.0p+24]; all such values are already
  // integral, so rounding must return them unchanged.
  for (uint32_t block_start = UINT32_C(0xCB800000); block_start < UINT32_C(0xFF800000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1540
TEST(ROUNDNE__SCALAR_ADDSUB, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass +infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1552
TEST(ROUNDNE__SCALAR_ADDSUB, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Round-to-nearest-even must pass -infinity through unchanged.
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t optimized_output = float_as_uint32(outputs[0]);
  const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[0]));
  ASSERT_EQ(reference_output, optimized_output)
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
}
1564
TEST(ROUNDNE__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every positive quiet NaN bit pattern (0x7FC00000-0x7FFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(block_start + idx);
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1582
TEST(ROUNDNE__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  // Sweep every negative quiet NaN bit pattern (0xFFC00000-0xFFFFFFFF);
  // the output must match std::nearbyint bit-for-bit.
  for (uint32_t block_start = UINT32_C(0x7FC00000); block_start < UINT32_C(0x80000000); block_start += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = uint32_as_float(UINT32_C(0x80000000) | (block_start + idx));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t optimized_output = float_as_uint32(outputs[idx]);
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output, optimized_output)
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << optimized_output;
    }
  }
}
1600
TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan) {
  // Sweep positive signaling-NaN bit patterns [0x7F800001, 0x7FC00000).
  // The clamp to 0x7F800001 keeps +Inf (0x7F800000) out of the first block.
  // The quiet bit (0x00400000) is masked off on both sides of the comparison,
  // since NaN-quieting behavior is checked separately by the *_snan_to_qnan tests.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t bits = block_start;
    std::generate(inputs.begin(), inputs.end(),
                  [&bits] { return uint32_as_float(std::max<uint32_t>(bits++, UINT32_C(0x7F800001))); });
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1618
TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan) {
  // Negative-sign counterpart of positive_snan: sweep signaling-NaN patterns
  // with the sign bit set, clamping away -Inf, and compare to std::nearbyint
  // with the quiet bit (0x00400000) masked off on both sides.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t block_start = UINT32_C(0x7F800000); block_start < UINT32_C(0x7FC00000); block_start += kBlockSize) {
    uint32_t bits = block_start;
    std::generate(inputs.begin(), inputs.end(), [&bits] {
      return uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(bits++, UINT32_C(0x7F800001)));
    });
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t reference_output = float_as_uint32(std::nearbyint(inputs[idx]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), float_as_uint32(outputs[idx]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[idx])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[idx]);
    }
  }
}
1636
TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan_to_qnan) {
  // Verify that the kernel quiets positive signaling NaNs: the output must be
  // the input with the quiet bit (0x00400000) set. The previous expectation,
  // float_as_uint32(std::nearbyint(inputs[i])), relied on the host libm's
  // NaN-payload propagation, which is implementation-defined and makes the
  // test platform-dependent; the explicit quiet-bit form pins the intended
  // contract directly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 so +Inf (0x7F800000) never enters the sweep.
      inputs[i] = uint32_as_float(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Expected result: same NaN with the quiet bit forced on.
      const uint32_t reference_output = float_as_uint32(inputs[i]) | UINT32_C(0x00400000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1654
TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan_to_qnan) {
  // Verify that the kernel quiets negative signaling NaNs: the output must be
  // the input with the quiet bit (0x00400000) set. The previous expectation,
  // float_as_uint32(std::nearbyint(inputs[i])), relied on the host libm's
  // NaN-payload propagation, which is implementation-defined and makes the
  // test platform-dependent; the explicit quiet-bit form pins the intended
  // contract directly.
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Clamp to 0x7F800001 (before OR-ing the sign bit) so -Inf never enters the sweep.
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      // Expected result: same NaN with the quiet bit forced on.
      const uint32_t reference_output = float_as_uint32(inputs[i]) | UINT32_C(0x00400000);
      ASSERT_EQ(reference_output, float_as_uint32(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(outputs[i]);
    }
  }
}
1672