// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/isa-checks.h>
#include <xnnpack/math.h>
#include <xnnpack/math-stubs.h>

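// Each test below sweeps a contiguous range of IEEE-754 binary32 bit patterns
// in blocks of kBlockSize values, runs the vectorized conversion micro-kernel
// on the block, and checks every element against the scalar reference
// fp16_ieee_from_fp32_value or against a known constant bit pattern.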
constexpr int kBlockSize = 1024;

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
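// Normal inputs: [0x387FE000, 0x477FF000) spans every positive f32 whose
// round-to-nearest-even f16 result is a normal number. 0x387FE000 is the
// smallest input that rounds up to the smallest f16 normal (0x0400), and
// 0x477FF000 (65520.0f) is the smallest input that rounds to f16 +infinity.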
TEST(CVT__SSE2, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

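// The negative_* tests mirror the positive ranges with the sign bit (0x80000000) set.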
TEST(CVT__SSE2, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

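// Subnormal inputs: [0x33000001, 0x387FE000) spans positive f32 values that
// round to a nonzero f16 subnormal; 0x33000001 is just above 2^-25, below
// which inputs round to zero. The index is clamped so the last block does not
// run past the end of the subnormal range.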
TEST(CVT__SSE2, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE2, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

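// Underflow inputs: [0x00000001, 0x33000001) spans positive values too small
// to round to any nonzero f16; they must convert to +0 (0x0000).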
TEST(CVT__SSE2, positive_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE2, negative_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

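// Zero tests check that the sign of zero is preserved: +0.0f maps to 0x0000
// and -0.0f maps to 0x8000.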
TEST(CVT__SSE2, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE2, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

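// Overflow inputs: [0x477FF000, 0x7F800000) spans finite values of 65520.0f
// and above, which are out of f16 range and must round to +infinity (0x7C00).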
TEST(CVT__SSE2, positive_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE2, negative_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

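// Infinity inputs must map exactly to the f16 infinities (0x7C00 / 0xFC00).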
TEST(CVT__SSE2, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE2, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

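// NaN inputs may convert to any f16 NaN, so these tests only check that the
// output lies in the f16 NaN range: above 0x7C00 (and below 0x8000) for
// positive NaNs, above 0xFC00 for negative NaNs.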
TEST(CVT__SSE2, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE2, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__sse2(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

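// The SSE4.1, F16C, NEON, and NEONFP16 suites below run the same input
// partitions against the corresponding micro-kernels; each test first checks
// that the required instruction set is available on the host.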
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(CVT__SSE41, positive_normal) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, negative_normal) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, positive_subnormal) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, negative_subnormal) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, positive_underflow) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, negative_underflow) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, positive_zero) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE41, negative_zero) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE41, positive_overflow) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, negative_overflow) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, positive_infinity) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE41, negative_infinity) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SSE41, positive_nan) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SSE41, negative_nan) {
  TEST_REQUIRES_X86_SSE41;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__sse41(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(CVT__F16C, positive_normal) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, negative_normal) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, positive_subnormal) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, negative_subnormal) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, positive_underflow) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, negative_underflow) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, positive_zero) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__F16C, negative_zero) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__F16C, positive_overflow) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, negative_overflow) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, positive_infinity) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__F16C, negative_infinity) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__F16C, positive_nan) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__F16C, negative_nan) {
  TEST_REQUIRES_X86_F16C;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__f16c(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(CVT__NEON, positive_normal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, negative_normal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, positive_subnormal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, negative_subnormal) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, positive_underflow) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, negative_underflow) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, positive_zero) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEON, negative_zero) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEON, positive_overflow) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, negative_overflow) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, positive_infinity) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEON, negative_infinity) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEON, positive_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEON, negative_nan) {
  TEST_REQUIRES_ARM_NEON;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__neon(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

1028 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(CVT__NEONFP16,positive_normal)1029 TEST(CVT__NEONFP16, positive_normal) {
1030 TEST_REQUIRES_ARM_NEON_FP16;
1031
1032 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1033 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1034 for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
1035 for (uint32_t i = 0; i < kBlockSize; i++) {
1036 inputs[i] = uint32_as_float(n + i);
1037 }
1038 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1039 for (uint32_t i = 0; i < kBlockSize; i++) {
1040 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1041 ASSERT_EQ(reference_output, outputs[i])
1042 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1043 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1044 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1045 }
1046 }
1047 }
1048
TEST(CVT__NEONFP16,negative_normal)1049 TEST(CVT__NEONFP16, negative_normal) {
1050 TEST_REQUIRES_ARM_NEON_FP16;
1051
1052 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1053 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
1054 for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
1055 for (uint32_t i = 0; i < kBlockSize; i++) {
1056 inputs[i] = uint32_as_float(n + i);
1057 }
1058 xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
1059 for (uint32_t i = 0; i < kBlockSize; i++) {
1060 const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
1061 ASSERT_EQ(reference_output, outputs[i])
1062 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
1063 << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
1064 << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
1065 }
1066 }
1067 }
1068
TEST(CVT__NEONFP16, positive_subnormal) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEONFP16, negative_subnormal) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

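// Inputs in (0.0f, 2**-25] round to zero: values below 2**-25 are closer to
// zero than to the smallest f16 subnormal (2**-24), and the tie at exactly
// 2**-25 rounds to even, which is also zero.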
TEST(CVT__NEONFP16, positive_underflow) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEONFP16, negative_underflow) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

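// Signed zeros must be preserved exactly: +0.0f -> 0x0000, -0.0f -> 0x8000.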
TEST(CVT__NEONFP16, positive_zero) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEONFP16, negative_zero) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

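// Finite inputs at or above 65520.0f (0x477FF000) are out of f16 range and
// must round to infinity (0x7C00 positive, 0xFC00 negative).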
TEST(CVT__NEONFP16, positive_overflow) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEONFP16, negative_overflow) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEONFP16, positive_infinity) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__NEONFP16, negative_infinity) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

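// Any f32 NaN must convert to some f16 NaN. Since the NaN payload is not
// specified, the tests check that the output lies in the NaN range
// (0x7C01-0x7FFF positive, 0xFC01-0xFFFF negative) instead of comparing
// against a single reference value.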
TEST(CVT__NEONFP16, positive_nan) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__NEONFP16, negative_nan) {
  TEST_REQUIRES_ARM_NEON_FP16;

  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__neonfp16(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
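// WAsm SIMD has no native f32->f16 conversion instruction, so the __wasmsimd
// variant presumably emulates IEEE round-to-nearest-even with v128 integer and
// floating-point arithmetic; it is verified over the same ranges as above.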
TEST(CVT__WASMSIMD, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, positive_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, negative_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__WASMSIMD, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__WASMSIMD, positive_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, negative_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__WASMSIMD, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__WASMSIMD, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__WASMSIMD, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__wasmsimd(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

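// The portable scalar variants below need no architecture guard. As the name
// suggests, the __scalar_bitcast variant presumably operates on the float's
// raw bit pattern using integer arithmetic only.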
TEST(CVT__SCALAR_BITCAST, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, positive_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, negative_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_BITCAST, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_BITCAST, positive_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, negative_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_BITCAST, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_BITCAST, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_BITCAST, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_bitcast(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

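// As the name suggests, the __scalar_fabsf variant presumably separates the
// sign with fabsf() and converts the magnitude using floating-point
// arithmetic; it must still produce bit-identical results to the reference.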
TEST(CVT__SCALAR_FABSF, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x387FE000); n < UINT32_C(0x477FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB87FE000); n < UINT32_C(0xC77FF000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, positive_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x33000001); n < UINT32_C(0x387FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x387FDFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, negative_subnormal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xB3000001); n < UINT32_C(0xB87FE000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0xB87FDFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = fp16_ieee_from_fp32_value(inputs[i]);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, positive_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000001); n < UINT32_C(0x33000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x0000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, negative_underflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000001); n < UINT32_C(0xB3000001); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x8000);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, positive_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +0.0f);
  xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x0000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_FABSF, negative_zero) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -0.0f);
  xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x8000);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_FABSF, positive_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x477FF000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0x7C00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, negative_overflow) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xC77FF000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(n + i);
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint16_t reference_output = UINT16_C(0xFC00);
      ASSERT_EQ(reference_output, outputs[i])
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0x7C00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_FABSF, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
  const uint16_t reference_output = UINT16_C(0xFC00);
  ASSERT_EQ(reference_output, outputs[0])
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(4) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[0];
}

TEST(CVT__SCALAR_FABSF, positive_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0x7C00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
      ASSERT_LT(outputs[i], UINT16_C(0x8000))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}

TEST(CVT__SCALAR_FABSF, negative_nan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800001); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = uint32_as_float(UINT32_C(0x80000000) | std::min<uint32_t>(n + i, UINT32_C(0x7FFFFFFF)));
    }
    xnn_math_f32_f16_cvt__scalar_fabsf(kBlockSize * sizeof(uint16_t), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      ASSERT_GT(outputs[i], UINT16_C(0xFC00))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << float_as_uint32(inputs[i])
        << ", optimized = 0x" << std::hex << std::setw(4) << std::setfill('0') << outputs[i];
    }
  }
}
