1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstdlib>
11 #include <vector>
12
13 #include <gtest/gtest.h>
14
15 #include <xnnpack/aligned-allocator.h>
16 #include <xnnpack/common.h>
17 #include <xnnpack/math-stubs.h>
18
19
// Number of u64 elements fed to the math kernel per invocation in every test below.
constexpr int kBlockSize = 1024;
21
22
23 namespace {
24
// Reference round-to-nearest integer square root of a uint64_t.
// Returns the uint64_t closest to sqrt(n) (note: for n near UINT64_MAX this
// can be 2^32, which does not fit in uint32_t).
uint64_t Sqrt(uint64_t n) {
  // Handle 0 and 1 up front. This also avoids a division by zero below:
  // for n == 1 the seed x0 = n >> 1 would be 0.
  if (n < 2) {
    return n;
  }

  // Newton-Raphson iteration x_{k+1} = (x_k + n / x_k) / 2, seeded with n / 2.
  // In integer arithmetic this converges monotonically down to floor(sqrt(n));
  // iteration stops once the sequence no longer decreases.
  uint64_t x0 = n >> 1;
  uint64_t x1 = (x0 + n / x0) >> 1;
  do {
    x0 = x1;
    x1 = (x0 + n / x0) >> 1;
  } while (x1 < x0);

  // x0 == floor(sqrt(n)). Round to nearest: round up iff n > x0 * (x0 + 1),
  // i.e. sqrt(n) > x0 + 0.5. The subtraction is evaluated modulo 2^64 and the
  // sign is read off via the int64_t cast.
  if (int64_t(x0 * x0 + x0 - n) < 0) {
    x0 += 1;
  }
  return x0;
}
43
44 } // namespace
45
46
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form 2^52 << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
62
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
78
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 + 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
94
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
110
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 3) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
126
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
142
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // 0 (idx == 0), then the kBlockSize - 1 largest uint64 values, descending.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = uint64_t(0) - idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
157
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep root in [2^25, 2^32 - 1] and test root * root + root, the midpoint
  // between root^2 and (root + 1)^2.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (int idx = 0; idx < kBlockSize; idx++) {
      const uint64_t root = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = root * root + root;
    }
    xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (int idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
175
176
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form 2^52 << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
192
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
208
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 + 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
224
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
240
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 3) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
256
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
272
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // 0 (idx == 0), then the kBlockSize - 1 largest uint64 values, descending.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = uint64_t(0) - idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
287
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep root in [2^25, 2^32 - 1] and test root * root + root, the midpoint
  // between root^2 and (root + 1)^2.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (int idx = 0; idx < kBlockSize; idx++) {
      const uint64_t root = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = root * root + root;
    }
    xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (int idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
305
306
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form 2^52 << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
322
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
338
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 + 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
354
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^53 - 1) << shift, shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
370
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 3) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
386
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs of the form (2^54 - 1) << (shift - 1), shift in [1, 11].
  for (int idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(uint32_t(idx) + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
402
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // 0 (idx == 0), then the kBlockSize - 1 largest uint64 values, descending.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = uint64_t(0) - idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  for (int idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
417
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep root in [2^25, 2^32 - 1] and test root * root + root, the midpoint
  // between root^2 and (root + 1)^2.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (int idx = 0; idx < kBlockSize; idx++) {
      const uint64_t root = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = root * root + root;
    }
    xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (int idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
435