xref: /aosp_15_r20/external/XNNPACK/eval/u64-sqrt.cc (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstdlib>
11 #include <vector>
12 
13 #include <gtest/gtest.h>
14 
15 #include <xnnpack/aligned-allocator.h>
16 #include <xnnpack/common.h>
17 #include <xnnpack/math-stubs.h>
18 
19 
20 constexpr int kBlockSize = 1024;
21 
22 
23 namespace {
24 
// Reference implementation: square root of n rounded to the nearest integer.
// Uses Newton-Raphson iteration to find floor(sqrt(n)), then rounds up when n
// lies past the midpoint between consecutive squares.
uint64_t Sqrt(uint64_t n) {
  // 0 and 1 are their own rounded square roots; returning early also avoids a
  // division by zero below (for n == 1, the initial guess n >> 1 would be 0).
  if (n < 2) {
    return n;
  }

  // Newton-Raphson iteration x' = (x + n/x) / 2 converges to floor(sqrt(n)).
  uint64_t x0 = n >> 1;
  uint64_t x1 = (x0 + n / x0) >> 1;
  do {
    x0 = x1;
    x1 = (x0 + n / x0) >> 1;
  } while (x1 < x0);

  // x0 == floor(sqrt(n)). Round to nearest: the midpoint between x0^2 and
  // (x0+1)^2 is x0^2 + x0 + 0.5, so round up exactly when n > x0^2 + x0.
  // No overflow: x0 <= 2^32-1, hence x0*x0 + x0 <= 2^64 - 2^32 < 2^64.
  if (x0 * x0 + x0 < n) {
    x0 += 1;
  }
  return x0;
}
43 
44 }  // namespace
45 
46 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^52 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
62 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
78 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53+1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
94 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
110 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-3 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
126 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
142 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Probe the top of the uint64_t range: -idx wraps around to 2^64 - idx.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = -idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
157 
TEST(SQRT__SCALAR_CVTU32_SQRT_CVTSATU32F64, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep t in [2^25, 2^32-1] and feed t*t + t: these sit exactly below the
  // rounding midpoint between t*t and (t+1)*(t+1), so the expected result is t.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint64_t t = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = t * t + t;
    }
    xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
175 
176 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^52 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
192 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
208 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53+1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
224 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
240 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-3 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
256 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
272 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Probe the top of the uint64_t range: -idx wraps around to 2^64 - idx.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = -idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
287 
TEST(SQRT__SCALAR_CVTU32_SQRT_LLRINT, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep t in [2^25, 2^32-1] and feed t*t + t: these sit exactly below the
  // rounding midpoint between t*t and (t+1)*(t+1), so the expected result is t.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint64_t t = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = t * t + t;
    }
    xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
305 
306 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^52 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0010000000000000) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
322 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
338 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, min_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53+1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x0020000000000001) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
354 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_exact_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^53-1 shifted left by 1..11 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x001FFFFFFFFFFFFF) << shift;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
370 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_min_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-3 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFD) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
386 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, max_mantissa_max_input) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Inputs: 2^54-1 shifted left by 0..10 bits.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    const uint32_t shift = std::min<uint32_t>(idx + 1, 11);
    inputs[idx] = UINT64_C(0x003FFFFFFFFFFFFF) << (shift - 1);
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
402 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, largest_inputs) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Probe the top of the uint64_t range: -idx wraps around to 2^64 - idx.
  for (uint64_t idx = 0; idx < kBlockSize; idx++) {
    inputs[idx] = -idx;
  }
  xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
  // Every kernel output must match the integer reference square root.
  for (uint32_t idx = 0; idx < kBlockSize; idx++) {
    ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
  }
}
417 
TEST(SQRT__SCALAR_CVTU64_SQRT_LLRINT, double_rounding) {
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> inputs(kBlockSize);
  std::vector<uint64_t, AlignedAllocator<uint64_t, 64>> outputs(kBlockSize);
  // Sweep t in [2^25, 2^32-1] and feed t*t + t: these sit exactly below the
  // rounding midpoint between t*t and (t+1)*(t+1), so the expected result is t.
  for (uint64_t base = UINT64_C(33554432); base <= UINT64_C(4294967295); base += kBlockSize) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint64_t t = std::min(base + uint64_t(idx), UINT64_C(4294967295));
      inputs[idx] = t * t + t;
    }
    xnn_math_u64_sqrt__scalar_cvtu64_sqrt_llrint(kBlockSize * sizeof(uint64_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], Sqrt(inputs[idx])) << "input: " << inputs[idx];
    }
  }
}
435