1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <cstdlib>
11 #include <vector>
12
13 #include <gtest/gtest.h>
14
15 #include <xnnpack/aligned-allocator.h>
16 #include <xnnpack/common.h>
17 #include <xnnpack/math-stubs.h>
18
19
// Number of uint32 elements processed per kernel invocation in every test below.
constexpr int kBlockSize = 1024;
21
22
TEST(SQRT__SCALAR_BITMANIP, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_bitmanip(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
48
49 TEST(SQRT__SCALAR_BITMANIP, 65536_output) {
50 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
51 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
52 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
53 for (uint32_t i = 0; i < kBlockSize; i++) {
54 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
55 }
56 xnn_math_u32_sqrt__scalar_bitmanip(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
57 for (uint32_t i = 0; i < kBlockSize; i++) {
58 const uint32_t input = inputs[i];
59 const uint32_t output = outputs[i];
60 ASSERT_EQ(output, UINT32_C(0x00010000))
61 << "input = " << input << ", output = " << output;
62 }
63 }
64 }
65
66
TEST(SQRT__SCALAR_CLZ_BINSEARCH, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_clz_binsearch(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
92
93 TEST(SQRT__SCALAR_CLZ_BINSEARCH, 65536_output) {
94 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
95 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
96 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
97 for (uint32_t i = 0; i < kBlockSize; i++) {
98 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
99 }
100 xnn_math_u32_sqrt__scalar_clz_binsearch(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
101 for (uint32_t i = 0; i < kBlockSize; i++) {
102 const uint32_t input = inputs[i];
103 const uint32_t output = outputs[i];
104 ASSERT_EQ(output, UINT32_C(0x00010000))
105 << "input = " << input << ", output = " << output;
106 }
107 }
108 }
109
110
TEST(SQRT__SCALAR_CLZ_NEWTON, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_clz_newton(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
136
137 TEST(SQRT__SCALAR_CLZ_NEWTON, 65536_output) {
138 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
139 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
140 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
141 for (uint32_t i = 0; i < kBlockSize; i++) {
142 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
143 }
144 xnn_math_u32_sqrt__scalar_clz_newton(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
145 for (uint32_t i = 0; i < kBlockSize; i++) {
146 const uint32_t input = inputs[i];
147 const uint32_t output = outputs[i];
148 ASSERT_EQ(output, UINT32_C(0x00010000))
149 << "input = " << input << ", output = " << output;
150 }
151 }
152 }
153
154
TEST(SQRT__SCALAR_CVTI32_SQRT_LRINT, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_cvti32_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
180
181 TEST(SQRT__SCALAR_CVTI32_SQRT_LRINT, 65536_output) {
182 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
183 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
184 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
185 for (uint32_t i = 0; i < kBlockSize; i++) {
186 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
187 }
188 xnn_math_u32_sqrt__scalar_cvti32_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
189 for (uint32_t i = 0; i < kBlockSize; i++) {
190 const uint32_t input = inputs[i];
191 const uint32_t output = outputs[i];
192 ASSERT_EQ(output, UINT32_C(0x00010000))
193 << "input = " << input << ", output = " << output;
194 }
195 }
196 }
197
198
TEST(SQRT__SCALAR_CVTI64_SQRT_LRINT, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_cvti64_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
224
225 TEST(SQRT__SCALAR_CVTI64_SQRT_LRINT, 65536_output) {
226 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
227 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
228 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
229 for (uint32_t i = 0; i < kBlockSize; i++) {
230 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
231 }
232 xnn_math_u32_sqrt__scalar_cvti64_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
233 for (uint32_t i = 0; i < kBlockSize; i++) {
234 const uint32_t input = inputs[i];
235 const uint32_t output = outputs[i];
236 ASSERT_EQ(output, UINT32_C(0x00010000))
237 << "input = " << input << ", output = " << output;
238 }
239 }
240 }
241
242
TEST(SQRT__SCALAR_CVTU32_SQRT_LRINT, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_cvtu32_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
268
269 TEST(SQRT__SCALAR_CVTU32_SQRT_LRINT, 65536_output) {
270 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
271 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
272 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
273 for (uint32_t i = 0; i < kBlockSize; i++) {
274 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
275 }
276 xnn_math_u32_sqrt__scalar_cvtu32_sqrt_lrint(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
277 for (uint32_t i = 0; i < kBlockSize; i++) {
278 const uint32_t input = inputs[i];
279 const uint32_t output = outputs[i];
280 ASSERT_EQ(output, UINT32_C(0x00010000))
281 << "input = " << input << ", output = " << output;
282 }
283 }
284 }
285
286
TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
312
313 TEST(SQRT__SCALAR_CVTI64_SQRTF_LRINTF, 65536_output) {
314 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
315 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
316 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
317 for (uint32_t i = 0; i < kBlockSize; i++) {
318 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
319 }
320 xnn_math_u32_sqrt__scalar_cvti64_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
321 for (uint32_t i = 0; i < kBlockSize; i++) {
322 const uint32_t input = inputs[i];
323 const uint32_t output = outputs[i];
324 ASSERT_EQ(output, UINT32_C(0x00010000))
325 << "input = " << input << ", output = " << output;
326 }
327 }
328 }
329
330
TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
356
357 TEST(SQRT__SCALAR_CVTU32_SQRTF_LRINTF, 65536_output) {
358 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
359 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
360 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
361 for (uint32_t i = 0; i < kBlockSize; i++) {
362 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
363 }
364 xnn_math_u32_sqrt__scalar_cvtu32_sqrtf_lrintf(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
365 for (uint32_t i = 0; i < kBlockSize; i++) {
366 const uint32_t input = inputs[i];
367 const uint32_t output = outputs[i];
368 ASSERT_EQ(output, UINT32_C(0x00010000))
369 << "input = " << input << ", output = " << output;
370 }
371 }
372 }
373
374
TEST(SQRT__SCALAR_HASHEMIAN, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_hashemian(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
400
401 TEST(SQRT__SCALAR_HASHEMIAN, 65536_output) {
402 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
403 std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
404 for (uint32_t n = UINT32_C(4294901761); n >= UINT32_C(4294901761); n += kBlockSize) {
405 for (uint32_t i = 0; i < kBlockSize; i++) {
406 inputs[i] = std::max<uint32_t>(n + i, UINT32_C(4294901761));
407 }
408 xnn_math_u32_sqrt__scalar_hashemian(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
409 for (uint32_t i = 0; i < kBlockSize; i++) {
410 const uint32_t input = inputs[i];
411 const uint32_t output = outputs[i];
412 ASSERT_EQ(output, UINT32_C(0x00010000))
413 << "input = " << input << ", output = " << output;
414 }
415 }
416 }
417
418
TEST(SQRT__SCALAR_TFLM, uint16_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Exhaustively sweep [0, 4294901760], the inputs whose rounded square root
  // fits into 16 bits, one kBlockSize-sized batch at a time.
  uint32_t block_start = 0;
  while (block_start <= UINT32_C(4294901760)) {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      inputs[idx] = std::min<uint32_t>(block_start + idx, UINT32_C(4294901760));
    }
    xnn_math_u32_sqrt__scalar_tflm(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      const uint32_t in = inputs[idx];
      const uint32_t out = outputs[idx];
      const int64_t residual = std::abs(int64_t(uint64_t(out) * uint64_t(out)) - int64_t(in));

      // The result must be a strictly closer square root approximation than
      // the integer just below it...
      const uint32_t below = out - 1;
      const int64_t residual_below = std::abs(int64_t(uint64_t(below) * uint64_t(below)) - int64_t(in));
      ASSERT_LT(residual, residual_below)
        << "input = " << in << ", output = " << out;

      // ...and than the integer just above it.
      const uint32_t above = out + 1;
      const int64_t residual_above = std::abs(int64_t(uint64_t(above) * uint64_t(above)) - int64_t(in));
      ASSERT_LT(residual, residual_above)
        << "input = " << in << ", output = " << out;
    }
    block_start += kBlockSize;
  }
}
444
// Disabled: the TFLM kernel does not saturate out-of-range inputs to 65536.
TEST(SQRT__SCALAR_TFLM, DISABLED_65536_output) {
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> inputs(kBlockSize);
  std::vector<uint32_t, AlignedAllocator<uint32_t, 64>> outputs(kBlockSize);
  // Every input above 4294901760 must saturate to a 65536 (0x10000) result.
  // Walk the tail of the uint32 range in batches; the loop terminates once
  // the batch start wraps around below the first out-of-range input.
  uint32_t block_start = UINT32_C(4294901761);
  do {
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      // std::max clamps entries of the final batch that wrapped past zero.
      inputs[idx] = std::max<uint32_t>(block_start + idx, UINT32_C(4294901761));
    }
    xnn_math_u32_sqrt__scalar_tflm(kBlockSize * sizeof(uint32_t), inputs.data(), outputs.data());
    for (uint32_t idx = 0; idx < kBlockSize; idx++) {
      ASSERT_EQ(outputs[idx], UINT32_C(0x00010000))
        << "input = " << inputs[idx] << ", output = " << outputs[idx];
    }
    block_start += kBlockSize;
  } while (block_start >= UINT32_C(4294901761));
}
461