xref: /aosp_15_r20/external/XNNPACK/src/math/sqrt-u64-scalar-cvtu32-sqrt-cvtsatu32f64.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 
6 #include <assert.h>
7 #include <stddef.h>
8 #include <math.h>
9 
10 #include <xnnpack/common.h>
11 #include <xnnpack/math.h>
12 #include <xnnpack/math-stubs.h>
13 
14 
xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(size_t n,const uint64_t * input,uint64_t * output)15 void xnn_math_u64_sqrt__scalar_cvtu32_sqrt_cvtsatu32f64(
16     size_t n,
17     const uint64_t* input,
18     uint64_t* output)
19 {
20   assert(n % sizeof(uint32_t) == 0);
21 
22   for (; n != 0; n -= sizeof(uint64_t)) {
23     const uint64_t vx = *input++;
24 
25     uint64_t vy = vx;
26     if XNN_LIKELY(vx != 0) {
27       const uint32_t vx_lo = (uint32_t) vx;
28       const uint32_t vx_hi = (uint32_t) (vx >> 32);
29       const double vf_hi = (double) vx_hi;
30       const double vf_lo = (double) vx_lo;
31       double vf = vf_hi * 0x1.0p+32 + vf_lo;
32       vf = sqrt(vf);
33       vy = math_cvt_sat_u32_f64(vf);
34       #if XNN_ARCH_ARM || XNN_ARCH_X86
35         const uint64_t vsquared_y_less_x = math_mulext_u32((uint32_t) vy, (uint32_t) vy) - vx;
36       #else
37         const uint64_t vsquared_y_less_x = vy * vy - vx;
38       #endif
39       if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x + vy) < 0) {
40         vy += 1;
41       } else if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x - vy) >= 0) {
42         vy -= 1;
43       }
44     }
45 
46     *output++ = vy;
47   }
48 }
49