xref: /aosp_15_r20/external/XNNPACK/src/math/sqrt-u64-scalar-cvtu32-sqrt-llrint.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2022 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
9*4bdc9457SAndroid Build Coastguard Worker 
10*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h>
11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math-stubs.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker 
xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(size_t n,const uint64_t * input,uint64_t * output)15*4bdc9457SAndroid Build Coastguard Worker void xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(
16*4bdc9457SAndroid Build Coastguard Worker     size_t n,
17*4bdc9457SAndroid Build Coastguard Worker     const uint64_t* input,
18*4bdc9457SAndroid Build Coastguard Worker     uint64_t* output)
19*4bdc9457SAndroid Build Coastguard Worker {
20*4bdc9457SAndroid Build Coastguard Worker   assert(n % sizeof(uint32_t) == 0);
21*4bdc9457SAndroid Build Coastguard Worker 
22*4bdc9457SAndroid Build Coastguard Worker   for (; n != 0; n -= sizeof(uint64_t)) {
23*4bdc9457SAndroid Build Coastguard Worker     const uint64_t vx = *input++;
24*4bdc9457SAndroid Build Coastguard Worker 
25*4bdc9457SAndroid Build Coastguard Worker     uint64_t vy = vx;
26*4bdc9457SAndroid Build Coastguard Worker     if XNN_LIKELY(vx != 0) {
27*4bdc9457SAndroid Build Coastguard Worker       const uint32_t vx_lo = (uint32_t) vx;
28*4bdc9457SAndroid Build Coastguard Worker       const uint32_t vx_hi = (uint32_t) (vx >> 32);
29*4bdc9457SAndroid Build Coastguard Worker       const double vf_hi = (double) vx_hi;
30*4bdc9457SAndroid Build Coastguard Worker       const double vf_lo = (double) vx_lo;
31*4bdc9457SAndroid Build Coastguard Worker       double vf = vf_hi * 0x1.0p+32 + vf_lo;
32*4bdc9457SAndroid Build Coastguard Worker       vf = sqrt(vf);
33*4bdc9457SAndroid Build Coastguard Worker       vy = (uint64_t) (int64_t) llrint(vf);
34*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_ARM || XNN_ARCH_X86
35*4bdc9457SAndroid Build Coastguard Worker         const uint64_t vsquared_y_less_x = math_mulext_u32((uint32_t) vy, (uint32_t) vy) - vx;
36*4bdc9457SAndroid Build Coastguard Worker       #else
37*4bdc9457SAndroid Build Coastguard Worker         const uint64_t vsquared_y_less_x = vy * vy - vx;
38*4bdc9457SAndroid Build Coastguard Worker       #endif
39*4bdc9457SAndroid Build Coastguard Worker       if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x + vy) < 0) {
40*4bdc9457SAndroid Build Coastguard Worker         vy += 1;
41*4bdc9457SAndroid Build Coastguard Worker       } else if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x - vy) >= 0) {
42*4bdc9457SAndroid Build Coastguard Worker         vy -= 1;
43*4bdc9457SAndroid Build Coastguard Worker       }
44*4bdc9457SAndroid Build Coastguard Worker     }
45*4bdc9457SAndroid Build Coastguard Worker 
46*4bdc9457SAndroid Build Coastguard Worker     *output++ = vy;
47*4bdc9457SAndroid Build Coastguard Worker   }
48*4bdc9457SAndroid Build Coastguard Worker }
49