xref: /aosp_15_r20/external/XNNPACK/src/u64-u32-vsqrtshift/scalar-cvtu32-sqrt-cvtu32f64-x1.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2022 Google LLC
2*4bdc9457SAndroid Build Coastguard Worker //
3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
8*4bdc9457SAndroid Build Coastguard Worker #include <stdint.h>
9*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
10*4bdc9457SAndroid Build Coastguard Worker 
11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/vunary.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker 
xnn_u64_u32_vsqrtshift_ukernel__scalar_cvtu32_sqrt_cvtu32f64_x1(size_t batch,const uint64_t * input,uint32_t * output,uint32_t shift)15*4bdc9457SAndroid Build Coastguard Worker void xnn_u64_u32_vsqrtshift_ukernel__scalar_cvtu32_sqrt_cvtu32f64_x1(
16*4bdc9457SAndroid Build Coastguard Worker     size_t batch,
17*4bdc9457SAndroid Build Coastguard Worker     const uint64_t* input,
18*4bdc9457SAndroid Build Coastguard Worker     uint32_t* output,
19*4bdc9457SAndroid Build Coastguard Worker     uint32_t shift)
20*4bdc9457SAndroid Build Coastguard Worker {
21*4bdc9457SAndroid Build Coastguard Worker   assert(batch != 0);
22*4bdc9457SAndroid Build Coastguard Worker   assert(input != NULL);
23*4bdc9457SAndroid Build Coastguard Worker   assert(output != NULL);
24*4bdc9457SAndroid Build Coastguard Worker   assert(shift < 32);
25*4bdc9457SAndroid Build Coastguard Worker 
26*4bdc9457SAndroid Build Coastguard Worker   do {
27*4bdc9457SAndroid Build Coastguard Worker     const uint64_t vx = *input++;
28*4bdc9457SAndroid Build Coastguard Worker 
29*4bdc9457SAndroid Build Coastguard Worker     uint64_t vy = vx;
30*4bdc9457SAndroid Build Coastguard Worker     const uint32_t vx_hi = (uint32_t) (vx >> 32);
31*4bdc9457SAndroid Build Coastguard Worker     const uint32_t vx_lo = (uint32_t) vx;
32*4bdc9457SAndroid Build Coastguard Worker     if XNN_LIKELY(vx != 0) {
33*4bdc9457SAndroid Build Coastguard Worker       const double vf_hi = (double) vx_hi;
34*4bdc9457SAndroid Build Coastguard Worker       const double vf_lo = (double) vx_lo;
35*4bdc9457SAndroid Build Coastguard Worker       double vf = vf_hi * 0x1.0p+32 + vf_lo;
36*4bdc9457SAndroid Build Coastguard Worker       vf = sqrt(vf);
37*4bdc9457SAndroid Build Coastguard Worker       vy = math_cvt_sat_u32_f64(vf);
38*4bdc9457SAndroid Build Coastguard Worker       #if XNN_ARCH_ARM || XNN_ARCH_X86
39*4bdc9457SAndroid Build Coastguard Worker         const uint64_t vsquared_y_less_x = math_mulext_u32((uint32_t) vy, (uint32_t) vy) - vx;
40*4bdc9457SAndroid Build Coastguard Worker       #else
41*4bdc9457SAndroid Build Coastguard Worker         const uint64_t vsquared_y_less_x = vy * vy - vx;
42*4bdc9457SAndroid Build Coastguard Worker       #endif
43*4bdc9457SAndroid Build Coastguard Worker       if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x + vy) < 0) {
44*4bdc9457SAndroid Build Coastguard Worker         vy += 1;
45*4bdc9457SAndroid Build Coastguard Worker       } else if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x - vy) >= 0) {
46*4bdc9457SAndroid Build Coastguard Worker         vy -= 1;
47*4bdc9457SAndroid Build Coastguard Worker       }
48*4bdc9457SAndroid Build Coastguard Worker     }
49*4bdc9457SAndroid Build Coastguard Worker 
50*4bdc9457SAndroid Build Coastguard Worker     // Match TFLM is producing incorrect result for high 64-bit inputs
51*4bdc9457SAndroid Build Coastguard Worker     const uint32_t vy_lo = (uint32_t) vy;
52*4bdc9457SAndroid Build Coastguard Worker     const uint32_t vy_hi = (uint32_t) (vy >> 32);
53*4bdc9457SAndroid Build Coastguard Worker     uint32_t vout = vy_lo | -vy_hi;
54*4bdc9457SAndroid Build Coastguard Worker     // Match TFLM is producing incorrect result for high 32-bit inputs
55*4bdc9457SAndroid Build Coastguard Worker     if XNN_LIKELY(vx_hi == 0) {
56*4bdc9457SAndroid Build Coastguard Worker       if (vout == UINT32_C(0x00010000)) {
57*4bdc9457SAndroid Build Coastguard Worker         vout -= 1;
58*4bdc9457SAndroid Build Coastguard Worker       }
59*4bdc9457SAndroid Build Coastguard Worker     }
60*4bdc9457SAndroid Build Coastguard Worker 
61*4bdc9457SAndroid Build Coastguard Worker     *output++ = vout >> shift;
62*4bdc9457SAndroid Build Coastguard Worker 
63*4bdc9457SAndroid Build Coastguard Worker     batch -= sizeof(uint64_t);
64*4bdc9457SAndroid Build Coastguard Worker   } while (batch != 0);
65*4bdc9457SAndroid Build Coastguard Worker }
66