1*4bdc9457SAndroid Build Coastguard Worker // Copyright 2022 Google LLC 2*4bdc9457SAndroid Build Coastguard Worker // 3*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the 4*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree. 5*4bdc9457SAndroid Build Coastguard Worker 6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h> 7*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h> 8*4bdc9457SAndroid Build Coastguard Worker #include <math.h> 9*4bdc9457SAndroid Build Coastguard Worker 10*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/common.h> 11*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h> 12*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math-stubs.h> 13*4bdc9457SAndroid Build Coastguard Worker 14*4bdc9457SAndroid Build Coastguard Worker xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint(size_t n,const uint64_t * input,uint64_t * output)15*4bdc9457SAndroid Build Coastguard Workervoid xnn_math_u64_sqrt__scalar_cvtu32_sqrt_llrint( 16*4bdc9457SAndroid Build Coastguard Worker size_t n, 17*4bdc9457SAndroid Build Coastguard Worker const uint64_t* input, 18*4bdc9457SAndroid Build Coastguard Worker uint64_t* output) 19*4bdc9457SAndroid Build Coastguard Worker { 20*4bdc9457SAndroid Build Coastguard Worker assert(n % sizeof(uint32_t) == 0); 21*4bdc9457SAndroid Build Coastguard Worker 22*4bdc9457SAndroid Build Coastguard Worker for (; n != 0; n -= sizeof(uint64_t)) { 23*4bdc9457SAndroid Build Coastguard Worker const uint64_t vx = *input++; 24*4bdc9457SAndroid Build Coastguard Worker 25*4bdc9457SAndroid Build Coastguard Worker uint64_t vy = vx; 26*4bdc9457SAndroid Build Coastguard Worker if XNN_LIKELY(vx != 0) { 27*4bdc9457SAndroid Build Coastguard Worker const uint32_t vx_lo = (uint32_t) vx; 28*4bdc9457SAndroid Build Coastguard Worker const uint32_t vx_hi = (uint32_t) (vx >> 32); 29*4bdc9457SAndroid Build Coastguard Worker const double vf_hi = (double) vx_hi; 30*4bdc9457SAndroid Build Coastguard Worker const double vf_lo = (double) vx_lo; 31*4bdc9457SAndroid Build Coastguard Worker double vf = vf_hi * 0x1.0p+32 + vf_lo; 32*4bdc9457SAndroid Build Coastguard Worker vf = sqrt(vf); 33*4bdc9457SAndroid Build Coastguard Worker vy = (uint64_t) (int64_t) llrint(vf); 34*4bdc9457SAndroid Build Coastguard Worker #if XNN_ARCH_ARM || XNN_ARCH_X86 35*4bdc9457SAndroid Build Coastguard Worker const uint64_t vsquared_y_less_x = math_mulext_u32((uint32_t) vy, (uint32_t) vy) - vx; 36*4bdc9457SAndroid Build Coastguard Worker #else 37*4bdc9457SAndroid Build Coastguard Worker const uint64_t vsquared_y_less_x = vy * vy - vx; 38*4bdc9457SAndroid Build Coastguard Worker #endif 39*4bdc9457SAndroid Build Coastguard Worker if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x + vy) < 0) { 40*4bdc9457SAndroid Build Coastguard Worker vy += 1; 41*4bdc9457SAndroid Build Coastguard Worker } else if XNN_UNPREDICTABLE((int64_t) (vsquared_y_less_x - vy) >= 0) { 42*4bdc9457SAndroid Build Coastguard Worker vy -= 1; 43*4bdc9457SAndroid Build Coastguard Worker } 44*4bdc9457SAndroid Build Coastguard Worker } 45*4bdc9457SAndroid Build Coastguard Worker 46*4bdc9457SAndroid Build Coastguard Worker *output++ = vy; 47*4bdc9457SAndroid Build Coastguard Worker } 48*4bdc9457SAndroid Build Coastguard Worker } 49