1*c9945492SAndroid Build Coastguard Worker #include <stdint.h>
2*c9945492SAndroid Build Coastguard Worker #include <math.h>
3*c9945492SAndroid Build Coastguard Worker #include <float.h>
4*c9945492SAndroid Build Coastguard Worker #include "libm.h"
5*c9945492SAndroid Build Coastguard Worker
6*c9945492SAndroid Build Coastguard Worker #if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
sqrtl(long double x)7*c9945492SAndroid Build Coastguard Worker long double sqrtl(long double x)
8*c9945492SAndroid Build Coastguard Worker {
9*c9945492SAndroid Build Coastguard Worker return sqrt(x);
10*c9945492SAndroid Build Coastguard Worker }
11*c9945492SAndroid Build Coastguard Worker #elif (LDBL_MANT_DIG == 113 || LDBL_MANT_DIG == 64) && LDBL_MAX_EXP == 16384
12*c9945492SAndroid Build Coastguard Worker #include "sqrt_data.h"
13*c9945492SAndroid Build Coastguard Worker
14*c9945492SAndroid Build Coastguard Worker #define FENV_SUPPORT 1
15*c9945492SAndroid Build Coastguard Worker
16*c9945492SAndroid Build Coastguard Worker typedef struct {
17*c9945492SAndroid Build Coastguard Worker uint64_t hi;
18*c9945492SAndroid Build Coastguard Worker uint64_t lo;
19*c9945492SAndroid Build Coastguard Worker } u128;
20*c9945492SAndroid Build Coastguard Worker
21*c9945492SAndroid Build Coastguard Worker /* top: 16 bit sign+exponent, x: significand. */
mkldbl(uint64_t top,u128 x)22*c9945492SAndroid Build Coastguard Worker static inline long double mkldbl(uint64_t top, u128 x)
23*c9945492SAndroid Build Coastguard Worker {
24*c9945492SAndroid Build Coastguard Worker union ldshape u;
25*c9945492SAndroid Build Coastguard Worker #if LDBL_MANT_DIG == 113
26*c9945492SAndroid Build Coastguard Worker u.i2.hi = x.hi;
27*c9945492SAndroid Build Coastguard Worker u.i2.lo = x.lo;
28*c9945492SAndroid Build Coastguard Worker u.i2.hi &= 0x0000ffffffffffff;
29*c9945492SAndroid Build Coastguard Worker u.i2.hi |= top << 48;
30*c9945492SAndroid Build Coastguard Worker #elif LDBL_MANT_DIG == 64
31*c9945492SAndroid Build Coastguard Worker u.i.se = top;
32*c9945492SAndroid Build Coastguard Worker u.i.m = x.lo;
33*c9945492SAndroid Build Coastguard Worker /* force the top bit on non-zero (and non-subnormal) results. */
34*c9945492SAndroid Build Coastguard Worker if (top & 0x7fff)
35*c9945492SAndroid Build Coastguard Worker u.i.m |= 0x8000000000000000;
36*c9945492SAndroid Build Coastguard Worker #endif
37*c9945492SAndroid Build Coastguard Worker return u.f;
38*c9945492SAndroid Build Coastguard Worker }
39*c9945492SAndroid Build Coastguard Worker
40*c9945492SAndroid Build Coastguard Worker /* return: top 16 bit is sign+exp and following bits are the significand. */
asu128(long double x)41*c9945492SAndroid Build Coastguard Worker static inline u128 asu128(long double x)
42*c9945492SAndroid Build Coastguard Worker {
43*c9945492SAndroid Build Coastguard Worker union ldshape u = {.f=x};
44*c9945492SAndroid Build Coastguard Worker u128 r;
45*c9945492SAndroid Build Coastguard Worker #if LDBL_MANT_DIG == 113
46*c9945492SAndroid Build Coastguard Worker r.hi = u.i2.hi;
47*c9945492SAndroid Build Coastguard Worker r.lo = u.i2.lo;
48*c9945492SAndroid Build Coastguard Worker #elif LDBL_MANT_DIG == 64
49*c9945492SAndroid Build Coastguard Worker r.lo = u.i.m<<49;
50*c9945492SAndroid Build Coastguard Worker /* ignore the top bit: pseudo numbers are not handled. */
51*c9945492SAndroid Build Coastguard Worker r.hi = u.i.m>>15;
52*c9945492SAndroid Build Coastguard Worker r.hi &= 0x0000ffffffffffff;
53*c9945492SAndroid Build Coastguard Worker r.hi |= (uint64_t)u.i.se << 48;
54*c9945492SAndroid Build Coastguard Worker #endif
55*c9945492SAndroid Build Coastguard Worker return r;
56*c9945492SAndroid Build Coastguard Worker }
57*c9945492SAndroid Build Coastguard Worker
58*c9945492SAndroid Build Coastguard Worker /* returns a*b*2^-32 - e, with error 0 <= e < 1. */
mul32(uint32_t a,uint32_t b)59*c9945492SAndroid Build Coastguard Worker static inline uint32_t mul32(uint32_t a, uint32_t b)
60*c9945492SAndroid Build Coastguard Worker {
61*c9945492SAndroid Build Coastguard Worker return (uint64_t)a*b >> 32;
62*c9945492SAndroid Build Coastguard Worker }
63*c9945492SAndroid Build Coastguard Worker
64*c9945492SAndroid Build Coastguard Worker /* returns a*b*2^-64 - e, with error 0 <= e < 3. */
mul64(uint64_t a,uint64_t b)65*c9945492SAndroid Build Coastguard Worker static inline uint64_t mul64(uint64_t a, uint64_t b)
66*c9945492SAndroid Build Coastguard Worker {
67*c9945492SAndroid Build Coastguard Worker uint64_t ahi = a>>32;
68*c9945492SAndroid Build Coastguard Worker uint64_t alo = a&0xffffffff;
69*c9945492SAndroid Build Coastguard Worker uint64_t bhi = b>>32;
70*c9945492SAndroid Build Coastguard Worker uint64_t blo = b&0xffffffff;
71*c9945492SAndroid Build Coastguard Worker return ahi*bhi + (ahi*blo >> 32) + (alo*bhi >> 32);
72*c9945492SAndroid Build Coastguard Worker }
73*c9945492SAndroid Build Coastguard Worker
add64(u128 a,uint64_t b)74*c9945492SAndroid Build Coastguard Worker static inline u128 add64(u128 a, uint64_t b)
75*c9945492SAndroid Build Coastguard Worker {
76*c9945492SAndroid Build Coastguard Worker u128 r;
77*c9945492SAndroid Build Coastguard Worker r.lo = a.lo + b;
78*c9945492SAndroid Build Coastguard Worker r.hi = a.hi;
79*c9945492SAndroid Build Coastguard Worker if (r.lo < a.lo)
80*c9945492SAndroid Build Coastguard Worker r.hi++;
81*c9945492SAndroid Build Coastguard Worker return r;
82*c9945492SAndroid Build Coastguard Worker }
83*c9945492SAndroid Build Coastguard Worker
add128(u128 a,u128 b)84*c9945492SAndroid Build Coastguard Worker static inline u128 add128(u128 a, u128 b)
85*c9945492SAndroid Build Coastguard Worker {
86*c9945492SAndroid Build Coastguard Worker u128 r;
87*c9945492SAndroid Build Coastguard Worker r.lo = a.lo + b.lo;
88*c9945492SAndroid Build Coastguard Worker r.hi = a.hi + b.hi;
89*c9945492SAndroid Build Coastguard Worker if (r.lo < a.lo)
90*c9945492SAndroid Build Coastguard Worker r.hi++;
91*c9945492SAndroid Build Coastguard Worker return r;
92*c9945492SAndroid Build Coastguard Worker }
93*c9945492SAndroid Build Coastguard Worker
sub64(u128 a,uint64_t b)94*c9945492SAndroid Build Coastguard Worker static inline u128 sub64(u128 a, uint64_t b)
95*c9945492SAndroid Build Coastguard Worker {
96*c9945492SAndroid Build Coastguard Worker u128 r;
97*c9945492SAndroid Build Coastguard Worker r.lo = a.lo - b;
98*c9945492SAndroid Build Coastguard Worker r.hi = a.hi;
99*c9945492SAndroid Build Coastguard Worker if (a.lo < b)
100*c9945492SAndroid Build Coastguard Worker r.hi--;
101*c9945492SAndroid Build Coastguard Worker return r;
102*c9945492SAndroid Build Coastguard Worker }
103*c9945492SAndroid Build Coastguard Worker
sub128(u128 a,u128 b)104*c9945492SAndroid Build Coastguard Worker static inline u128 sub128(u128 a, u128 b)
105*c9945492SAndroid Build Coastguard Worker {
106*c9945492SAndroid Build Coastguard Worker u128 r;
107*c9945492SAndroid Build Coastguard Worker r.lo = a.lo - b.lo;
108*c9945492SAndroid Build Coastguard Worker r.hi = a.hi - b.hi;
109*c9945492SAndroid Build Coastguard Worker if (a.lo < b.lo)
110*c9945492SAndroid Build Coastguard Worker r.hi--;
111*c9945492SAndroid Build Coastguard Worker return r;
112*c9945492SAndroid Build Coastguard Worker }
113*c9945492SAndroid Build Coastguard Worker
114*c9945492SAndroid Build Coastguard Worker /* a<<n, 0 <= n <= 127 */
lsh(u128 a,int n)115*c9945492SAndroid Build Coastguard Worker static inline u128 lsh(u128 a, int n)
116*c9945492SAndroid Build Coastguard Worker {
117*c9945492SAndroid Build Coastguard Worker if (n == 0)
118*c9945492SAndroid Build Coastguard Worker return a;
119*c9945492SAndroid Build Coastguard Worker if (n >= 64) {
120*c9945492SAndroid Build Coastguard Worker a.hi = a.lo<<(n-64);
121*c9945492SAndroid Build Coastguard Worker a.lo = 0;
122*c9945492SAndroid Build Coastguard Worker } else {
123*c9945492SAndroid Build Coastguard Worker a.hi = (a.hi<<n) | (a.lo>>(64-n));
124*c9945492SAndroid Build Coastguard Worker a.lo = a.lo<<n;
125*c9945492SAndroid Build Coastguard Worker }
126*c9945492SAndroid Build Coastguard Worker return a;
127*c9945492SAndroid Build Coastguard Worker }
128*c9945492SAndroid Build Coastguard Worker
129*c9945492SAndroid Build Coastguard Worker /* a>>n, 0 <= n <= 127 */
rsh(u128 a,int n)130*c9945492SAndroid Build Coastguard Worker static inline u128 rsh(u128 a, int n)
131*c9945492SAndroid Build Coastguard Worker {
132*c9945492SAndroid Build Coastguard Worker if (n == 0)
133*c9945492SAndroid Build Coastguard Worker return a;
134*c9945492SAndroid Build Coastguard Worker if (n >= 64) {
135*c9945492SAndroid Build Coastguard Worker a.lo = a.hi>>(n-64);
136*c9945492SAndroid Build Coastguard Worker a.hi = 0;
137*c9945492SAndroid Build Coastguard Worker } else {
138*c9945492SAndroid Build Coastguard Worker a.lo = (a.lo>>n) | (a.hi<<(64-n));
139*c9945492SAndroid Build Coastguard Worker a.hi = a.hi>>n;
140*c9945492SAndroid Build Coastguard Worker }
141*c9945492SAndroid Build Coastguard Worker return a;
142*c9945492SAndroid Build Coastguard Worker }
143*c9945492SAndroid Build Coastguard Worker
144*c9945492SAndroid Build Coastguard Worker /* returns a*b exactly. */
mul64_128(uint64_t a,uint64_t b)145*c9945492SAndroid Build Coastguard Worker static inline u128 mul64_128(uint64_t a, uint64_t b)
146*c9945492SAndroid Build Coastguard Worker {
147*c9945492SAndroid Build Coastguard Worker u128 r;
148*c9945492SAndroid Build Coastguard Worker uint64_t ahi = a>>32;
149*c9945492SAndroid Build Coastguard Worker uint64_t alo = a&0xffffffff;
150*c9945492SAndroid Build Coastguard Worker uint64_t bhi = b>>32;
151*c9945492SAndroid Build Coastguard Worker uint64_t blo = b&0xffffffff;
152*c9945492SAndroid Build Coastguard Worker uint64_t lo1 = ((ahi*blo)&0xffffffff) + ((alo*bhi)&0xffffffff) + (alo*blo>>32);
153*c9945492SAndroid Build Coastguard Worker uint64_t lo2 = (alo*blo)&0xffffffff;
154*c9945492SAndroid Build Coastguard Worker r.hi = ahi*bhi + (ahi*blo>>32) + (alo*bhi>>32) + (lo1>>32);
155*c9945492SAndroid Build Coastguard Worker r.lo = (lo1<<32) + lo2;
156*c9945492SAndroid Build Coastguard Worker return r;
157*c9945492SAndroid Build Coastguard Worker }
158*c9945492SAndroid Build Coastguard Worker
159*c9945492SAndroid Build Coastguard Worker /* returns a*b*2^-128 - e, with error 0 <= e < 7. */
mul128(u128 a,u128 b)160*c9945492SAndroid Build Coastguard Worker static inline u128 mul128(u128 a, u128 b)
161*c9945492SAndroid Build Coastguard Worker {
162*c9945492SAndroid Build Coastguard Worker u128 hi = mul64_128(a.hi, b.hi);
163*c9945492SAndroid Build Coastguard Worker uint64_t m1 = mul64(a.hi, b.lo);
164*c9945492SAndroid Build Coastguard Worker uint64_t m2 = mul64(a.lo, b.hi);
165*c9945492SAndroid Build Coastguard Worker return add64(add64(hi, m1), m2);
166*c9945492SAndroid Build Coastguard Worker }
167*c9945492SAndroid Build Coastguard Worker
168*c9945492SAndroid Build Coastguard Worker /* returns a*b % 2^128. */
mul128_tail(u128 a,u128 b)169*c9945492SAndroid Build Coastguard Worker static inline u128 mul128_tail(u128 a, u128 b)
170*c9945492SAndroid Build Coastguard Worker {
171*c9945492SAndroid Build Coastguard Worker u128 lo = mul64_128(a.lo, b.lo);
172*c9945492SAndroid Build Coastguard Worker lo.hi += a.hi*b.lo + a.lo*b.hi;
173*c9945492SAndroid Build Coastguard Worker return lo;
174*c9945492SAndroid Build Coastguard Worker }
175*c9945492SAndroid Build Coastguard Worker
176*c9945492SAndroid Build Coastguard Worker
177*c9945492SAndroid Build Coastguard Worker /* see sqrt.c for detailed comments. */
178*c9945492SAndroid Build Coastguard Worker
sqrtl(long double x)179*c9945492SAndroid Build Coastguard Worker long double sqrtl(long double x)
180*c9945492SAndroid Build Coastguard Worker {
181*c9945492SAndroid Build Coastguard Worker u128 ix, ml;
182*c9945492SAndroid Build Coastguard Worker uint64_t top;
183*c9945492SAndroid Build Coastguard Worker
184*c9945492SAndroid Build Coastguard Worker ix = asu128(x);
185*c9945492SAndroid Build Coastguard Worker top = ix.hi >> 48;
186*c9945492SAndroid Build Coastguard Worker if (predict_false(top - 0x0001 >= 0x7fff - 0x0001)) {
187*c9945492SAndroid Build Coastguard Worker /* x < 0x1p-16382 or inf or nan. */
188*c9945492SAndroid Build Coastguard Worker if (2*ix.hi == 0 && ix.lo == 0)
189*c9945492SAndroid Build Coastguard Worker return x;
190*c9945492SAndroid Build Coastguard Worker if (ix.hi == 0x7fff000000000000 && ix.lo == 0)
191*c9945492SAndroid Build Coastguard Worker return x;
192*c9945492SAndroid Build Coastguard Worker if (top >= 0x7fff)
193*c9945492SAndroid Build Coastguard Worker return __math_invalidl(x);
194*c9945492SAndroid Build Coastguard Worker /* x is subnormal, normalize it. */
195*c9945492SAndroid Build Coastguard Worker ix = asu128(x * 0x1p112);
196*c9945492SAndroid Build Coastguard Worker top = ix.hi >> 48;
197*c9945492SAndroid Build Coastguard Worker top -= 112;
198*c9945492SAndroid Build Coastguard Worker }
199*c9945492SAndroid Build Coastguard Worker
200*c9945492SAndroid Build Coastguard Worker /* x = 4^e m; with int e and m in [1, 4) */
201*c9945492SAndroid Build Coastguard Worker int even = top & 1;
202*c9945492SAndroid Build Coastguard Worker ml = lsh(ix, 15);
203*c9945492SAndroid Build Coastguard Worker ml.hi |= 0x8000000000000000;
204*c9945492SAndroid Build Coastguard Worker if (even) ml = rsh(ml, 1);
205*c9945492SAndroid Build Coastguard Worker top = (top + 0x3fff) >> 1;
206*c9945492SAndroid Build Coastguard Worker
207*c9945492SAndroid Build Coastguard Worker /* r ~ 1/sqrt(m) */
208*c9945492SAndroid Build Coastguard Worker const uint64_t three = 0xc0000000;
209*c9945492SAndroid Build Coastguard Worker uint64_t r, s, d, u, i;
210*c9945492SAndroid Build Coastguard Worker i = (ix.hi >> 42) % 128;
211*c9945492SAndroid Build Coastguard Worker r = (uint32_t)__rsqrt_tab[i] << 16;
212*c9945492SAndroid Build Coastguard Worker /* |r sqrt(m) - 1| < 0x1p-8 */
213*c9945492SAndroid Build Coastguard Worker s = mul32(ml.hi>>32, r);
214*c9945492SAndroid Build Coastguard Worker d = mul32(s, r);
215*c9945492SAndroid Build Coastguard Worker u = three - d;
216*c9945492SAndroid Build Coastguard Worker r = mul32(u, r) << 1;
217*c9945492SAndroid Build Coastguard Worker /* |r sqrt(m) - 1| < 0x1.7bp-16, switch to 64bit */
218*c9945492SAndroid Build Coastguard Worker r = r<<32;
219*c9945492SAndroid Build Coastguard Worker s = mul64(ml.hi, r);
220*c9945492SAndroid Build Coastguard Worker d = mul64(s, r);
221*c9945492SAndroid Build Coastguard Worker u = (three<<32) - d;
222*c9945492SAndroid Build Coastguard Worker r = mul64(u, r) << 1;
223*c9945492SAndroid Build Coastguard Worker /* |r sqrt(m) - 1| < 0x1.a5p-31 */
224*c9945492SAndroid Build Coastguard Worker s = mul64(u, s) << 1;
225*c9945492SAndroid Build Coastguard Worker d = mul64(s, r);
226*c9945492SAndroid Build Coastguard Worker u = (three<<32) - d;
227*c9945492SAndroid Build Coastguard Worker r = mul64(u, r) << 1;
228*c9945492SAndroid Build Coastguard Worker /* |r sqrt(m) - 1| < 0x1.c001p-59, switch to 128bit */
229*c9945492SAndroid Build Coastguard Worker
230*c9945492SAndroid Build Coastguard Worker const u128 threel = {.hi=three<<32, .lo=0};
231*c9945492SAndroid Build Coastguard Worker u128 rl, sl, dl, ul;
232*c9945492SAndroid Build Coastguard Worker rl.hi = r;
233*c9945492SAndroid Build Coastguard Worker rl.lo = 0;
234*c9945492SAndroid Build Coastguard Worker sl = mul128(ml, rl);
235*c9945492SAndroid Build Coastguard Worker dl = mul128(sl, rl);
236*c9945492SAndroid Build Coastguard Worker ul = sub128(threel, dl);
237*c9945492SAndroid Build Coastguard Worker sl = mul128(ul, sl); /* repr: 3.125 */
238*c9945492SAndroid Build Coastguard Worker /* -0x1p-116 < s - sqrt(m) < 0x3.8001p-125 */
239*c9945492SAndroid Build Coastguard Worker sl = rsh(sub64(sl, 4), 125-(LDBL_MANT_DIG-1));
240*c9945492SAndroid Build Coastguard Worker /* s < sqrt(m) < s + 1 ULP + tiny */
241*c9945492SAndroid Build Coastguard Worker
242*c9945492SAndroid Build Coastguard Worker long double y;
243*c9945492SAndroid Build Coastguard Worker u128 d2, d1, d0;
244*c9945492SAndroid Build Coastguard Worker d0 = sub128(lsh(ml, 2*(LDBL_MANT_DIG-1)-126), mul128_tail(sl,sl));
245*c9945492SAndroid Build Coastguard Worker d1 = sub128(sl, d0);
246*c9945492SAndroid Build Coastguard Worker d2 = add128(add64(sl, 1), d1);
247*c9945492SAndroid Build Coastguard Worker sl = add64(sl, d1.hi >> 63);
248*c9945492SAndroid Build Coastguard Worker y = mkldbl(top, sl);
249*c9945492SAndroid Build Coastguard Worker if (FENV_SUPPORT) {
250*c9945492SAndroid Build Coastguard Worker /* handle rounding modes and inexact exception. */
251*c9945492SAndroid Build Coastguard Worker top = predict_false((d2.hi|d2.lo)==0) ? 0 : 1;
252*c9945492SAndroid Build Coastguard Worker top |= ((d1.hi^d2.hi)&0x8000000000000000) >> 48;
253*c9945492SAndroid Build Coastguard Worker y += mkldbl(top, (u128){0});
254*c9945492SAndroid Build Coastguard Worker }
255*c9945492SAndroid Build Coastguard Worker return y;
256*c9945492SAndroid Build Coastguard Worker }
257*c9945492SAndroid Build Coastguard Worker #else
258*c9945492SAndroid Build Coastguard Worker #error unsupported long double format
259*c9945492SAndroid Build Coastguard Worker #endif
260