/*
 * Double-precision log(1+x) function.
 *
 * Copyright (c) 2022-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "poly_scalar_f64.h"
#include "math_config.h"
#include "pl_sig.h"
#include "pl_test.h"

/* High and low parts of log(2); log1p adds k * Ln2Hi and k * Ln2Lo in
   separate fma steps.  */
#define Ln2Hi 0x1.62e42fefa3800p-1
#define Ln2Lo 0x1.ef35793c76730p-45
#define HfRt2Top 0x3fe6a09e /* top32(asuint64(sqrt(2)/2)).  */
#define OneMHfRt2Top \
  0x00095f62 /* top32(asuint64(1)) - top32(asuint64(sqrt(2)/2)).  */
/* Value subtracted from the top 12 bits (sign + exponent) of the biased
   word to obtain k; 0x3ff is the exponent field of 1.0.  */
#define OneTop12 0x3ff
/* Selects the low 32 bits of a 64-bit word.  */
#define BottomMask 0xffffffff
/* Bit patterns compared against |x| (and against x when it is positive) to
   detect the interval [sqrt(2)/2 - 1, sqrt(2) - 1] where k = 0.  */
#define OneMHfRt2 0x3fd2bec333018866
#define Rt2MOne 0x3fda827999fcef32
/* Clears the sign bit of a 64-bit double representation.  */
#define AbsMask 0x7fffffffffffffff
/* Top 16 bits of a double whose exponent is -63; at or below this log1p
   returns x unchanged.  */
#define ExpM63 0x3c00

25*412f47f9SXin Li static inline double
eval_poly(double f)26*412f47f9SXin Li eval_poly (double f)
27*412f47f9SXin Li {
28*412f47f9SXin Li double f2 = f * f;
29*412f47f9SXin Li double f4 = f2 * f2;
30*412f47f9SXin Li double f8 = f4 * f4;
31*412f47f9SXin Li return estrin_18_f64 (f, f2, f4, f8, f8 * f8, __log1p_data.coeffs);
32*412f47f9SXin Li }
33*412f47f9SXin Li
34*412f47f9SXin Li /* log1p approximation using polynomial on reduced interval. Largest
35*412f47f9SXin Li observed errors are near the lower boundary of the region where k
36*412f47f9SXin Li is 0.
37*412f47f9SXin Li Maximum measured error: 1.75ULP.
38*412f47f9SXin Li log1p(-0x1.2e1aea97b3e5cp-2) got -0x1.65fb8659a2f9p-2
39*412f47f9SXin Li want -0x1.65fb8659a2f92p-2. */
40*412f47f9SXin Li double
log1p(double x)41*412f47f9SXin Li log1p (double x)
42*412f47f9SXin Li {
43*412f47f9SXin Li uint64_t ix = asuint64 (x);
44*412f47f9SXin Li uint64_t ia = ix & AbsMask;
45*412f47f9SXin Li uint32_t ia16 = ia >> 48;
46*412f47f9SXin Li
47*412f47f9SXin Li /* Handle special cases first. */
48*412f47f9SXin Li if (unlikely (ia16 >= 0x7ff0 || ix >= 0xbff0000000000000
49*412f47f9SXin Li || ix == 0x8000000000000000))
50*412f47f9SXin Li {
51*412f47f9SXin Li if (ix == 0x8000000000000000 || ix == 0x7ff0000000000000)
52*412f47f9SXin Li {
53*412f47f9SXin Li /* x == -0 => log1p(x) = -0.
54*412f47f9SXin Li x == Inf => log1p(x) = Inf. */
55*412f47f9SXin Li return x;
56*412f47f9SXin Li }
57*412f47f9SXin Li if (ix == 0xbff0000000000000)
58*412f47f9SXin Li {
59*412f47f9SXin Li /* x == -1 => log1p(x) = -Inf. */
60*412f47f9SXin Li return __math_divzero (-1);
61*412f47f9SXin Li ;
62*412f47f9SXin Li }
63*412f47f9SXin Li if (ia16 >= 0x7ff0)
64*412f47f9SXin Li {
65*412f47f9SXin Li /* x == +/-NaN => log1p(x) = NaN. */
66*412f47f9SXin Li return __math_invalid (asdouble (ia));
67*412f47f9SXin Li }
68*412f47f9SXin Li /* x < -1 => log1p(x) = NaN.
69*412f47f9SXin Li x == -Inf => log1p(x) = NaN. */
70*412f47f9SXin Li return __math_invalid (x);
71*412f47f9SXin Li }
72*412f47f9SXin Li
73*412f47f9SXin Li /* With x + 1 = t * 2^k (where t = f + 1 and k is chosen such that f
74*412f47f9SXin Li is in [sqrt(2)/2, sqrt(2)]):
75*412f47f9SXin Li log1p(x) = k*log(2) + log1p(f).
76*412f47f9SXin Li
77*412f47f9SXin Li f may not be representable exactly, so we need a correction term:
78*412f47f9SXin Li let m = round(1 + x), c = (1 + x) - m.
79*412f47f9SXin Li c << m: at very small x, log1p(x) ~ x, hence:
80*412f47f9SXin Li log(1+x) - log(m) ~ c/m.
81*412f47f9SXin Li
82*412f47f9SXin Li We therefore calculate log1p(x) by k*log2 + log1p(f) + c/m. */
83*412f47f9SXin Li
84*412f47f9SXin Li uint64_t sign = ix & ~AbsMask;
85*412f47f9SXin Li if (ia <= OneMHfRt2 || (!sign && ia <= Rt2MOne))
86*412f47f9SXin Li {
87*412f47f9SXin Li if (unlikely (ia16 <= ExpM63))
88*412f47f9SXin Li {
89*412f47f9SXin Li /* If exponent of x <= -63 then shortcut the polynomial and avoid
90*412f47f9SXin Li underflow by just returning x, which is exactly rounded in this
91*412f47f9SXin Li region. */
92*412f47f9SXin Li return x;
93*412f47f9SXin Li }
94*412f47f9SXin Li /* If x is in [sqrt(2)/2 - 1, sqrt(2) - 1] then we can shortcut all the
95*412f47f9SXin Li logic below, as k = 0 and f = x and therefore representable exactly.
96*412f47f9SXin Li All we need is to return the polynomial. */
97*412f47f9SXin Li return fma (x, eval_poly (x) * x, x);
98*412f47f9SXin Li }
99*412f47f9SXin Li
100*412f47f9SXin Li /* Obtain correctly scaled k by manipulation in the exponent. */
101*412f47f9SXin Li double m = x + 1;
102*412f47f9SXin Li uint64_t mi = asuint64 (m);
103*412f47f9SXin Li uint32_t u = (mi >> 32) + OneMHfRt2Top;
104*412f47f9SXin Li int32_t k = (int32_t) (u >> 20) - OneTop12;
105*412f47f9SXin Li
106*412f47f9SXin Li /* Correction term c/m. */
107*412f47f9SXin Li double cm = (x - (m - 1)) / m;
108*412f47f9SXin Li
109*412f47f9SXin Li /* Reduce x to f in [sqrt(2)/2, sqrt(2)]. */
110*412f47f9SXin Li uint32_t utop = (u & 0x000fffff) + HfRt2Top;
111*412f47f9SXin Li uint64_t u_red = ((uint64_t) utop << 32) | (mi & BottomMask);
112*412f47f9SXin Li double f = asdouble (u_red) - 1;
113*412f47f9SXin Li
114*412f47f9SXin Li /* Approximate log1p(x) on the reduced input using a polynomial. Because
115*412f47f9SXin Li log1p(0)=0 we choose an approximation of the form:
116*412f47f9SXin Li x + C0*x^2 + C1*x^3 + C2x^4 + ...
117*412f47f9SXin Li Hence approximation has the form f + f^2 * P(f)
118*412f47f9SXin Li where P(x) = C0 + C1*x + C2x^2 + ... */
119*412f47f9SXin Li double p = fma (f, eval_poly (f) * f, f);
120*412f47f9SXin Li
121*412f47f9SXin Li double kd = k;
122*412f47f9SXin Li double y = fma (Ln2Lo, kd, cm);
123*412f47f9SXin Li return y + fma (Ln2Hi, kd, p);
124*412f47f9SXin Li }
125*412f47f9SXin Li
126*412f47f9SXin Li PL_SIG (S, D, 1, log1p, -0.9, 10.0)
127*412f47f9SXin Li PL_TEST_ULP (log1p, 1.26)
128*412f47f9SXin Li PL_TEST_SYM_INTERVAL (log1p, 0.0, 0x1p-23, 50000)
129*412f47f9SXin Li PL_TEST_SYM_INTERVAL (log1p, 0x1p-23, 0.001, 50000)
130*412f47f9SXin Li PL_TEST_SYM_INTERVAL (log1p, 0.001, 1.0, 50000)
131*412f47f9SXin Li PL_TEST_SYM_INTERVAL (log1p, 1.0, inf, 5000)
132