1*412f47f9SXin Li /*
2*412f47f9SXin Li * Double-precision 10^x function.
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2023-2024, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li
8*412f47f9SXin Li #include "math_config.h"
9*412f47f9SXin Li
10*412f47f9SXin Li #define N (1 << EXP_TABLE_BITS)
11*412f47f9SXin Li #define IndexMask (N - 1)
12*412f47f9SXin Li #define OFlowBound 0x1.34413509f79ffp8 /* log10(DBL_MAX). */
13*412f47f9SXin Li #define UFlowBound -0x1.5ep+8 /* -350. */
14*412f47f9SXin Li #define SmallTop 0x3c6 /* top12(0x1p-57). */
15*412f47f9SXin Li #define BigTop 0x407 /* top12(0x1p8). */
16*412f47f9SXin Li #define Thresh 0x41 /* BigTop - SmallTop. */
17*412f47f9SXin Li #define Shift __exp_data.shift
18*412f47f9SXin Li #define C(i) __exp_data.exp10_poly[i]
19*412f47f9SXin Li
20*412f47f9SXin Li static double
special_case(uint64_t sbits,double_t tmp,uint64_t ki)21*412f47f9SXin Li special_case (uint64_t sbits, double_t tmp, uint64_t ki)
22*412f47f9SXin Li {
23*412f47f9SXin Li double_t scale, y;
24*412f47f9SXin Li
25*412f47f9SXin Li if ((ki & 0x80000000) == 0)
26*412f47f9SXin Li {
27*412f47f9SXin Li /* The exponent of scale might have overflowed by 1. */
28*412f47f9SXin Li sbits -= 1ull << 52;
29*412f47f9SXin Li scale = asdouble (sbits);
30*412f47f9SXin Li y = 2 * (scale + scale * tmp);
31*412f47f9SXin Li return check_oflow (eval_as_double (y));
32*412f47f9SXin Li }
33*412f47f9SXin Li
34*412f47f9SXin Li /* n < 0, need special care in the subnormal range. */
35*412f47f9SXin Li sbits += 1022ull << 52;
36*412f47f9SXin Li scale = asdouble (sbits);
37*412f47f9SXin Li y = scale + scale * tmp;
38*412f47f9SXin Li
39*412f47f9SXin Li if (y < 1.0)
40*412f47f9SXin Li {
41*412f47f9SXin Li /* Round y to the right precision before scaling it into the subnormal
42*412f47f9SXin Li range to avoid double rounding that can cause 0.5+E/2 ulp error where
43*412f47f9SXin Li E is the worst-case ulp error outside the subnormal range. So this
44*412f47f9SXin Li is only useful if the goal is better than 1 ulp worst-case error. */
45*412f47f9SXin Li double_t lo = scale - y + scale * tmp;
46*412f47f9SXin Li double_t hi = 1.0 + y;
47*412f47f9SXin Li lo = 1.0 - hi + y + lo;
48*412f47f9SXin Li y = eval_as_double (hi + lo) - 1.0;
49*412f47f9SXin Li /* Avoid -0.0 with downward rounding. */
50*412f47f9SXin Li if (WANT_ROUNDING && y == 0.0)
51*412f47f9SXin Li y = 0.0;
52*412f47f9SXin Li /* The underflow exception needs to be signaled explicitly. */
53*412f47f9SXin Li force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
54*412f47f9SXin Li }
55*412f47f9SXin Li y = 0x1p-1022 * y;
56*412f47f9SXin Li
57*412f47f9SXin Li return check_uflow (y);
58*412f47f9SXin Li }
59*412f47f9SXin Li
60*412f47f9SXin Li /* Double-precision 10^x approximation. Largest observed error is ~0.513 ULP. */
61*412f47f9SXin Li double
exp10(double x)62*412f47f9SXin Li exp10 (double x)
63*412f47f9SXin Li {
64*412f47f9SXin Li uint64_t ix = asuint64 (x);
65*412f47f9SXin Li uint32_t abstop = (ix >> 52) & 0x7ff;
66*412f47f9SXin Li
67*412f47f9SXin Li if (unlikely (abstop - SmallTop >= Thresh))
68*412f47f9SXin Li {
69*412f47f9SXin Li if (abstop - SmallTop >= 0x80000000)
70*412f47f9SXin Li /* Avoid spurious underflow for tiny x.
71*412f47f9SXin Li Note: 0 is common input. */
72*412f47f9SXin Li return x + 1;
73*412f47f9SXin Li if (abstop == 0x7ff)
74*412f47f9SXin Li return ix == asuint64 (-INFINITY) ? 0.0 : x + 1.0;
75*412f47f9SXin Li if (x >= OFlowBound)
76*412f47f9SXin Li return __math_oflow (0);
77*412f47f9SXin Li if (x < UFlowBound)
78*412f47f9SXin Li return __math_uflow (0);
79*412f47f9SXin Li
80*412f47f9SXin Li /* Large x is special-cased below. */
81*412f47f9SXin Li abstop = 0;
82*412f47f9SXin Li }
83*412f47f9SXin Li
84*412f47f9SXin Li /* Reduce x: z = x * N / log10(2), k = round(z). */
85*412f47f9SXin Li double_t z = __exp_data.invlog10_2N * x;
86*412f47f9SXin Li double_t kd;
87*412f47f9SXin Li uint64_t ki;
88*412f47f9SXin Li #if TOINT_INTRINSICS
89*412f47f9SXin Li kd = roundtoint (z);
90*412f47f9SXin Li ki = converttoint (z);
91*412f47f9SXin Li #else
92*412f47f9SXin Li kd = eval_as_double (z + Shift);
93*412f47f9SXin Li ki = asuint64 (kd);
94*412f47f9SXin Li kd -= Shift;
95*412f47f9SXin Li #endif
96*412f47f9SXin Li
97*412f47f9SXin Li /* r = x - k * log10(2), r in [-0.5, 0.5]. */
98*412f47f9SXin Li double_t r = x;
99*412f47f9SXin Li r = __exp_data.neglog10_2hiN * kd + r;
100*412f47f9SXin Li r = __exp_data.neglog10_2loN * kd + r;
101*412f47f9SXin Li
102*412f47f9SXin Li /* exp10(x) = 2^(k/N) * 2^(r/N).
103*412f47f9SXin Li Approximate the two components separately. */
104*412f47f9SXin Li
105*412f47f9SXin Li /* s = 2^(k/N), using lookup table. */
106*412f47f9SXin Li uint64_t e = ki << (52 - EXP_TABLE_BITS);
107*412f47f9SXin Li uint64_t i = (ki & IndexMask) * 2;
108*412f47f9SXin Li uint64_t u = __exp_data.tab[i + 1];
109*412f47f9SXin Li uint64_t sbits = u + e;
110*412f47f9SXin Li
111*412f47f9SXin Li double_t tail = asdouble (__exp_data.tab[i]);
112*412f47f9SXin Li
113*412f47f9SXin Li /* 2^(r/N) ~= 1 + r * Poly(r). */
114*412f47f9SXin Li double_t r2 = r * r;
115*412f47f9SXin Li double_t p = C (0) + r * C (1);
116*412f47f9SXin Li double_t y = C (2) + r * C (3);
117*412f47f9SXin Li y = y + r2 * C (4);
118*412f47f9SXin Li y = p + r2 * y;
119*412f47f9SXin Li y = tail + y * r;
120*412f47f9SXin Li
121*412f47f9SXin Li if (unlikely (abstop == 0))
122*412f47f9SXin Li return special_case (sbits, y, ki);
123*412f47f9SXin Li
124*412f47f9SXin Li /* Assemble components:
125*412f47f9SXin Li y = 2^(r/N) * 2^(k/N)
126*412f47f9SXin Li ~= (y + 1) * s. */
127*412f47f9SXin Li double_t s = asdouble (sbits);
128*412f47f9SXin Li return eval_as_double (s * y + s);
129*412f47f9SXin Li }
130