1*412f47f9SXin Li /*
2*412f47f9SXin Li * Extended precision inverse error function.
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2023, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li #define _GNU_SOURCE
8*412f47f9SXin Li #include <math.h>
9*412f47f9SXin Li #include <stdbool.h>
10*412f47f9SXin Li #include <float.h>
11*412f47f9SXin Li
12*412f47f9SXin Li #include "math_config.h"
13*412f47f9SXin Li #include "poly_scalar_f64.h"
14*412f47f9SXin Li
15*412f47f9SXin Li #define SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p0l
16*412f47f9SXin Li #define HF_SQRT_PIl 0x1.c5bf891b4ef6aa79c3b0520d5db9p-1l
17*412f47f9SXin Li
18*412f47f9SXin Li const static struct
19*412f47f9SXin Li {
20*412f47f9SXin Li /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
21*412f47f9SXin Li coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
22*412f47f9SXin Li of the denominator. */
23*412f47f9SXin Li double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
24*412f47f9SXin Li } data = {
25*412f47f9SXin Li .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
26*412f47f9SXin Li -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
27*412f47f9SXin Li 0x1.689181bbafd0cp-3 },
28*412f47f9SXin Li .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
29*412f47f9SXin Li -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
30*412f47f9SXin Li 0x1p+0 },
31*412f47f9SXin Li .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
32*412f47f9SXin Li 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
33*412f47f9SXin Li -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
34*412f47f9SXin Li .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
35*412f47f9SXin Li 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
36*412f47f9SXin Li -0x1.4075c56404eecp+3, 0x1p+0 },
37*412f47f9SXin Li .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
38*412f47f9SXin Li 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
39*412f47f9SXin Li 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
40*412f47f9SXin Li .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
41*412f47f9SXin Li 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
42*412f47f9SXin Li 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
43*412f47f9SXin Li 0x1p+0 }
44*412f47f9SXin Li };
45*412f47f9SXin Li
46*412f47f9SXin Li /* Inverse error function approximation, based on rational approximation as
47*412f47f9SXin Li described in
48*412f47f9SXin Li J. M. Blair, C. A. Edwards, and J. H. Johnson,
49*412f47f9SXin Li "Rational Chebyshev approximations for the inverse of the error function",
50*412f47f9SXin Li Math. Comp. 30, pp. 827--830 (1976).
51*412f47f9SXin Li https://doi.org/10.1090/S0025-5718-1976-0421040-7. */
52*412f47f9SXin Li static inline double
__erfinv(double x)53*412f47f9SXin Li __erfinv (double x)
54*412f47f9SXin Li {
55*412f47f9SXin Li if (x == 1.0)
56*412f47f9SXin Li return __math_oflow (0);
57*412f47f9SXin Li if (x == -1.0)
58*412f47f9SXin Li return __math_oflow (1);
59*412f47f9SXin Li
60*412f47f9SXin Li double a = fabs (x);
61*412f47f9SXin Li if (a > 1)
62*412f47f9SXin Li return __math_invalid (x);
63*412f47f9SXin Li
64*412f47f9SXin Li if (a <= 0.75)
65*412f47f9SXin Li {
66*412f47f9SXin Li double t = x * x - 0.5625;
67*412f47f9SXin Li return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
68*412f47f9SXin Li }
69*412f47f9SXin Li
70*412f47f9SXin Li if (a <= 0.9375)
71*412f47f9SXin Li {
72*412f47f9SXin Li double t = x * x - 0.87890625;
73*412f47f9SXin Li return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
74*412f47f9SXin Li }
75*412f47f9SXin Li
76*412f47f9SXin Li double t = 1.0 / (sqrtl (-log1pl (-a)));
77*412f47f9SXin Li return horner_8_f64 (t, data.P_57)
78*412f47f9SXin Li / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
79*412f47f9SXin Li }
80*412f47f9SXin Li
81*412f47f9SXin Li /* Extended-precision variant, which uses the above (or asymptotic estimate) as
82*412f47f9SXin Li starting point for Newton refinement. This implementation is a port to C of
83*412f47f9SXin Li the version in the SpecialFunctions.jl Julia package, with relaxed stopping
84*412f47f9SXin Li criteria for the Newton refinement. */
85*412f47f9SXin Li long double
erfinvl(long double x)86*412f47f9SXin Li erfinvl (long double x)
87*412f47f9SXin Li {
88*412f47f9SXin Li if (x == 0)
89*412f47f9SXin Li return 0;
90*412f47f9SXin Li
91*412f47f9SXin Li double yf = __erfinv (x);
92*412f47f9SXin Li long double y;
93*412f47f9SXin Li if (isfinite (yf))
94*412f47f9SXin Li y = yf;
95*412f47f9SXin Li else
96*412f47f9SXin Li {
97*412f47f9SXin Li /* Double overflowed, use asymptotic estimate instead. */
98*412f47f9SXin Li y = copysignl (sqrtl (-logl (1.0l - fabsl (x)) * SQRT_PIl), x);
99*412f47f9SXin Li if (!isfinite (y))
100*412f47f9SXin Li return y;
101*412f47f9SXin Li }
102*412f47f9SXin Li
103*412f47f9SXin Li double eps = fabs (yf - nextafter (yf, 0));
104*412f47f9SXin Li while (true)
105*412f47f9SXin Li {
106*412f47f9SXin Li long double dy = HF_SQRT_PIl * (erfl (y) - x) * exp (y * y);
107*412f47f9SXin Li y -= dy;
108*412f47f9SXin Li /* Stopping criterion is different to Julia implementation, but is enough
109*412f47f9SXin Li to ensure result is accurate when rounded to double-precision. */
110*412f47f9SXin Li if (fabsl (dy) < eps)
111*412f47f9SXin Li break;
112*412f47f9SXin Li }
113*412f47f9SXin Li return y;
114*412f47f9SXin Li }
115