1*412f47f9SXin Li /*
2*412f47f9SXin Li * Double-precision inverse error function.
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2023, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li #include "math_config.h"
8*412f47f9SXin Li #include "poly_scalar_f64.h"
9*412f47f9SXin Li #include "pl_sig.h"
10*412f47f9SXin Li #define IGNORE_SCALAR_FENV
11*412f47f9SXin Li #include "pl_test.h"
12*412f47f9SXin Li
13*412f47f9SXin Li const static struct
14*412f47f9SXin Li {
15*412f47f9SXin Li /* We use P_N and Q_N to refer to arrays of coefficients, where P_N is the
16*412f47f9SXin Li coeffs of the numerator in table N of Blair et al, and Q_N is the coeffs
17*412f47f9SXin Li of the denominator. */
18*412f47f9SXin Li double P_17[7], Q_17[7], P_37[8], Q_37[8], P_57[9], Q_57[10];
19*412f47f9SXin Li } data = {
20*412f47f9SXin Li .P_17 = { 0x1.007ce8f01b2e8p+4, -0x1.6b23cc5c6c6d7p+6, 0x1.74e5f6ceb3548p+7,
21*412f47f9SXin Li -0x1.5200bb15cc6bbp+7, 0x1.05d193233a849p+6, -0x1.148c5474ee5e1p+3,
22*412f47f9SXin Li 0x1.689181bbafd0cp-3 },
23*412f47f9SXin Li .Q_17 = { 0x1.d8fb0f913bd7bp+3, -0x1.6d7f25a3f1c24p+6, 0x1.a450d8e7f4cbbp+7,
24*412f47f9SXin Li -0x1.bc3480485857p+7, 0x1.ae6b0c504ee02p+6, -0x1.499dfec1a7f5fp+4,
25*412f47f9SXin Li 0x1p+0 },
26*412f47f9SXin Li .P_37 = { -0x1.f3596123109edp-7, 0x1.60b8fe375999ep-2, -0x1.779bb9bef7c0fp+1,
27*412f47f9SXin Li 0x1.786ea384470a2p+3, -0x1.6a7c1453c85d3p+4, 0x1.31f0fc5613142p+4,
28*412f47f9SXin Li -0x1.5ea6c007d4dbbp+2, 0x1.e66f265ce9e5p-3 },
29*412f47f9SXin Li .Q_37 = { -0x1.636b2dcf4edbep-7, 0x1.0b5411e2acf29p-2, -0x1.3413109467a0bp+1,
30*412f47f9SXin Li 0x1.563e8136c554ap+3, -0x1.7b77aab1dcafbp+4, 0x1.8a3e174e05ddcp+4,
31*412f47f9SXin Li -0x1.4075c56404eecp+3, 0x1p+0 },
32*412f47f9SXin Li .P_57 = { 0x1.b874f9516f7f1p-14, 0x1.5921f2916c1c4p-7, 0x1.145ae7d5b8fa4p-2,
33*412f47f9SXin Li 0x1.29d6dcc3b2fb7p+1, 0x1.cabe2209a7985p+2, 0x1.11859f0745c4p+3,
34*412f47f9SXin Li 0x1.b7ec7bc6a2ce5p+2, 0x1.d0419e0bb42aep+1, 0x1.c5aa03eef7258p-1 },
35*412f47f9SXin Li .Q_57 = { 0x1.b8747e12691f1p-14, 0x1.59240d8ed1e0ap-7, 0x1.14aef2b181e2p-2,
36*412f47f9SXin Li 0x1.2cd181bcea52p+1, 0x1.e6e63e0b7aa4cp+2, 0x1.65cf8da94aa3ap+3,
37*412f47f9SXin Li 0x1.7e5c787b10a36p+3, 0x1.0626d68b6cea3p+3, 0x1.065c5f193abf6p+2,
38*412f47f9SXin Li 0x1p+0 }
39*412f47f9SXin Li };
40*412f47f9SXin Li
41*412f47f9SXin Li /* Inverse error function approximation, based on rational approximation as
42*412f47f9SXin Li described in
43*412f47f9SXin Li J. M. Blair, C. A. Edwards, and J. H. Johnson,
44*412f47f9SXin Li "Rational Chebyshev approximations for the inverse of the error function",
45*412f47f9SXin Li Math. Comp. 30, pp. 827--830 (1976).
46*412f47f9SXin Li https://doi.org/10.1090/S0025-5718-1976-0421040-7
47*412f47f9SXin Li Largest observed error is 24.46 ULP, in the extreme tail:
48*412f47f9SXin Li erfinv(0x1.fd9504351b757p-1) got 0x1.ff72c1092917p+0
49*412f47f9SXin Li want 0x1.ff72c10929158p+0. */
50*412f47f9SXin Li double
erfinv(double x)51*412f47f9SXin Li erfinv (double x)
52*412f47f9SXin Li {
53*412f47f9SXin Li double a = fabs (x);
54*412f47f9SXin Li
55*412f47f9SXin Li if (a <= 0.75)
56*412f47f9SXin Li {
57*412f47f9SXin Li /* Largest observed error in this region is 6.06 ULP:
58*412f47f9SXin Li erfinv(0x1.1884650fd2d41p-2) got 0x1.fb65998cbd3fep-3
59*412f47f9SXin Li want 0x1.fb65998cbd404p-3. */
60*412f47f9SXin Li double t = x * x - 0.5625;
61*412f47f9SXin Li return x * horner_6_f64 (t, data.P_17) / horner_6_f64 (t, data.Q_17);
62*412f47f9SXin Li }
63*412f47f9SXin Li
64*412f47f9SXin Li if (a <= 0.9375)
65*412f47f9SXin Li {
66*412f47f9SXin Li /* Largest observed error in this region is 6.95 ULP:
67*412f47f9SXin Li erfinv(0x1.a8d65b94d8c6p-1) got 0x1.f08325591b54p-1
68*412f47f9SXin Li want 0x1.f08325591b547p-1. */
69*412f47f9SXin Li double t = x * x - 0.87890625;
70*412f47f9SXin Li return x * horner_7_f64 (t, data.P_37) / horner_7_f64 (t, data.Q_37);
71*412f47f9SXin Li }
72*412f47f9SXin Li
73*412f47f9SXin Li double t = 1.0 / (sqrt (-log (1 - a)));
74*412f47f9SXin Li return horner_8_f64 (t, data.P_57)
75*412f47f9SXin Li / (copysign (t, x) * horner_9_f64 (t, data.Q_57));
76*412f47f9SXin Li }
77*412f47f9SXin Li
78*412f47f9SXin Li PL_SIG (S, D, 1, erfinv, -0.99, 0.99)
79*412f47f9SXin Li PL_TEST_ULP (erfinv, 24.0)
80*412f47f9SXin Li PL_TEST_INTERVAL (erfinv, 0, 1, 40000)
81*412f47f9SXin Li PL_TEST_INTERVAL (erfinv, -0x1p-1022, -1, 40000)
82