xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/asinhf_3u5.c (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li /*
2*412f47f9SXin Li  * Single-precision asinh(x) function.
3*412f47f9SXin Li  *
4*412f47f9SXin Li  * Copyright (c) 2022-2023, Arm Limited.
5*412f47f9SXin Li  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li  */
7*412f47f9SXin Li 
8*412f47f9SXin Li #include "poly_scalar_f32.h"
9*412f47f9SXin Li #include "math_config.h"
10*412f47f9SXin Li #include "pl_sig.h"
11*412f47f9SXin Li #include "pl_test.h"
12*412f47f9SXin Li 
13*412f47f9SXin Li #define AbsMask (0x7fffffff)
14*412f47f9SXin Li #define SqrtFltMax (0x1.749e96p+10f)
15*412f47f9SXin Li #define Ln2 (0x1.62e4p-1f)
16*412f47f9SXin Li #define One (0x3f8)
17*412f47f9SXin Li #define ExpM12 (0x398)
18*412f47f9SXin Li 
19*412f47f9SXin Li float
20*412f47f9SXin Li optr_aor_log_f32 (float);
21*412f47f9SXin Li 
22*412f47f9SXin Li /* asinhf approximation using a variety of approaches on different intervals:
23*412f47f9SXin Li 
24*412f47f9SXin Li    |x| < 2^-12: Return x. Function is exactly rounded in this region.
25*412f47f9SXin Li 
26*412f47f9SXin Li    |x| < 1.0: Use custom order-8 polynomial. The largest observed
27*412f47f9SXin Li      error in this region is 1.3ulps:
28*412f47f9SXin Li      asinhf(0x1.f0f74cp-1) got 0x1.b88de4p-1 want 0x1.b88de2p-1.
29*412f47f9SXin Li 
30*412f47f9SXin Li    |x| <= SqrtFltMax: Calculate the result directly using the
31*412f47f9SXin Li      definition of asinh(x) = ln(x + sqrt(x*x + 1)). The largest
32*412f47f9SXin Li      observed error in this region is 1.99ulps.
33*412f47f9SXin Li      asinhf(0x1.00e358p+0) got 0x1.c4849ep-1 want 0x1.c484a2p-1.
34*412f47f9SXin Li 
35*412f47f9SXin Li    |x| > SqrtFltMax: We cannot square x without overflow at a low
36*412f47f9SXin Li      cost. At very large x, asinh(x) ~= ln(2x). At huge x we cannot
37*412f47f9SXin Li      even double x without overflow, so calculate this as ln(x) +
38*412f47f9SXin Li      ln(2). This largest observed error in this region is 3.39ulps.
39*412f47f9SXin Li      asinhf(0x1.749e9ep+10) got 0x1.fffff8p+2 want 0x1.fffffep+2.  */
40*412f47f9SXin Li float
asinhf(float x)41*412f47f9SXin Li asinhf (float x)
42*412f47f9SXin Li {
43*412f47f9SXin Li   uint32_t ix = asuint (x);
44*412f47f9SXin Li   uint32_t ia = ix & AbsMask;
45*412f47f9SXin Li   uint32_t ia12 = ia >> 20;
46*412f47f9SXin Li   float ax = asfloat (ia);
47*412f47f9SXin Li   uint32_t sign = ix & ~AbsMask;
48*412f47f9SXin Li 
49*412f47f9SXin Li   if (unlikely (ia12 < ExpM12 || ia == 0x7f800000))
50*412f47f9SXin Li     return x;
51*412f47f9SXin Li 
52*412f47f9SXin Li   if (unlikely (ia12 >= 0x7f8))
53*412f47f9SXin Li     return __math_invalidf (x);
54*412f47f9SXin Li 
55*412f47f9SXin Li   if (ia12 < One)
56*412f47f9SXin Li     {
57*412f47f9SXin Li       float x2 = ax * ax;
58*412f47f9SXin Li       float p = estrin_7_f32 (ax, x2, x2 * x2, __asinhf_data.coeffs);
59*412f47f9SXin Li       float y = fmaf (x2, p, ax);
60*412f47f9SXin Li       return asfloat (asuint (y) | sign);
61*412f47f9SXin Li     }
62*412f47f9SXin Li 
63*412f47f9SXin Li   if (unlikely (ax > SqrtFltMax))
64*412f47f9SXin Li     {
65*412f47f9SXin Li       return asfloat (asuint (optr_aor_log_f32 (ax) + Ln2) | sign);
66*412f47f9SXin Li     }
67*412f47f9SXin Li 
68*412f47f9SXin Li   return asfloat (asuint (optr_aor_log_f32 (ax + sqrtf (ax * ax + 1))) | sign);
69*412f47f9SXin Li }
70*412f47f9SXin Li 
71*412f47f9SXin Li PL_SIG (S, F, 1, asinh, -10.0, 10.0)
72*412f47f9SXin Li PL_TEST_ULP (asinhf, 2.9)
73*412f47f9SXin Li PL_TEST_INTERVAL (asinhf, 0, 0x1p-12, 5000)
74*412f47f9SXin Li PL_TEST_INTERVAL (asinhf, 0x1p-12, 1.0, 50000)
75*412f47f9SXin Li PL_TEST_INTERVAL (asinhf, 1.0, 0x1p11, 50000)
76*412f47f9SXin Li PL_TEST_INTERVAL (asinhf, 0x1p11, 0x1p127, 20000)
77