xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/sinhf_2u3.c (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
/*
 * Single-precision sinh(x) function.
 *
 * Copyright (c) 2022-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*412f47f9SXin Li 
8*412f47f9SXin Li #include "math_config.h"
9*412f47f9SXin Li #include "pl_sig.h"
10*412f47f9SXin Li #include "pl_test.h"
11*412f47f9SXin Li 
/* Bit-pattern constants for IEEE-754 binary32 values, used by sinhf.  */

/* Mask that keeps everything except the sign bit; its complement isolates
   the sign bit.  */
#define AbsMask 0x7fffffff
/* Bit pattern of 0.5f; OR-ed with a sign bit to build +/-0.5.  */
#define Half 0x3f000000
/* 0x1.62e43p+6, 2^7*ln2, minimum value for which expm1f overflows.  */
#define Expm1OFlowLimit 0x42b17218
/* 0x1.65a9fap+6, minimum positive value for which sinhf should overflow.  */
#define OFlowLimit 0x42b2d4fd
20*412f47f9SXin Li 
/* Single-precision exp helper.  It is only declared here; sinhf calls it for
   large |x| where expm1f can no longer be used.  */
float
optr_aor_exp_f32 (float x);
23*412f47f9SXin Li 
24*412f47f9SXin Li /* Approximation for single-precision sinh(x) using expm1.
25*412f47f9SXin Li    sinh(x) = (exp(x) - exp(-x)) / 2.
26*412f47f9SXin Li    The maximum error is 2.26 ULP:
27*412f47f9SXin Li    sinhf(0x1.e34a9ep-4) got 0x1.e469ep-4 want 0x1.e469e4p-4.  */
28*412f47f9SXin Li float
sinhf(float x)29*412f47f9SXin Li sinhf (float x)
30*412f47f9SXin Li {
31*412f47f9SXin Li   uint32_t ix = asuint (x);
32*412f47f9SXin Li   uint32_t iax = ix & AbsMask;
33*412f47f9SXin Li   float ax = asfloat (iax);
34*412f47f9SXin Li   uint32_t sign = ix & ~AbsMask;
35*412f47f9SXin Li   float halfsign = asfloat (Half | sign);
36*412f47f9SXin Li 
37*412f47f9SXin Li   if (unlikely (iax >= Expm1OFlowLimit))
38*412f47f9SXin Li     {
39*412f47f9SXin Li       /* Special values and overflow.  */
40*412f47f9SXin Li       if (iax >= 0x7fc00001 || iax == 0x7f800000)
41*412f47f9SXin Li 	return x;
42*412f47f9SXin Li       if (iax >= 0x7f800000)
43*412f47f9SXin Li 	return __math_invalidf (x);
44*412f47f9SXin Li       if (iax >= OFlowLimit)
45*412f47f9SXin Li 	return __math_oflowf (sign);
46*412f47f9SXin Li 
47*412f47f9SXin Li       /* expm1f overflows a little before sinhf, (~88.7 vs ~89.4). We have to
48*412f47f9SXin Li 	 fill this gap by using a different algorithm, in this case we use a
49*412f47f9SXin Li 	 double-precision exp helper. For large x sinh(x) dominated by exp(x),
50*412f47f9SXin Li 	 however we cannot compute exp without overflow either. We use the
51*412f47f9SXin Li 	 identity:
52*412f47f9SXin Li 	 exp(a) = (exp(a / 2)) ^ 2.
53*412f47f9SXin Li 	 to compute sinh(x) ~= (exp(|x| / 2)) ^ 2 / 2    for x > 0
54*412f47f9SXin Li 			    ~= (exp(|x| / 2)) ^ 2 / -2   for x < 0.
55*412f47f9SXin Li 	 Greatest error in this region is 1.89 ULP:
56*412f47f9SXin Li 	 sinhf(0x1.65898cp+6) got 0x1.f00aep+127  want 0x1.f00adcp+127.  */
57*412f47f9SXin Li       float e = optr_aor_exp_f32 (ax / 2);
58*412f47f9SXin Li       return (e * halfsign) * e;
59*412f47f9SXin Li     }
60*412f47f9SXin Li 
61*412f47f9SXin Li   /* Use expm1f to retain acceptable precision for small numbers.
62*412f47f9SXin Li      Let t = e^(|x|) - 1.  */
63*412f47f9SXin Li   float t = expm1f (ax);
64*412f47f9SXin Li   /* Then sinh(x) = (t + t / (t + 1)) / 2   for x > 0
65*412f47f9SXin Li 		    (t + t / (t + 1)) / -2  for x < 0.  */
66*412f47f9SXin Li   return (t + t / (t + 1)) * halfsign;
67*412f47f9SXin Li }
68*412f47f9SXin Li 
69*412f47f9SXin Li PL_SIG (S, F, 1, sinh, -10.0, 10.0)
70*412f47f9SXin Li PL_TEST_ULP (sinhf, 1.76)
71*412f47f9SXin Li PL_TEST_SYM_INTERVAL (sinhf, 0, 0x1.62e43p+6, 100000)
72*412f47f9SXin Li PL_TEST_SYM_INTERVAL (sinhf, 0x1.62e43p+6, 0x1.65a9fap+6, 100)
73*412f47f9SXin Li PL_TEST_SYM_INTERVAL (sinhf, 0x1.65a9fap+6, inf, 100)
74