xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/cospif_2u6.c (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li /*
2*412f47f9SXin Li  * Single-precision scalar cospi function.
3*412f47f9SXin Li  *
4*412f47f9SXin Li  * Copyright (c) 2023, Arm Limited.
5*412f47f9SXin Li  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li  */
7*412f47f9SXin Li 
8*412f47f9SXin Li #include "mathlib.h"
9*412f47f9SXin Li #include "math_config.h"
10*412f47f9SXin Li #include "pl_sig.h"
11*412f47f9SXin Li #include "pl_test.h"
12*412f47f9SXin Li 
/* Taylor series coefficients for sin(pi * x).
   cospif evaluates the odd polynomial
     sin(pi * r) ~= r * (C0 + C1 r^2 + C2 r^4 + C3 r^6 + C4 r^8 + C5 r^10)
   with these values; C0 is pi rounded to single precision.  Negative
   constants are parenthesised so they expand safely in any expression.  */
#define C0 0x1.921fb6p1f
#define C1 (-0x1.4abbcep2f)
#define C2 0x1.466bc6p1f
#define C3 (-0x1.32d2ccp-1f)
#define C4 0x1.50783p-4f
#define C5 (-0x1.e30750p-8f)

/* 2^23.  Adding this to a non-negative float below 2^22 leaves a value whose
   ulp is 1, i.e. the input rounded to an integer, with the integer's parity
   in the lowest significand bit.  */
#define Shift 0x1.0p+23f
22*412f47f9SXin Li 
23*412f47f9SXin Li /* Approximation for scalar single-precision cospi(x) - cospif.
24*412f47f9SXin Li    Maximum error: 2.64 ULP:
25*412f47f9SXin Li    cospif(0x1.37e844p-4) got 0x1.f16b3p-1
26*412f47f9SXin Li 			want 0x1.f16b2ap-1.  */
27*412f47f9SXin Li float
cospif(float x)28*412f47f9SXin Li cospif (float x)
29*412f47f9SXin Li {
30*412f47f9SXin Li   if (isinf (x))
31*412f47f9SXin Li     return __math_invalidf (x);
32*412f47f9SXin Li 
33*412f47f9SXin Li   float ax = asfloat (asuint (x) & ~0x80000000);
34*412f47f9SXin Li 
35*412f47f9SXin Li   /* Edge cases for when cospif should be exactly +/- 1. (Integers)
36*412f47f9SXin Li      0x1p23 is the limit for single precision to store any decimal places.  */
37*412f47f9SXin Li   if (ax >= 0x1p24f)
38*412f47f9SXin Li     return 1;
39*412f47f9SXin Li 
40*412f47f9SXin Li   uint32_t m = roundf (ax);
41*412f47f9SXin Li   if (m == ax)
42*412f47f9SXin Li     return (m & 1) ? -1 : 1;
43*412f47f9SXin Li 
44*412f47f9SXin Li   /* Any non-integer values >= 0x1p22f will be int +0.5.
45*412f47f9SXin Li      These values should return exactly 0.  */
46*412f47f9SXin Li   if (ax >= 0x1p22f)
47*412f47f9SXin Li     return 0;
48*412f47f9SXin Li 
49*412f47f9SXin Li   /* For very small inputs, squaring r causes underflow.
50*412f47f9SXin Li      Values below this threshold can be approximated via cospi(x) ~= 1 -
51*412f47f9SXin Li      (pi*x).  */
52*412f47f9SXin Li   if (ax < 0x1p-31f)
53*412f47f9SXin Li     return 1 - (C0 * x);
54*412f47f9SXin Li 
55*412f47f9SXin Li   /* n = rint(|x|).  */
56*412f47f9SXin Li   float n = ax + Shift;
57*412f47f9SXin Li   uint32_t sign = asuint (n) << 31;
58*412f47f9SXin Li   n = n - Shift;
59*412f47f9SXin Li 
60*412f47f9SXin Li   /* We know that cospi(x) = sinpi(0.5 - x)
61*412f47f9SXin Li      range reduction and offset into sinpi range -1/2 .. 1/2
62*412f47f9SXin Li      r = 0.5 - |x - rint(x)|.  */
63*412f47f9SXin Li   float r = 0.5f - fabs (ax - n);
64*412f47f9SXin Li 
65*412f47f9SXin Li   /* y = sin(pi * r).  */
66*412f47f9SXin Li   float r2 = r * r;
67*412f47f9SXin Li   float y = fmaf (C5, r2, C4);
68*412f47f9SXin Li   y = fmaf (y, r2, C3);
69*412f47f9SXin Li   y = fmaf (y, r2, C2);
70*412f47f9SXin Li   y = fmaf (y, r2, C1);
71*412f47f9SXin Li   y = fmaf (y, r2, C0);
72*412f47f9SXin Li 
73*412f47f9SXin Li   /* As all values are reduced to -1/2 .. 1/2, the result of cos(x) always be
74*412f47f9SXin Li      positive, therefore, the sign must be introduced based upon if x rounds to
75*412f47f9SXin Li      odd or even.  */
76*412f47f9SXin Li   return asfloat (asuint (y * r) ^ sign);
77*412f47f9SXin Li }
78*412f47f9SXin Li 
79*412f47f9SXin Li PL_SIG (S, F, 1, cospi, -0.9, 0.9)
80*412f47f9SXin Li PL_TEST_ULP (cospif, 2.15)
81*412f47f9SXin Li PL_TEST_SYM_INTERVAL (cospif, 0, 0x1p-31, 5000)
82*412f47f9SXin Li PL_TEST_SYM_INTERVAL (cospif, 0x1p-31, 0.5, 10000)
83*412f47f9SXin Li PL_TEST_SYM_INTERVAL (cospif, 0.5, 0x1p22f, 10000)
84*412f47f9SXin Li PL_TEST_SYM_INTERVAL (cospif, 0x1p22f, inf, 10000)
85