xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/sv_acoshf_2u8.c (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li /*
2*412f47f9SXin Li  * Single-precision SVE acosh(x) function.
3*412f47f9SXin Li  * Copyright (c) 2023, Arm Limited.
4*412f47f9SXin Li  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
5*412f47f9SXin Li  */
6*412f47f9SXin Li 
7*412f47f9SXin Li #include "sv_math.h"
8*412f47f9SXin Li #include "pl_sig.h"
9*412f47f9SXin Li #include "pl_test.h"
10*412f47f9SXin Li 
/* Bit pattern of 1.0f, i.e. asuint (1.0f).  */
#define One 0x3f800000
/* asuint (0x1p64) - One.  Compared (unsigned) against asuint (x) - One to
   pick out lanes that need the special-case path.  */
#define Thres 0x20000000
13*412f47f9SXin Li 
14*412f47f9SXin Li #include "sv_log1pf_inline.h"
15*412f47f9SXin Li 
16*412f47f9SXin Li static svfloat32_t NOINLINE
special_case(svfloat32_t x,svfloat32_t y,svbool_t special)17*412f47f9SXin Li special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
18*412f47f9SXin Li {
19*412f47f9SXin Li   return sv_call_f32 (acoshf, x, y, special);
20*412f47f9SXin Li }
21*412f47f9SXin Li 
22*412f47f9SXin Li /* Single-precision SVE acosh(x) routine. Implements the same algorithm as
23*412f47f9SXin Li    vector acoshf and log1p.
24*412f47f9SXin Li 
25*412f47f9SXin Li    Maximum error is 2.78 ULPs:
26*412f47f9SXin Li    SV_NAME_F1 (acosh) (0x1.01e996p+0) got 0x1.f45b42p-4
27*412f47f9SXin Li 				     want 0x1.f45b3cp-4.  */
SV_NAME_F1(acosh)28*412f47f9SXin Li svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
29*412f47f9SXin Li {
30*412f47f9SXin Li   svuint32_t ix = svreinterpret_u32 (x);
31*412f47f9SXin Li   svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres);
32*412f47f9SXin Li 
33*412f47f9SXin Li   svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
34*412f47f9SXin Li   svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f));
35*412f47f9SXin Li   svfloat32_t y = sv_log1pf_inline (svadd_x (pg, xm1, svsqrt_x (pg, u)), pg);
36*412f47f9SXin Li 
37*412f47f9SXin Li   if (unlikely (svptest_any (pg, special)))
38*412f47f9SXin Li     return special_case (x, y, special);
39*412f47f9SXin Li   return y;
40*412f47f9SXin Li }
41*412f47f9SXin Li 
42*412f47f9SXin Li PL_SIG (SV, F, 1, acosh, 1.0, 10.0)
43*412f47f9SXin Li PL_TEST_ULP (SV_NAME_F1 (acosh), 2.29)
44*412f47f9SXin Li PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500)
45*412f47f9SXin Li PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000)
46*412f47f9SXin Li PL_TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000)
47*412f47f9SXin Li PL_TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000)
48