xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/test/ulp_wrappers.h (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
1*412f47f9SXin Li // clang-format off
2*412f47f9SXin Li /*
3*412f47f9SXin Li  * Function wrappers for ulp.
4*412f47f9SXin Li  *
5*412f47f9SXin Li  * Copyright (c) 2022-2024, Arm Limited.
6*412f47f9SXin Li  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
7*412f47f9SXin Li  */
8*412f47f9SXin Li 
9*412f47f9SXin Li #define _GNU_SOURCE
10*412f47f9SXin Li #include <stdbool.h>
11*412f47f9SXin Li #include <arm_neon.h>
12*412f47f9SXin Li 
13*412f47f9SXin Li #if USE_MPFR
/* MPFR reference for the sine half of the sincos tests.  The cosine is
   evaluated into y first and then overwritten by the sine, so y ends up
   holding sin(x); the value returned is whatever mpfr_sin returns.  */
static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
  (void) mpfr_cos(y, x, r);
  int sin_status = mpfr_sin(y, x, r);
  return sin_status;
}
/* MPFR reference for the cosine half of the sincos tests.  The sine is
   evaluated into y first and then overwritten by the cosine, so y ends up
   holding cos(x); the value returned is whatever mpfr_cos returns.  */
static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
  (void) mpfr_sin(y, x, r);
  int cos_status = mpfr_cos(y, x, r);
  return cos_status;
}
/* MPFR reference for powi: computes ret = x ^ trunc(y) under rounding mode
   rnd and returns the value returned by mpfr_pow.

   The truncated exponent lives in a temporary that is released before
   returning; without the mpfr_clear every call would leak its storage.  */
static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
  mpfr_t y2;
  mpfr_init(y2);
  mpfr_trunc(y2, y);
  /* Keep mpfr_pow's result so the temporary can be cleared first.  */
  int status = mpfr_pow(ret, x, y2, rnd);
  mpfr_clear(y2);
  return status;
}
28*412f47f9SXin Li #endif
29*412f47f9SXin Li 
30*412f47f9SXin Li /* Our implementations of powi/powk are too imprecise to verify
31*412f47f9SXin Li    against any established pow implementation. Instead we have the
32*412f47f9SXin Li    following simple implementation, against which it is enough to
33*412f47f9SXin Li    maintain bitwise reproducibility. Note the test framework expects
34*412f47f9SXin Li    the reference impl to be of higher precision than the function
35*412f47f9SXin Li    under test. For instance this means that the reference for
36*412f47f9SXin Li    double-precision powi will be passed a long double, so to check
37*412f47f9SXin Li    bitwise reproducibility we have to cast it back down to
38*412f47f9SXin Li    double. This is fine since a round-trip to higher precision and
39*412f47f9SXin Li    back down is correctly rounded.  */
40*412f47f9SXin Li #define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T)                            \
41*412f47f9SXin Li   static DBL_T __attribute__((unused)) NAME (DBL_T in_val, DBL_T y)            \
42*412f47f9SXin Li   {                                                                            \
43*412f47f9SXin Li     INT_T n = (INT_T) round (y);                                               \
44*412f47f9SXin Li     FLT_T acc = 1.0;                                                           \
45*412f47f9SXin Li     bool want_recip = n < 0;                                                   \
46*412f47f9SXin Li     n = n < 0 ? -n : n;                                                        \
47*412f47f9SXin Li                                                                                \
48*412f47f9SXin Li     for (FLT_T c = in_val; n; c *= c, n >>= 1)                                 \
49*412f47f9SXin Li       {                                                                        \
50*412f47f9SXin Li         if (n & 0x1)                                                           \
51*412f47f9SXin Li           {                                                                    \
52*412f47f9SXin Li             acc *= c;                                                          \
53*412f47f9SXin Li           }                                                                    \
54*412f47f9SXin Li       }                                                                        \
55*412f47f9SXin Li     if (want_recip)                                                            \
56*412f47f9SXin Li       {                                                                        \
57*412f47f9SXin Li         acc = 1.0 / acc;                                                       \
58*412f47f9SXin Li       }                                                                        \
59*412f47f9SXin Li     return acc;                                                                \
60*412f47f9SXin Li   }
61*412f47f9SXin Li 
DECL_POW_INT_REF(ref_powif,double,float,int)62*412f47f9SXin Li DECL_POW_INT_REF(ref_powif, double, float, int)
63*412f47f9SXin Li DECL_POW_INT_REF(ref_powi, long double, double, int)
64*412f47f9SXin Li 
/* Scalar-callable shims over the Advanced SIMD routines.  Each generated
   Z_<func> passes its scalar arguments through argf/argd, calls the
   matching _ZGVnN* vector routine and returns lane 0 of the result.  */
#define ZVF1_WRAP(func)                                                        \
  static float Z_##func##f(float x)                                            \
  {                                                                            \
    return _ZGVnN4v_##func##f(argf(x))[0];                                     \
  }
#define ZVF2_WRAP(func)                                                        \
  static float Z_##func##f(float x, float y)                                   \
  {                                                                            \
    return _ZGVnN4vv_##func##f(argf(x), argf(y))[0];                           \
  }
#define ZVD1_WRAP(func)                                                        \
  static double Z_##func(double x)                                             \
  {                                                                            \
    return _ZGVnN2v_##func(argd(x))[0];                                        \
  }
#define ZVD2_WRAP(func)                                                        \
  static double Z_##func(double x, double y)                                   \
  {                                                                            \
    return _ZGVnN2vv_##func(argd(x), argd(y))[0];                              \
  }
69*412f47f9SXin Li 
#if defined(__vpcs) && __aarch64__

/* __vpcs is defined and __aarch64__ is non-zero: the ZVN* names forward
   to the ZV* wrappers.  */
# define ZVNF1_WRAP(func) ZVF1_WRAP(func)
# define ZVNF2_WRAP(func) ZVF2_WRAP(func)
# define ZVND1_WRAP(func) ZVD1_WRAP(func)
# define ZVND2_WRAP(func) ZVD2_WRAP(func)

#else /* !(defined(__vpcs) && __aarch64__) */

/* Otherwise the ZVN* names expand to nothing.  */
# define ZVNF1_WRAP(func)
# define ZVNF2_WRAP(func)
# define ZVND1_WRAP(func)
# define ZVND2_WRAP(func)

#endif /* defined(__vpcs) && __aarch64__ */
85*412f47f9SXin Li 
/* Scalar-callable shims over the SVE routines.  Each generated Z_sv_<func>
   takes the governing predicate pg first, passes its scalar arguments
   through svargf/svargd, calls the matching _ZGVsMx* routine with pg and
   reduces the vector result to a scalar with svretf/svretd.  */
#define ZSVF1_WRAP(func)                                                       \
  static float Z_sv_##func##f(svbool_t pg, float x)                            \
  {                                                                            \
    return svretf(_ZGVsMxv_##func##f(svargf(x), pg), pg);                      \
  }
#define ZSVF2_WRAP(func)                                                       \
  static float Z_sv_##func##f(svbool_t pg, float x, float y)                   \
  {                                                                            \
    return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), pg), pg);          \
  }
#define ZSVD1_WRAP(func)                                                       \
  static double Z_sv_##func(svbool_t pg, double x)                             \
  {                                                                            \
    return svretd(_ZGVsMxv_##func(svargd(x), pg), pg);                         \
  }
#define ZSVD2_WRAP(func)                                                       \
  static double Z_sv_##func(svbool_t pg, double x, double y)                   \
  {                                                                            \
    return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), pg), pg);             \
  }
90*412f47f9SXin Li 
#if WANT_SVE_MATH

/* WANT_SVE_MATH is non-zero: the ZSVN* names forward to the ZSV*
   wrappers.  */
# define ZSVNF1_WRAP(func) ZSVF1_WRAP(func)
# define ZSVNF2_WRAP(func) ZSVF2_WRAP(func)
# define ZSVND1_WRAP(func) ZSVD1_WRAP(func)
# define ZSVND2_WRAP(func) ZSVD2_WRAP(func)

#else /* !WANT_SVE_MATH */

/* Otherwise the ZSVN* names expand to nothing.  */
# define ZSVNF1_WRAP(func)
# define ZSVNF2_WRAP(func)
# define ZSVND1_WRAP(func)
# define ZSVND2_WRAP(func)

#endif /* WANT_SVE_MATH */
106*412f47f9SXin Li 
/* Scalar routines need no wrapper, but PL_SIG still emits a ZSN*_WRAP
   invocation for each of them, so these expand to nothing.  */
#define ZSNF1_WRAP(func) /* empty */
#define ZSNF2_WRAP(func) /* empty */
#define ZSND1_WRAP(func) /* empty */
#define ZSND2_WRAP(func) /* empty */
112*412f47f9SXin Li 
113*412f47f9SXin Li #include "ulp_wrappers_gen.h"
114*412f47f9SXin Li 
115*412f47f9SXin Li float v_sincosf_sin(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return s[0]; }
v_sincosf_cos(float x)116*412f47f9SXin Li float v_sincosf_cos(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return c[0]; }
v_cexpif_sin(float x)117*412f47f9SXin Li float v_cexpif_sin(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[0][0]; }
v_cexpif_cos(float x)118*412f47f9SXin Li float v_cexpif_cos(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[1][0]; }
119*412f47f9SXin Li 
v_sincos_sin(double x)120*412f47f9SXin Li double v_sincos_sin(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return s[0]; }
v_sincos_cos(double x)121*412f47f9SXin Li double v_sincos_cos(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return c[0]; }
v_cexpi_sin(double x)122*412f47f9SXin Li double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0]; }
v_cexpi_cos(double x)123*412f47f9SXin Li double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; }
124*412f47f9SXin Li 
125*412f47f9SXin Li #if WANT_SVE_MATH
/* Scalar shim for the SVE powi: y is rounded to the nearest int and
   duplicated into a 32-bit integer vector to serve as the exponent; the
   vector result is reduced to a scalar with svretf under pg.  */
static float Z_sv_powi(svbool_t pg, float x, float y) {
  svint32_t exponent = svdup_s32((int)round(y));
  return svretf(_ZGVsMxvv_powi(svargf(x), exponent, pg), pg);
}
/* Scalar shim for the SVE powk: y is rounded to the nearest long and
   duplicated into a 64-bit integer vector to serve as the exponent; the
   vector result is reduced to a scalar with svretd under pg.  */
static double Z_sv_powk(svbool_t pg, double x, double y) {
  svint64_t exponent = svdup_s64((long)round(y));
  return svretd(_ZGVsMxvv_powk(svargd(x), exponent, pg), pg);
}
128*412f47f9SXin Li 
/* Calls the SVE sincosf with x duplicated into every lane, writing into two
   svcntw()-element stack buffers, then loads the first buffer (treated here
   as the sine) under pg and reduces it to a scalar with svretf.  */
float sv_sincosf_sin(svbool_t pg, float x) {
  float sin_buf[svcntw()], cos_buf[svcntw()];
  _ZGVsMxvl4l4_sincosf(svdup_f32(x), sin_buf, cos_buf, pg);
  svfloat32_t loaded = svld1(pg, sin_buf);
  return svretf(loaded, pg);
}
/* Calls the SVE sincosf with x duplicated into every lane, writing into two
   svcntw()-element stack buffers, then loads the second buffer (treated
   here as the cosine) under pg and reduces it to a scalar with svretf.  */
float sv_sincosf_cos(svbool_t pg, float x) {
  float sin_buf[svcntw()], cos_buf[svcntw()];
  _ZGVsMxvl4l4_sincosf(svdup_f32(x), sin_buf, cos_buf, pg);
  svfloat32_t loaded = svld1(pg, cos_buf);
  return svretf(loaded, pg);
}
/* Calls the SVE cexpif with x duplicated into every lane and reduces
   element 0 of the returned vector pair to a scalar with svretf under pg.  */
float sv_cexpif_sin(svbool_t pg, float x) {
  svfloat32_t input = svdup_f32(x);
  return svretf(svget2(_ZGVsMxv_cexpif(input, pg), 0), pg);
}
/* Calls the SVE cexpif with x duplicated into every lane and reduces
   element 1 of the returned vector pair to a scalar with svretf under pg.  */
float sv_cexpif_cos(svbool_t pg, float x) {
  svfloat32_t input = svdup_f32(x);
  return svretf(svget2(_ZGVsMxv_cexpif(input, pg), 1), pg);
}
133*412f47f9SXin Li 
/* Calls the SVE sincos with x duplicated into every lane, writing into two
   svcntd()-element stack buffers, then loads the first buffer (treated here
   as the sine) under pg and reduces it to a scalar with svretd.  */
double sv_sincos_sin(svbool_t pg, double x) {
  double sin_buf[svcntd()], cos_buf[svcntd()];
  _ZGVsMxvl8l8_sincos(svdup_f64(x), sin_buf, cos_buf, pg);
  svfloat64_t loaded = svld1(pg, sin_buf);
  return svretd(loaded, pg);
}
/* Calls the SVE sincos with x duplicated into every lane, writing into two
   svcntd()-element stack buffers, then loads the second buffer (treated
   here as the cosine) under pg and reduces it to a scalar with svretd.  */
double sv_sincos_cos(svbool_t pg, double x) {
  double sin_buf[svcntd()], cos_buf[svcntd()];
  _ZGVsMxvl8l8_sincos(svdup_f64(x), sin_buf, cos_buf, pg);
  svfloat64_t loaded = svld1(pg, cos_buf);
  return svretd(loaded, pg);
}
/* Calls the SVE cexpi with x duplicated into every lane and reduces
   element 0 of the returned vector pair to a scalar with svretd under pg.  */
double sv_cexpi_sin(svbool_t pg, double x) {
  svfloat64_t input = svdup_f64(x);
  return svretd(svget2(_ZGVsMxv_cexpi(input, pg), 0), pg);
}
/* Calls the SVE cexpi with x duplicated into every lane and reduces
   element 1 of the returned vector pair to a scalar with svretd under pg.  */
double sv_cexpi_cos(svbool_t pg, double x) {
  svfloat64_t input = svdup_f64(x);
  return svretd(svget2(_ZGVsMxv_cexpi(input, pg), 1), pg);
}
138*412f47f9SXin Li 
139*412f47f9SXin Li #endif
140*412f47f9SXin Li // clang-format on
141