/*
 * Single-precision SVE powf function.
 *
 * Copyright (c) 2023-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "sv_math.h"
#include "pl_sig.h"
#include "pl_test.h"

/* The following data is used in the SVE pow core computation
   and special case detection. */
#define Tinvc __v_powf_data.invc
#define Tlogc __v_powf_data.logc
#define Texp __v_powf_data.scale
#define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))
#define Shift 0x1.8p52
#define Norm 0x1p23f /* 0x4b000000. */

/* Overall ULP error bound for pow is 2.6 ulp
   ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2). */
static const struct data
{
  double log_poly[4];
  double exp_poly[3];
  float uflow_bound, oflow_bound, small_bound;
  uint32_t sign_bias, sign_mask, subnormal_bias, off;
} data = {
  /* rel err: 1.5 * 2^-30. Each coefficient is multiplied by
     V_POWF_EXP2_N. */
  .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3,
		-0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 },
  /* rel err: 1.69 * 2^-34. */
  .exp_poly = {
    0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3. */
    0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2. */
    0x1.62e42ff0c52d6p-6,  /* A2 / V_POWF_EXP2_N. */
  },
  .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N. */
  .oflow_bound = 0x1p+12f,     /* 128.0 * V_POWF_EXP2_N. */
  .small_bound = 0x1p-126f,
  .off = 0x3f35d000,
  .sign_bias = SignBias,
  .sign_mask = 0x80000000,
  .subnormal_bias = 0x0b800000, /* 23 << 23. */
};


#define A(i) sv_f64 (d->log_poly[i])
#define C(i) sv_f64 (d->exp_poly[i])

/* Check if x is an integer. */
static inline svbool_t
svisint (svbool_t pg, svfloat32_t x)
{
  return svcmpeq (pg, svrintz_z (pg, x), x);
}

/* Check if x is real but not an integer. */
static inline svbool_t
svisnotint (svbool_t pg, svfloat32_t x)
{
  return svcmpne (pg, svrintz_z (pg, x), x);
}

/* Check if x is an odd integer. */
static inline svbool_t
svisodd (svbool_t pg, svfloat32_t x)
{
  svfloat32_t y = svmul_x (pg, x, 0.5f);
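  /* An odd integer halves to a value with fractional part 0.5, which is not
     an integer; an even integer stays integer-valued. */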
  return svisnotint (pg, y);
}

/* Check if zero, inf or nan. */
static inline svbool_t
sv_zeroinfnan (svbool_t pg, svuint32_t i)
{
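  /* Doubling the bit pattern discards the sign bit, and subtracting 1 wraps
     +/-0 around to UINT32_MAX, so zero, inf and nan all satisfy the unsigned
     comparison below. */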
  return svcmpge (pg, svsub_x (pg, svmul_x (pg, i, 2u), 1),
		  2u * 0x7f800000 - 1);
}

/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
   the bit representation of a non-zero finite floating-point value. */
static inline int
checkint (uint32_t iy)
{
  int e = iy >> 23 & 0xff;
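  /* e is the biased exponent. A non-zero finite |y| < 1 (e < 0x7f) cannot be
     an integer; for e > 0x7f + 23 every fraction bit sits above the binary
     point, so y is an even integer. Otherwise y is an integer iff the bits
     below the binary point are zero, and odd iff the lowest integer bit is
     set. */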
  if (e < 0x7f)
    return 0;
  if (e > 0x7f + 23)
    return 2;
  if (iy & ((1 << (0x7f + 23 - e)) - 1))
    return 0;
  if (iy & (1 << (0x7f + 23 - e)))
    return 1;
  return 2;
}

/* Check if zero, inf or nan. */
static inline int
zeroinfnan (uint32_t ix)
{
  return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}

/* A scalar subroutine used to fix main power special cases. Similar to the
   preamble of scalar powf except that we do not update ix and sign_bias.
   This is done in the preamble of the SVE powf. */
static inline float
powf_specialcase (float x, float y, float z)
{
  uint32_t ix = asuint (x);
  uint32_t iy = asuint (y);
  /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
  if (unlikely (zeroinfnan (iy)))
    {
      if (2 * iy == 0)
	return issignalingf_inline (x) ? x + y : 1.0f;
      if (ix == 0x3f800000)
	return issignalingf_inline (y) ? x + y : 1.0f;
      if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
	return x + y;
      if (2 * ix == 2 * 0x3f800000)
	return 1.0f;
      if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
	return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
      return y * y;
    }
  if (unlikely (zeroinfnan (ix)))
    {
      float_t x2 = x * x;
      if (ix & 0x80000000 && checkint (iy) == 1)
	x2 = -x2;
      return iy & 0x80000000 ? 1 / x2 : x2;
    }
  /* We need a return here in case x<0 and y is integer, but all other tests
     need to be run. */
  return z;
}

/* Scalar fallback for special case routines with custom signature. */
static inline svfloat32_t
sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y, svbool_t cmp)
{
  svbool_t p = svpfirst (cmp, svpfalse ());
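  /* Process one lane per iteration: p selects a single active lane of cmp,
     svclastb extracts its elements, and svpnext_b32 advances p to the next
     active lane. */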
  while (svptest_any (cmp, p))
    {
      float sx1 = svclastb (p, 0, x1);
      float sx2 = svclastb (p, 0, x2);
      float elem = svclastb (p, 0, y);
      elem = powf_specialcase (sx1, sx2, elem);
      svfloat32_t y2 = sv_f32 (elem);
      y = svsel (p, y2, y);
      p = svpnext_b32 (cmp, p);
    }
  return y;
}

/* Compute core for half of the lanes in double precision. */
static inline svfloat64_t
sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
		  svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,
		  const struct data *d)
{
  svfloat64_t invc = svld1_gather_index (pg, Tinvc, i);
  svfloat64_t logc = svld1_gather_index (pg, Tlogc, i);

  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k. */
  svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc);
  svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k));

  /* Polynomial to approximate log1p(r)/ln2. */
  svfloat64_t logx = A (0);
  logx = svmla_x (pg, A (1), r, logx);
  logx = svmla_x (pg, A (2), r, logx);
  logx = svmla_x (pg, A (3), r, logx);
  logx = svmla_x (pg, y0, r, logx);
  *pylogx = svmul_x (pg, y, logx);

  /* ylogx - kd is in [-1, 1] in non-nearest rounding modes. */
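  /* Adding Shift (0x1.8p52) rounds *pylogx to an integer that lands in the
     low bits of the double's significand, where it can be read straight from
     the bit pattern. */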
  svfloat64_t kd = svadd_x (pg, *pylogx, Shift);
  svuint64_t ki = svreinterpret_u64 (kd);
  kd = svsub_x (pg, kd, Shift);

  r = svsub_x (pg, *pylogx, kd);

  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1). */
  svuint64_t t
      = svld1_gather_index (pg, Texp, svand_x (pg, ki, V_POWF_EXP2_N - 1));
  svuint64_t ski = svadd_x (pg, ki, sign_bias);
  t = svadd_x (pg, t, svlsl_x (pg, ski, 52 - V_POWF_EXP2_TABLE_BITS));
  svfloat64_t s = svreinterpret_f64 (t);

  svfloat64_t p = C (0);
  p = svmla_x (pg, C (1), p, r);
  p = svmla_x (pg, C (2), p, r);
  p = svmla_x (pg, s, p, svmul_x (pg, s, r));

  return p;
}

/* Widen vector to double precision and compute core on both halves of the
   vector. Lower cost of promotion by considering all lanes active. */
static inline svfloat32_t
sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
	      svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,
	      const struct data *d)
{
  const svbool_t ptrue = svptrue_b64 ();

  /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
     halves in order to perform core computation in double precision. */
  const svbool_t pg_lo = svunpklo (pg);
  const svbool_t pg_hi = svunpkhi (pg);
  svfloat64_t y_lo = svcvt_f64_x (
      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
  svfloat64_t y_hi = svcvt_f64_x (
      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
  svfloat32_t z = svreinterpret_f32 (iz);
  svfloat64_t z_lo = svcvt_f64_x (
      ptrue, svreinterpret_f32 (svunpklo (svreinterpret_u32 (z))));
  svfloat64_t z_hi = svcvt_f64_x (
      ptrue, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (z))));
  svuint64_t i_lo = svunpklo (i);
  svuint64_t i_hi = svunpkhi (i);
  svint64_t k_lo = svunpklo (k);
  svint64_t k_hi = svunpkhi (k);
  svuint64_t sign_bias_lo = svunpklo (sign_bias);
  svuint64_t sign_bias_hi = svunpkhi (sign_bias);

  /* Compute each part in double precision. */
  svfloat64_t ylogx_lo, ylogx_hi;
  svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo,
				     sign_bias_lo, &ylogx_lo, d);
  svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi,
				     sign_bias_hi, &ylogx_hi, d);

  /* Convert back to single-precision and interleave. */
  svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo);
  svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi);
  *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32);
  svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo);
  svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi);
  return svuzp1 (lo_32, hi_32);
}

/* Implementation of SVE powf.
   Provides the same accuracy as AdvSIMD powf, since it relies on the same
   algorithm. The theoretical maximum error is under 2.60 ULPs.
   Maximum measured error is 2.56 ULPs:
   SV_NAME_F2 (pow) (0x1.004118p+0, 0x1.5d14a4p+16) got 0x1.fd4bp+127
						    want 0x1.fd4b06p+127. */
svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
{
  const struct data *d = ptr_barrier (&data);

  svuint32_t vix0 = svreinterpret_u32 (x);
  svuint32_t viy0 = svreinterpret_u32 (y);

  /* Negative x cases. */
  svuint32_t sign_bit = svand_m (pg, vix0, d->sign_mask);
  svbool_t xisneg = svcmpeq (pg, sign_bit, d->sign_mask);

  /* Set sign_bias and ix depending on sign of x and nature of y. */
  svbool_t yisnotint_xisneg = svpfalse_b ();
  svuint32_t sign_bias = sv_u32 (0);
  svuint32_t vix = vix0;
  if (unlikely (svptest_any (pg, xisneg)))
    {
      /* Determine nature of y. */
      yisnotint_xisneg = svisnotint (xisneg, y);
      svbool_t yisint_xisneg = svisint (xisneg, y);
      svbool_t yisodd_xisneg = svisodd (xisneg, y);
      /* ix set to abs(ix) if y is integer. */
      vix = svand_m (yisint_xisneg, vix0, 0x7fffffff);
      /* Set to SignBias if x is negative and y is odd. */
      sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
    }

  /* Special cases of x or y: zero, inf and nan. */
  svbool_t xspecial = sv_zeroinfnan (pg, vix0);
  svbool_t yspecial = sv_zeroinfnan (pg, viy0);
  svbool_t cmp = svorr_z (pg, xspecial, yspecial);

  /* Small cases of x: |x| < 0x1p-126. */
  svbool_t xsmall = svaclt (pg, x, d->small_bound);
  if (unlikely (svptest_any (pg, xsmall)))
    {
      /* Normalize subnormal x so exponent becomes negative. */
      svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
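      /* Multiplying by Norm (0x1p23f) makes x normal; subtracting
	 subnormal_bias (23 << 23) from the biased exponent field then
	 restores its true scale. */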
      vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff);
      vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias);
      vix = svsel (xsmall, vix_norm, vix);
    }
  /* Part of core computation carried in working precision. */
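  /* i indexes the log table from the top mantissa bits of x; iz is x with
     those bits cleared so that z * invc is close to 1; k is the exponent of
     x pre-scaled by V_POWF_EXP2_N. */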
  svuint32_t tmp = svsub_x (pg, vix, d->off);
  svuint32_t i = svand_x (pg, svlsr_x (pg, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
			  V_POWF_LOG2_N - 1);
  svuint32_t top = svand_x (pg, tmp, 0xff800000);
  svuint32_t iz = svsub_x (pg, vix, top);
  svint32_t k
      = svasr_x (pg, svreinterpret_s32 (top), (23 - V_POWF_EXP2_TABLE_BITS));

  /* Compute core in extended precision and return intermediate ylogx results
     to handle cases of overflow and underflow in exp. */
  svfloat32_t ylogx;
  svfloat32_t ret = sv_powf_core (pg, i, iz, k, y, sign_bias, &ylogx, d);

  /* Handle exp special cases of underflow and overflow. */
  svuint32_t sign = svlsl_x (pg, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
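  /* The shift moves sign_bias (1 << (V_POWF_EXP2_TABLE_BITS + 11)) onto bit
     31, the float sign bit, so underflow returns +/-0 and overflow +/-inf
     with the correct sign. */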
  svfloat32_t ret_oflow
      = svreinterpret_f32 (svorr_x (pg, sign, asuint (INFINITY)));
  svfloat32_t ret_uflow = svreinterpret_f32 (sign);
  ret = svsel (svcmple (pg, ylogx, d->uflow_bound), ret_uflow, ret);
  ret = svsel (svcmpgt (pg, ylogx, d->oflow_bound), ret_oflow, ret);

  /* Cases of finite y and finite negative x. */
  ret = svsel (yisnotint_xisneg, sv_f32 (__builtin_nanf ("")), ret);

  if (unlikely (svptest_any (pg, cmp)))
    return sv_call_powf_sc (x, y, ret, cmp);

  return ret;
}

PL_SIG (SV, F, 2, pow)
PL_TEST_ULP (SV_NAME_F2 (pow), 2.06)
/* Wide intervals spanning the whole domain but shared between x and y. */
#define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                               \
  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n)                  \
  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)                \
  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n)                \
  PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n)
SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000)
SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000)
SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000)
/* x~1 or y~1. */
SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
/* Around estimated argmaxs of ULP error. */
SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
/* x is negative, y is odd or even integer, or y is real not integer. */
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
/* |x| is inf, y is odd or even integer, or y is real not integer. */
SV_POWF_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
SV_POWF_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
SV_POWF_INTERVAL2 (inf, inf, 2.0, 2.0, 1)
SV_POWF_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
/* 0.0^y. */
SV_POWF_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
/* 1.0^y. */
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
PL_TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)