1*412f47f9SXin Li /*
2*412f47f9SXin Li * Single-precision SVE powi(x, n) function.
3*412f47f9SXin Li *
4*412f47f9SXin Li * Copyright (c) 2020-2023, Arm Limited.
5*412f47f9SXin Li * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*412f47f9SXin Li */
7*412f47f9SXin Li
8*412f47f9SXin Li #include "sv_math.h"
9*412f47f9SXin Li
10*412f47f9SXin Li /* Optimized single-precision vector powi (float base, integer power).
11*412f47f9SXin Li powi is developed for environments in which accuracy is of much less
12*412f47f9SXin Li importance than performance, hence we provide no estimate for worst-case
13*412f47f9SXin Li error. */
14*412f47f9SXin Li svfloat32_t
_ZGVsMxvv_powi(svfloat32_t as,svint32_t ns,svbool_t p)15*412f47f9SXin Li _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p)
16*412f47f9SXin Li {
17*412f47f9SXin Li /* Compute powi by successive squaring, right to left. */
18*412f47f9SXin Li svfloat32_t acc = sv_f32 (1.f);
19*412f47f9SXin Li svbool_t want_recip = svcmplt (p, ns, 0);
20*412f47f9SXin Li svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns));
21*412f47f9SXin Li
22*412f47f9SXin Li /* We use a max to avoid needing to check whether any lane != 0 on each
23*412f47f9SXin Li iteration. */
24*412f47f9SXin Li uint32_t max_n = svmaxv (p, ns_abs);
25*412f47f9SXin Li
26*412f47f9SXin Li svfloat32_t c = as;
27*412f47f9SXin Li /* Successively square c, and use merging predication (_m) to determine
28*412f47f9SXin Li whether or not to perform the multiplication or keep the previous
29*412f47f9SXin Li iteration. */
30*412f47f9SXin Li while (true)
31*412f47f9SXin Li {
32*412f47f9SXin Li svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1);
33*412f47f9SXin Li acc = svmul_m (px, acc, c);
34*412f47f9SXin Li max_n >>= 1;
35*412f47f9SXin Li if (max_n == 0)
36*412f47f9SXin Li break;
37*412f47f9SXin Li
38*412f47f9SXin Li ns_abs = svlsr_x (p, ns_abs, 1);
39*412f47f9SXin Li c = svmul_x (p, c, c);
40*412f47f9SXin Li }
41*412f47f9SXin Li
42*412f47f9SXin Li /* Negative powers are handled by computing the abs(n) version and then
43*412f47f9SXin Li taking the reciprocal. */
44*412f47f9SXin Li if (svptest_any (want_recip, want_recip))
45*412f47f9SXin Li acc = svdivr_m (want_recip, acc, 1.0f);
46*412f47f9SXin Li
47*412f47f9SXin Li return acc;
48*412f47f9SXin Li }
49