xref: /aosp_15_r20/external/arm-optimized-routines/pl/math/sv_math.h (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14)
/*
 * Wrapper functions for SVE ACLE.
 *
 * Copyright (c) 2019-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*412f47f9SXin Li 
8*412f47f9SXin Li #ifndef SV_MATH_H
9*412f47f9SXin Li #define SV_MATH_H
10*412f47f9SXin Li 
11*412f47f9SXin Li #ifndef WANT_VMATH
12*412f47f9SXin Li /* Enable the build of vector math code.  */
13*412f47f9SXin Li # define WANT_VMATH 1
14*412f47f9SXin Li #endif
15*412f47f9SXin Li 
16*412f47f9SXin Li #if WANT_VMATH
17*412f47f9SXin Li 
18*412f47f9SXin Li # include <arm_sve.h>
19*412f47f9SXin Li # include <stdbool.h>
20*412f47f9SXin Li 
21*412f47f9SXin Li # include "math_config.h"
22*412f47f9SXin Li 
/* Double precision.  */
24*412f47f9SXin Li static inline svint64_t
sv_s64(int64_t x)25*412f47f9SXin Li sv_s64 (int64_t x)
26*412f47f9SXin Li {
27*412f47f9SXin Li   return svdup_s64 (x);
28*412f47f9SXin Li }
29*412f47f9SXin Li 
30*412f47f9SXin Li static inline svuint64_t
sv_u64(uint64_t x)31*412f47f9SXin Li sv_u64 (uint64_t x)
32*412f47f9SXin Li {
33*412f47f9SXin Li   return svdup_u64 (x);
34*412f47f9SXin Li }
35*412f47f9SXin Li 
36*412f47f9SXin Li static inline svfloat64_t
sv_f64(double x)37*412f47f9SXin Li sv_f64 (double x)
38*412f47f9SXin Li {
39*412f47f9SXin Li   return svdup_f64 (x);
40*412f47f9SXin Li }
41*412f47f9SXin Li 
42*412f47f9SXin Li static inline svfloat64_t
sv_call_f64(double (* f)(double),svfloat64_t x,svfloat64_t y,svbool_t cmp)43*412f47f9SXin Li sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
44*412f47f9SXin Li {
45*412f47f9SXin Li   svbool_t p = svpfirst (cmp, svpfalse ());
46*412f47f9SXin Li   while (svptest_any (cmp, p))
47*412f47f9SXin Li     {
48*412f47f9SXin Li       double elem = svclastb (p, 0, x);
49*412f47f9SXin Li       elem = (*f) (elem);
50*412f47f9SXin Li       svfloat64_t y2 = sv_f64 (elem);
51*412f47f9SXin Li       y = svsel (p, y2, y);
52*412f47f9SXin Li       p = svpnext_b64 (cmp, p);
53*412f47f9SXin Li     }
54*412f47f9SXin Li   return y;
55*412f47f9SXin Li }
56*412f47f9SXin Li 
57*412f47f9SXin Li static inline svfloat64_t
sv_call2_f64(double (* f)(double,double),svfloat64_t x1,svfloat64_t x2,svfloat64_t y,svbool_t cmp)58*412f47f9SXin Li sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
59*412f47f9SXin Li 	      svfloat64_t y, svbool_t cmp)
60*412f47f9SXin Li {
61*412f47f9SXin Li   svbool_t p = svpfirst (cmp, svpfalse ());
62*412f47f9SXin Li   while (svptest_any (cmp, p))
63*412f47f9SXin Li     {
64*412f47f9SXin Li       double elem1 = svclastb (p, 0, x1);
65*412f47f9SXin Li       double elem2 = svclastb (p, 0, x2);
66*412f47f9SXin Li       double ret = (*f) (elem1, elem2);
67*412f47f9SXin Li       svfloat64_t y2 = sv_f64 (ret);
68*412f47f9SXin Li       y = svsel (p, y2, y);
69*412f47f9SXin Li       p = svpnext_b64 (cmp, p);
70*412f47f9SXin Li     }
71*412f47f9SXin Li   return y;
72*412f47f9SXin Li }
73*412f47f9SXin Li 
74*412f47f9SXin Li static inline svuint64_t
sv_mod_n_u64_x(svbool_t pg,svuint64_t x,uint64_t y)75*412f47f9SXin Li sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y)
76*412f47f9SXin Li {
77*412f47f9SXin Li   svuint64_t q = svdiv_x (pg, x, y);
78*412f47f9SXin Li   return svmls_x (pg, x, q, y);
79*412f47f9SXin Li }
80*412f47f9SXin Li 
/* Single precision.  */
82*412f47f9SXin Li static inline svint32_t
sv_s32(int32_t x)83*412f47f9SXin Li sv_s32 (int32_t x)
84*412f47f9SXin Li {
85*412f47f9SXin Li   return svdup_s32 (x);
86*412f47f9SXin Li }
87*412f47f9SXin Li 
88*412f47f9SXin Li static inline svuint32_t
sv_u32(uint32_t x)89*412f47f9SXin Li sv_u32 (uint32_t x)
90*412f47f9SXin Li {
91*412f47f9SXin Li   return svdup_u32 (x);
92*412f47f9SXin Li }
93*412f47f9SXin Li 
94*412f47f9SXin Li static inline svfloat32_t
sv_f32(float x)95*412f47f9SXin Li sv_f32 (float x)
96*412f47f9SXin Li {
97*412f47f9SXin Li   return svdup_f32 (x);
98*412f47f9SXin Li }
99*412f47f9SXin Li 
100*412f47f9SXin Li static inline svfloat32_t
sv_call_f32(float (* f)(float),svfloat32_t x,svfloat32_t y,svbool_t cmp)101*412f47f9SXin Li sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
102*412f47f9SXin Li {
103*412f47f9SXin Li   svbool_t p = svpfirst (cmp, svpfalse ());
104*412f47f9SXin Li   while (svptest_any (cmp, p))
105*412f47f9SXin Li     {
106*412f47f9SXin Li       float elem = svclastb (p, 0, x);
107*412f47f9SXin Li       elem = (*f) (elem);
108*412f47f9SXin Li       svfloat32_t y2 = sv_f32 (elem);
109*412f47f9SXin Li       y = svsel (p, y2, y);
110*412f47f9SXin Li       p = svpnext_b32 (cmp, p);
111*412f47f9SXin Li     }
112*412f47f9SXin Li   return y;
113*412f47f9SXin Li }
114*412f47f9SXin Li 
115*412f47f9SXin Li static inline svfloat32_t
sv_call2_f32(float (* f)(float,float),svfloat32_t x1,svfloat32_t x2,svfloat32_t y,svbool_t cmp)116*412f47f9SXin Li sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
117*412f47f9SXin Li 	      svfloat32_t y, svbool_t cmp)
118*412f47f9SXin Li {
119*412f47f9SXin Li   svbool_t p = svpfirst (cmp, svpfalse ());
120*412f47f9SXin Li   while (svptest_any (cmp, p))
121*412f47f9SXin Li     {
122*412f47f9SXin Li       float elem1 = svclastb (p, 0, x1);
123*412f47f9SXin Li       float elem2 = svclastb (p, 0, x2);
124*412f47f9SXin Li       float ret = (*f) (elem1, elem2);
125*412f47f9SXin Li       svfloat32_t y2 = sv_f32 (ret);
126*412f47f9SXin Li       y = svsel (p, y2, y);
127*412f47f9SXin Li       p = svpnext_b32 (cmp, p);
128*412f47f9SXin Li     }
129*412f47f9SXin Li   return y;
130*412f47f9SXin Li }
131*412f47f9SXin Li #endif
132*412f47f9SXin Li 
133*412f47f9SXin Li #endif
134