/* xref: /aosp_15_r20/external/arm-optimized-routines/math/aarch64/v_math.h (revision 412f47f9e737e10ed5cc46ec6a8d7fa2264f8a14) */
/*
 * Vector math abstractions.
 *
 * Copyright (c) 2019-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*412f47f9SXin Li 
8*412f47f9SXin Li #ifndef _V_MATH_H
9*412f47f9SXin Li #define _V_MATH_H
10*412f47f9SXin Li 
11*412f47f9SXin Li #if !__aarch64__
12*412f47f9SXin Li # error "Cannot build without AArch64"
13*412f47f9SXin Li #endif
14*412f47f9SXin Li 
/* Attribute selecting the AArch64 vector procedure call standard for a
   function declaration.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Build the symbol name of a vector routine from the scalar function name
   FUN by token pasting.  The prefixes follow the AArch64 vector function
   ABI name mangling: 4 or 2 is the lane count and there is one 'v' per
   vector argument.
     V_NAME_F1: single precision, one argument  (FUN gets an 'f' suffix).
     V_NAME_D1: double precision, one argument.
     V_NAME_F2: single precision, two arguments (FUN gets an 'f' suffix).
     V_NAME_D2: double precision, two arguments.  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun
21*412f47f9SXin Li 
22*412f47f9SXin Li #include <stdint.h>
23*412f47f9SXin Li #include "../math_config.h"
24*412f47f9SXin Li #include <arm_neon.h>
25*412f47f9SXin Li 
/* Shorthand helpers for declaring constants: each expands to a brace
   initializer that repeats X in 2, 4 or 8 elements.  X is expanded once
   per element, so it should be free of side effects.  */
#define V2(X) { X, X }
#define V4(X) { X, X, X, X }
#define V8(X) { X, X, X, X, X, X, X, X }
30*412f47f9SXin Li 
/* Return nonzero if any bit in the four 16-bit lanes of X is set.  The
   64-bit vector is reinterpreted as a single 64-bit lane so that one
   scalar comparison covers every lane.  */
static inline int
v_any_u16h (uint16x4_t x)
{
  return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
}
36*412f47f9SXin Li 
/* Number of 32-bit lanes in the float32x4_t / uint32x4_t vectors handled
   by the helpers in this header.  */
static inline int
v_lanes32 (void)
{
  return 4;
}
42*412f47f9SXin Li 
/* Broadcast the scalar X to all four lanes of a float32x4_t.  */
static inline float32x4_t
v_f32 (float x)
{
  return vdupq_n_f32 (x);
}
/* Broadcast the scalar X to all four lanes of a uint32x4_t.  */
static inline uint32x4_t
v_u32 (uint32_t x)
{
  return vdupq_n_u32 (x);
}
/* Return nonzero if any element of a 32-bit comparison mask X is set.

   X is viewed as two 64-bit lanes which are added together.  This is only
   a valid "any" test when every 32-bit element of X is either 0 or all
   ones (as produced by vector compares): for such inputs the 64-bit sum
   cannot wrap around to zero.  Arbitrary lane values are not supported.  */
static inline int
v_any_u32 (uint32x4_t x)
{
  return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
}
/* Return nonzero if any bit in the two 32-bit lanes of X is set.  The
   64-bit vector is reinterpreted as a single 64-bit lane and compared
   against zero.  */
static inline int
v_any_u32h (uint32x2_t x)
{
  return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
}
/* Gather four floats from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed, so every lane of IDX must be a valid
   index into TAB.  */
static inline float32x4_t
v_lookup_f32 (const float *tab, uint32x4_t idx)
{
  return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
}
/* Gather four 32-bit words from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed, so every lane of IDX must be a valid
   index into TAB.  */
static inline uint32x4_t
v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
{
  return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
}
/* Per-lane scalar fallback for single precision.

   For each lane i the result is F (X[i]) when P[i] is nonzero and Y[i]
   otherwise.  F is called only for the lanes selected by P.  */
static inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
{
  return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
			p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
}
/* Per-lane scalar fallback for two-argument single-precision functions.

   For each lane i the result is F (X1[i], X2[i]) when P[i] is nonzero and
   Y[i] otherwise.  F is called only for the lanes selected by P.  */
static inline float32x4_t
v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	     float32x4_t y, uint32x4_t p)
{
  return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
			p[1] ? f (x1[1], x2[1]) : y[1],
			p[2] ? f (x1[2], x2[2]) : y[2],
			p[3] ? f (x1[3], x2[3]) : y[3] };
}
90*412f47f9SXin Li 
/* Number of 64-bit lanes in the float64x2_t / uint64x2_t vectors handled
   by the helpers in this header.  */
static inline int
v_lanes64 (void)
{
  return 2;
}
/* Broadcast the scalar X to both lanes of a float64x2_t.  */
static inline float64x2_t
v_f64 (double x)
{
  return vdupq_n_f64 (x);
}
/* Broadcast the scalar X to both lanes of a uint64x2_t.  */
static inline uint64x2_t
v_u64 (uint64_t x)
{
  return vdupq_n_u64 (x);
}
/* Return nonzero if any element of a 64-bit comparison mask X is set.

   The two lanes are added together.  This is only a valid "any" test when
   each lane of X is either 0 or all ones (as produced by vector compares):
   for such inputs the 64-bit sum cannot wrap around to zero.  Arbitrary
   lane values are not supported.  */
static inline int
v_any_u64 (uint64x2_t x)
{
  return vpaddd_u64 (x) != 0;
}
/* Gather two doubles from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed, so both lanes of IDX must be valid
   indices into TAB.  */
static inline float64x2_t
v_lookup_f64 (const double *tab, uint64x2_t idx)
{
  return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
}
/* Gather two 64-bit words from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed, so both lanes of IDX must be valid
   indices into TAB.  */
static inline uint64x2_t
v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
{
  return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
}
/* Per-lane scalar fallback for double precision.

   For each lane i, Y[i] is replaced by F (X[i]) when P[i] is nonzero and
   left unchanged otherwise; the updated Y is returned.  F is called only
   for the lanes selected by P, and both branches are hinted as likely.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Lane 1 of the predicate and of the input are copied to scalars before
     the first call to F.  NOTE(review): presumably so the whole vectors do
     not have to stay live across that call -- confirm before reordering.
     The predicate lane is kept as an integer; it is a mask, not a number,
     so it is not converted to floating point.  */
  uint64_t p1 = p[1];
  double x1 = x[1];
  if (likely (p[0]))
    y[0] = f (x[0]);
  if (likely (p1))
    y[1] = f (x1);
  return y;
}
134*412f47f9SXin Li 
135*412f47f9SXin Li #endif
136