1 /*
2 * Vector math abstractions.
3 *
4 * Copyright (c) 2019-2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8 #ifndef _V_MATH_H
9 #define _V_MATH_H
10
11 #if !__aarch64__
12 # error "Cannot build without AArch64"
13 #endif
14
/* Function attribute requesting the aarch64_vector_pcs calling convention.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Symbol-name builders for the vector routines.  Each macro pastes FUN after
   a fixed prefix; the F (single-precision) variants also append an `f':
     V_NAME_F1 (foo) -> _ZGVnN4v_foof     V_NAME_D1 (foo) -> _ZGVnN2v_foo
     V_NAME_F2 (foo) -> _ZGVnN4vv_foof    V_NAME_D2 (foo) -> _ZGVnN2vv_foo
   NOTE(review): the prefixes look like AArch64 vector function ABI mangling
   (Advanced SIMD, unmasked, 4 or 2 lanes, one or two vector arguments) --
   confirm against the ABI document.  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun
21
22 #include <stdint.h>
23 #include "../math_config.h"
24 #include <arm_neon.h>
25
/* Shorthand helpers for declaring constants.  V<n> (X) expands to a
   brace-enclosed initializer list that repeats X n times.  X is substituted
   once per element, so do not pass an argument with side effects.  */
# define V2(X) { X, X }
# define V4(X) { X, X, X, X }
# define V8(X) { X, X, X, X, X, X, X, X }
30
/* Return nonzero if any bit of X is set.  The four 16-bit lanes are viewed
   as a single 64-bit lane and that lane is compared against zero.  */
static inline int
v_any_u16h (uint16x4_t x)
{
  uint64x1_t as_u64 = vreinterpret_u64_u16 (x);
  uint64_t bits = vget_lane_u64 (as_u64, 0);
  return bits != 0;
}
36
/* Return 4, the lane count of the 32-bit element vectors used in this file
   (float32x4_t, uint32x4_t).  */
static inline int
v_lanes32 (void)
{
  const int lane_count = 4;
  return lane_count;
}
42
/* Build a float32x4_t with every lane set to X.  */
static inline float32x4_t
v_f32 (float x)
{
  float32x4_t splat = { x, x, x, x };
  return splat;
}
/* Build a uint32x4_t with every lane set to X.  */
static inline uint32x4_t
v_u32 (uint32_t x)
{
  uint32x4_t splat = { x, x, x, x };
  return splat;
}
/* Return nonzero if any element of a comparison mask (e.g. a v_cond result)
   is non-zero.  */
static inline int
v_any_u32 (uint32x4_t x)
{
  /* Every element of x is assumed to be either 0 or -1u.  The test adds the
     two 64-bit halves of x; for arbitrary element values that sum could
     wrap around to zero.  */
  uint64x2_t halves = vreinterpretq_u64_u32 (x);
  uint64_t sum = vpaddd_u64 (halves);
  return sum != 0;
}
/* Return nonzero if any bit of X is set.  The two 32-bit lanes are viewed as
   a single 64-bit lane and that lane is compared against zero.  */
static inline int
v_any_u32h (uint32x2_t x)
{
  uint64x1_t as_u64 = vreinterpret_u64_u32 (x);
  uint64_t bits = vget_lane_u64 (as_u64, 0);
  return bits != 0;
}
/* Gather four floats from TAB: lane i of the result is tab[idx[i]].  No
   bounds checking is done on the indices.  */
static inline float32x4_t
v_lookup_f32 (const float *tab, uint32x4_t idx)
{
  const float e0 = tab[idx[0]];
  const float e1 = tab[idx[1]];
  const float e2 = tab[idx[2]];
  const float e3 = tab[idx[3]];
  float32x4_t gathered = { e0, e1, e2, e3 };
  return gathered;
}
/* Gather four 32-bit words from TAB: lane i of the result is tab[idx[i]].
   No bounds checking is done on the indices.  */
static inline uint32x4_t
v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
{
  const uint32_t e0 = tab[idx[0]];
  const uint32_t e1 = tab[idx[1]];
  const uint32_t e2 = tab[idx[2]];
  const uint32_t e3 = tab[idx[3]];
  uint32x4_t gathered = { e0, e1, e2, e3 };
  return gathered;
}
/* Apply the scalar routine F lane by lane under predicate P.  Where a lane
   of P is nonzero the result lane is F applied to that lane of X; elsewhere
   the lane of Y is passed through and F is not called for it.  */
static inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
{
  float32x4_t result = y;
  if (p[0])
    {
      result[0] = f (x[0]);
    }
  if (p[1])
    {
      result[1] = f (x[1]);
    }
  if (p[2])
    {
      result[2] = f (x[2]);
    }
  if (p[3])
    {
      result[3] = f (x[3]);
    }
  return result;
}
/* Two-argument counterpart of the predicated scalar call.  Where a lane of P
   is nonzero the result lane is F applied to the matching lanes of X1 and
   X2; elsewhere the lane of Y is passed through and F is not called for
   it.  */
static inline float32x4_t
v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	     float32x4_t y, uint32x4_t p)
{
  float32x4_t result = y;
  if (p[0])
    {
      result[0] = f (x1[0], x2[0]);
    }
  if (p[1])
    {
      result[1] = f (x1[1], x2[1]);
    }
  if (p[2])
    {
      result[2] = f (x1[2], x2[2]);
    }
  if (p[3])
    {
      result[3] = f (x1[3], x2[3]);
    }
  return result;
}
90
/* Return 2, the lane count of the 64-bit element vectors used in this file
   (float64x2_t, uint64x2_t).  */
static inline int
v_lanes64 (void)
{
  const int lane_count = 2;
  return lane_count;
}
/* Build a float64x2_t with both lanes set to X.  */
static inline float64x2_t
v_f64 (double x)
{
  float64x2_t splat = { x, x };
  return splat;
}
/* Build a uint64x2_t with both lanes set to X.  */
static inline uint64x2_t
v_u64 (uint64_t x)
{
  uint64x2_t splat = { x, x };
  return splat;
}
/* Return nonzero if any element of a comparison mask (e.g. a v_cond result)
   is non-zero.  */
static inline int
v_any_u64 (uint64x2_t x)
{
  /* Both elements of x are assumed to be either 0 or -1u.  The test adds the
     two lanes; for arbitrary element values that sum could wrap around to
     zero.  */
  uint64_t sum = vpaddd_u64 (x);
  return sum != 0;
}
/* Gather two doubles from TAB: lane i of the result is tab[idx[i]].  No
   bounds checking is done on the indices.  */
static inline float64x2_t
v_lookup_f64 (const double *tab, uint64x2_t idx)
{
  const double e0 = tab[idx[0]];
  const double e1 = tab[idx[1]];
  float64x2_t gathered = { e0, e1 };
  return gathered;
}
/* Gather two 64-bit words from TAB: lane i of the result is tab[idx[i]].  No
   bounds checking is done on the indices.  */
static inline uint64x2_t
v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
{
  const uint64_t e0 = tab[idx[0]];
  const uint64_t e1 = tab[idx[1]];
  uint64x2_t gathered = { e0, e1 };
  return gathered;
}
/* Apply the scalar routine F lane by lane under predicate P.

   For each of the two lanes: if the matching lane of P is nonzero, the
   result lane is F applied to that lane of X; otherwise the lane of Y is
   returned unchanged and F is not called for it.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Lane 1 of P and X is copied into scalars before F is first called.
     NOTE(review): presumably so the values need not stay live in vector
     registers across the call -- confirm.

     The predicate is kept as an integer.  Holding it in a double would force
     an integer-to-floating conversion, which is inexact for an all-ones mask
     and can raise a spurious inexact exception before F is even called.  */
  uint64_t p1 = p[1];
  double x1 = x[1];
  if (likely (p[0]))
    y[0] = f (x[0]);
  if (likely (p1))
    y[1] = f (x1);
  return y;
}
134
135 #endif
136