xref: /btstack/3rd-party/lc3-google/test/neon/neon.h (revision 4c4eb519208b4224604d94b3ed1931841ddd93bb)
14930cef6SMatthias Ringwald /******************************************************************************
24930cef6SMatthias Ringwald  *
34930cef6SMatthias Ringwald  *  Copyright 2022 Google LLC
44930cef6SMatthias Ringwald  *
54930cef6SMatthias Ringwald  *  Licensed under the Apache License, Version 2.0 (the "License");
64930cef6SMatthias Ringwald  *  you may not use this file except in compliance with the License.
74930cef6SMatthias Ringwald  *  You may obtain a copy of the License at:
84930cef6SMatthias Ringwald  *
94930cef6SMatthias Ringwald  *  http://www.apache.org/licenses/LICENSE-2.0
104930cef6SMatthias Ringwald  *
114930cef6SMatthias Ringwald  *  Unless required by applicable law or agreed to in writing, software
124930cef6SMatthias Ringwald  *  distributed under the License is distributed on an "AS IS" BASIS,
134930cef6SMatthias Ringwald  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
144930cef6SMatthias Ringwald  *  See the License for the specific language governing permissions and
154930cef6SMatthias Ringwald  *  limitations under the License.
164930cef6SMatthias Ringwald  *
174930cef6SMatthias Ringwald  ******************************************************************************/
184930cef6SMatthias Ringwald 
19*4c4eb519SMatthias Ringwald #if __ARM_NEON && __ARM_ARCH_ISA_A64
204930cef6SMatthias Ringwald 
214930cef6SMatthias Ringwald #include <arm_neon.h>
224930cef6SMatthias Ringwald 
234930cef6SMatthias Ringwald #else
244930cef6SMatthias Ringwald 
254930cef6SMatthias Ringwald #include <stdint.h>
264930cef6SMatthias Ringwald 
274930cef6SMatthias Ringwald 
284930cef6SMatthias Ringwald /* ----------------------------------------------------------------------------
294930cef6SMatthias Ringwald  *  Integer
304930cef6SMatthias Ringwald  * -------------------------------------------------------------------------- */
314930cef6SMatthias Ringwald 
/** Four signed 16-bit lanes, held in the array member `e`. */
typedef struct {
    int16_t e[4];
} int16x4_t;

/** Eight signed 16-bit lanes, held in the array member `e`. */
typedef struct {
    int16_t e[8];
} int16x8_t;

/** Four signed 32-bit lanes, held in the array member `e`. */
typedef struct {
    int32_t e[4];
} int32x4_t;

/** Two signed 64-bit lanes, held in the array member `e`. */
typedef struct {
    int64_t e[2];
} int64x2_t;
374930cef6SMatthias Ringwald 
384930cef6SMatthias Ringwald 
394930cef6SMatthias Ringwald /**
404930cef6SMatthias Ringwald  * Load / Store
414930cef6SMatthias Ringwald  */
424930cef6SMatthias Ringwald 
434930cef6SMatthias Ringwald __attribute__((unused))
vld1_s16(const int16_t * p)444930cef6SMatthias Ringwald static int16x4_t vld1_s16(const int16_t *p)
454930cef6SMatthias Ringwald {
464930cef6SMatthias Ringwald     return (int16x4_t){ { p[0], p[1], p[2], p[3] } };
474930cef6SMatthias Ringwald }
484930cef6SMatthias Ringwald 
494930cef6SMatthias Ringwald 
504930cef6SMatthias Ringwald /**
514930cef6SMatthias Ringwald  * Arithmetic
524930cef6SMatthias Ringwald  */
534930cef6SMatthias Ringwald 
544930cef6SMatthias Ringwald __attribute__((unused))
vmull_s16(int16x4_t a,int16x4_t b)554930cef6SMatthias Ringwald static int32x4_t vmull_s16(int16x4_t a, int16x4_t b)
564930cef6SMatthias Ringwald {
574930cef6SMatthias Ringwald     return (int32x4_t){ { a.e[0] * b.e[0], a.e[1] * b.e[1],
584930cef6SMatthias Ringwald                           a.e[2] * b.e[2], a.e[3] * b.e[3]  } };
594930cef6SMatthias Ringwald }
604930cef6SMatthias Ringwald 
614930cef6SMatthias Ringwald __attribute__((unused))
vmlal_s16(int32x4_t r,int16x4_t a,int16x4_t b)624930cef6SMatthias Ringwald static int32x4_t vmlal_s16(int32x4_t r, int16x4_t a, int16x4_t b)
634930cef6SMatthias Ringwald {
644930cef6SMatthias Ringwald     return (int32x4_t){ {
654930cef6SMatthias Ringwald         r.e[0] + a.e[0] * b.e[0], r.e[1] + a.e[1] * b.e[1],
664930cef6SMatthias Ringwald         r.e[2] + a.e[2] * b.e[2], r.e[3] + a.e[3] * b.e[3] } };
674930cef6SMatthias Ringwald }
684930cef6SMatthias Ringwald 
694930cef6SMatthias Ringwald __attribute__((unused))
vpadalq_s32(int64x2_t a,int32x4_t b)704930cef6SMatthias Ringwald static int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
714930cef6SMatthias Ringwald {
724930cef6SMatthias Ringwald     int64x2_t r;
734930cef6SMatthias Ringwald 
744930cef6SMatthias Ringwald     r.e[0] = a.e[0] + ((int64_t)b.e[0] + b.e[1]);
754930cef6SMatthias Ringwald     r.e[1] = a.e[1] + ((int64_t)b.e[2] + b.e[3]);
764930cef6SMatthias Ringwald 
774930cef6SMatthias Ringwald     return r;
784930cef6SMatthias Ringwald }
794930cef6SMatthias Ringwald 
804930cef6SMatthias Ringwald 
814930cef6SMatthias Ringwald /**
824930cef6SMatthias Ringwald  * Reduce
834930cef6SMatthias Ringwald  */
844930cef6SMatthias Ringwald 
854930cef6SMatthias Ringwald __attribute__((unused))
vaddvq_s32(int32x4_t v)864930cef6SMatthias Ringwald static int32_t vaddvq_s32(int32x4_t v)
874930cef6SMatthias Ringwald {
884930cef6SMatthias Ringwald     return v.e[0] + v.e[1] + v.e[2] + v.e[3];
894930cef6SMatthias Ringwald }
904930cef6SMatthias Ringwald 
914930cef6SMatthias Ringwald __attribute__((unused))
vaddvq_s64(int64x2_t v)924930cef6SMatthias Ringwald static int64_t vaddvq_s64(int64x2_t v)
934930cef6SMatthias Ringwald {
944930cef6SMatthias Ringwald     return v.e[0] + v.e[1];
954930cef6SMatthias Ringwald }
964930cef6SMatthias Ringwald 
974930cef6SMatthias Ringwald 
984930cef6SMatthias Ringwald /**
994930cef6SMatthias Ringwald  * Manipulation
1004930cef6SMatthias Ringwald  */
1014930cef6SMatthias Ringwald 
1024930cef6SMatthias Ringwald __attribute__((unused))
vext_s16(int16x4_t a,int16x4_t b,const int n)1034930cef6SMatthias Ringwald static int16x4_t vext_s16(int16x4_t a, int16x4_t b, const int n)
1044930cef6SMatthias Ringwald {
1054930cef6SMatthias Ringwald     int16_t x[] = { a.e[0], a.e[1], a.e[2], a.e[3],
1064930cef6SMatthias Ringwald                     b.e[0], b.e[1], b.e[2], b.e[3] };
1074930cef6SMatthias Ringwald 
1084930cef6SMatthias Ringwald     return (int16x4_t){ { x[n], x[n+1], x[n+2], x[n+3] } };
1094930cef6SMatthias Ringwald }
1104930cef6SMatthias Ringwald 
1114930cef6SMatthias Ringwald __attribute__((unused))
vmovq_n_s32(uint32_t v)1124930cef6SMatthias Ringwald static int32x4_t vmovq_n_s32(uint32_t v)
1134930cef6SMatthias Ringwald {
1144930cef6SMatthias Ringwald     return (int32x4_t){ { v, v, v, v } };
1154930cef6SMatthias Ringwald }
1164930cef6SMatthias Ringwald 
1174930cef6SMatthias Ringwald __attribute__((unused))
vmovq_n_s64(int64_t v)1184930cef6SMatthias Ringwald static int64x2_t vmovq_n_s64(int64_t v)
1194930cef6SMatthias Ringwald {
1204930cef6SMatthias Ringwald     return (int64x2_t){ { v, v, } };
1214930cef6SMatthias Ringwald }
1224930cef6SMatthias Ringwald 
1234930cef6SMatthias Ringwald 
1244930cef6SMatthias Ringwald 
1254930cef6SMatthias Ringwald /* ----------------------------------------------------------------------------
1264930cef6SMatthias Ringwald  *  Floating Point
1274930cef6SMatthias Ringwald  * -------------------------------------------------------------------------- */
1284930cef6SMatthias Ringwald 
/** Two float lanes, held in the array member `e`. */
typedef struct {
    float e[2];
} float32x2_t;

/** Four float lanes, held in the array member `e`. */
typedef struct {
    float e[4];
} float32x4_t;

/** Pair of 2-lane float vectors, as produced by `vld2_f32`. */
typedef struct {
    float32x2_t val[2];
} float32x2x2_t;

/** Pair of 4-lane float vectors, as produced by `vld2q_f32`. */
typedef struct {
    float32x4_t val[2];
} float32x4x2_t;
1344930cef6SMatthias Ringwald 
1354930cef6SMatthias Ringwald 
1364930cef6SMatthias Ringwald /**
1374930cef6SMatthias Ringwald  * Load / Store
1384930cef6SMatthias Ringwald  */
1394930cef6SMatthias Ringwald 
1404930cef6SMatthias Ringwald __attribute__((unused))
vld1_f32(const float * p)1414930cef6SMatthias Ringwald static float32x2_t vld1_f32(const float *p)
1424930cef6SMatthias Ringwald {
1434930cef6SMatthias Ringwald     return (float32x2_t){ { p[0], p[1] } };
1444930cef6SMatthias Ringwald }
1454930cef6SMatthias Ringwald 
1464930cef6SMatthias Ringwald __attribute__((unused))
vld1q_f32(const float * p)1474930cef6SMatthias Ringwald static float32x4_t vld1q_f32(const float *p)
1484930cef6SMatthias Ringwald {
1494930cef6SMatthias Ringwald     return (float32x4_t){ { p[0], p[1], p[2], p[3] } };
1504930cef6SMatthias Ringwald }
1514930cef6SMatthias Ringwald 
1524930cef6SMatthias Ringwald __attribute__((unused))
vld1q_dup_f32(const float * p)1534930cef6SMatthias Ringwald static float32x4_t vld1q_dup_f32(const float *p)
1544930cef6SMatthias Ringwald {
1554930cef6SMatthias Ringwald     return (float32x4_t){ { p[0], p[0], p[0], p[0] } };
1564930cef6SMatthias Ringwald }
1574930cef6SMatthias Ringwald 
1584930cef6SMatthias Ringwald __attribute__((unused))
vld2_f32(const float * p)1594930cef6SMatthias Ringwald static float32x2x2_t vld2_f32(const float *p)
1604930cef6SMatthias Ringwald {
1614930cef6SMatthias Ringwald     return (float32x2x2_t){ .val[0] = { { p[0], p[2] } },
1624930cef6SMatthias Ringwald                             .val[1] = { { p[1], p[3] } } };
1634930cef6SMatthias Ringwald }
1644930cef6SMatthias Ringwald 
1654930cef6SMatthias Ringwald __attribute__((unused))
vld2q_f32(const float * p)1664930cef6SMatthias Ringwald static float32x4x2_t vld2q_f32(const float *p)
1674930cef6SMatthias Ringwald {
1684930cef6SMatthias Ringwald     return (float32x4x2_t){ .val[0] = { { p[0], p[2], p[4], p[6] } },
1694930cef6SMatthias Ringwald                             .val[1] = { { p[1], p[3], p[5], p[7] } } };
1704930cef6SMatthias Ringwald }
1714930cef6SMatthias Ringwald 
1724930cef6SMatthias Ringwald __attribute__((unused))
vst1_f32(float * p,float32x2_t v)1734930cef6SMatthias Ringwald static void vst1_f32(float *p, float32x2_t v)
1744930cef6SMatthias Ringwald {
1754930cef6SMatthias Ringwald     p[0] = v.e[0], p[1] = v.e[1];
1764930cef6SMatthias Ringwald }
1774930cef6SMatthias Ringwald 
1784930cef6SMatthias Ringwald __attribute__((unused))
vst1q_f32(float * p,float32x4_t v)1794930cef6SMatthias Ringwald static void vst1q_f32(float *p, float32x4_t v)
1804930cef6SMatthias Ringwald {
1814930cef6SMatthias Ringwald     p[0] = v.e[0], p[1] = v.e[1], p[2] = v.e[2], p[3] = v.e[3];
1824930cef6SMatthias Ringwald }
1834930cef6SMatthias Ringwald 
1844930cef6SMatthias Ringwald /**
1854930cef6SMatthias Ringwald  * Arithmetic
1864930cef6SMatthias Ringwald  */
1874930cef6SMatthias Ringwald 
1884930cef6SMatthias Ringwald __attribute__((unused))
vneg_f32(float32x2_t a)1894930cef6SMatthias Ringwald static float32x2_t vneg_f32(float32x2_t a)
1904930cef6SMatthias Ringwald {
1914930cef6SMatthias Ringwald     return (float32x2_t){ { -a.e[0], -a.e[1] } };
1924930cef6SMatthias Ringwald }
1934930cef6SMatthias Ringwald 
1944930cef6SMatthias Ringwald __attribute__((unused))
vnegq_f32(float32x4_t a)1954930cef6SMatthias Ringwald static float32x4_t vnegq_f32(float32x4_t a)
1964930cef6SMatthias Ringwald {
1974930cef6SMatthias Ringwald     return (float32x4_t){ { -a.e[0], -a.e[1], -a.e[2], -a.e[3] } };
1984930cef6SMatthias Ringwald }
1994930cef6SMatthias Ringwald 
2004930cef6SMatthias Ringwald __attribute__((unused))
vaddq_f32(float32x4_t a,float32x4_t b)2014930cef6SMatthias Ringwald static float32x4_t vaddq_f32(float32x4_t a, float32x4_t b)
2024930cef6SMatthias Ringwald {
2034930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[0] + b.e[0], a.e[1] + b.e[1],
2044930cef6SMatthias Ringwald                             a.e[2] + b.e[2], a.e[3] + b.e[3] } };
2054930cef6SMatthias Ringwald }
2064930cef6SMatthias Ringwald 
2074930cef6SMatthias Ringwald __attribute__((unused))
vsubq_f32(float32x4_t a,float32x4_t b)2084930cef6SMatthias Ringwald static float32x4_t vsubq_f32(float32x4_t a, float32x4_t b)
2094930cef6SMatthias Ringwald {
2104930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[0] - b.e[0], a.e[1] - b.e[1],
2114930cef6SMatthias Ringwald                             a.e[2] - b.e[2], a.e[3] - b.e[3] } };
2124930cef6SMatthias Ringwald }
2134930cef6SMatthias Ringwald 
2144930cef6SMatthias Ringwald __attribute__((unused))
vfma_f32(float32x2_t a,float32x2_t b,float32x2_t c)2154930cef6SMatthias Ringwald static float32x2_t vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c)
2164930cef6SMatthias Ringwald {
2174930cef6SMatthias Ringwald     return (float32x2_t){ {
2184930cef6SMatthias Ringwald         a.e[0] + b.e[0] * c.e[0], a.e[1] + b.e[1] * c.e[1] } };
2194930cef6SMatthias Ringwald }
2204930cef6SMatthias Ringwald 
2214930cef6SMatthias Ringwald __attribute__((unused))
vfmaq_f32(float32x4_t a,float32x4_t b,float32x4_t c)2224930cef6SMatthias Ringwald static float32x4_t vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c)
2234930cef6SMatthias Ringwald {
2244930cef6SMatthias Ringwald     return (float32x4_t){ {
2254930cef6SMatthias Ringwald         a.e[0] + b.e[0] * c.e[0], a.e[1] + b.e[1] * c.e[1],
2264930cef6SMatthias Ringwald         a.e[2] + b.e[2] * c.e[2], a.e[3] + b.e[3] * c.e[3] } };
2274930cef6SMatthias Ringwald }
2284930cef6SMatthias Ringwald 
2294930cef6SMatthias Ringwald __attribute__((unused))
vfms_f32(float32x2_t a,float32x2_t b,float32x2_t c)2304930cef6SMatthias Ringwald static float32x2_t vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c)
2314930cef6SMatthias Ringwald {
2324930cef6SMatthias Ringwald     return (float32x2_t){ {
2334930cef6SMatthias Ringwald         a.e[0] - b.e[0] * c.e[0], a.e[1] - b.e[1] * c.e[1] } };
2344930cef6SMatthias Ringwald }
2354930cef6SMatthias Ringwald 
2364930cef6SMatthias Ringwald __attribute__((unused))
vfmsq_f32(float32x4_t a,float32x4_t b,float32x4_t c)2374930cef6SMatthias Ringwald static float32x4_t vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c)
2384930cef6SMatthias Ringwald {
2394930cef6SMatthias Ringwald     return (float32x4_t){ {
2404930cef6SMatthias Ringwald         a.e[0] - b.e[0] * c.e[0], a.e[1] - b.e[1] * c.e[1],
2414930cef6SMatthias Ringwald         a.e[2] - b.e[2] * c.e[2], a.e[3] - b.e[3] * c.e[3] } };
2424930cef6SMatthias Ringwald }
2434930cef6SMatthias Ringwald 
2444930cef6SMatthias Ringwald 
2454930cef6SMatthias Ringwald /**
2464930cef6SMatthias Ringwald  * Manipulation
2474930cef6SMatthias Ringwald  */
2484930cef6SMatthias Ringwald 
2494930cef6SMatthias Ringwald __attribute__((unused))
vcreate_f32(uint64_t u)2504930cef6SMatthias Ringwald static float32x2_t vcreate_f32(uint64_t u)
2514930cef6SMatthias Ringwald {
2524930cef6SMatthias Ringwald     float *f = (float *)&u;
2534930cef6SMatthias Ringwald     return (float32x2_t){ { f[0] , f[1] } };
2544930cef6SMatthias Ringwald }
2554930cef6SMatthias Ringwald 
2564930cef6SMatthias Ringwald __attribute__((unused))
vcombine_f32(float32x2_t a,float32x2_t b)2574930cef6SMatthias Ringwald static float32x4_t vcombine_f32(float32x2_t a, float32x2_t b)
2584930cef6SMatthias Ringwald {
2594930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[0], a.e[1], b.e[0], b.e[1] } };
2604930cef6SMatthias Ringwald }
2614930cef6SMatthias Ringwald 
2624930cef6SMatthias Ringwald __attribute__((unused))
vget_low_f32(float32x4_t a)2634930cef6SMatthias Ringwald static float32x2_t vget_low_f32(float32x4_t a)
2644930cef6SMatthias Ringwald {
2654930cef6SMatthias Ringwald     return (float32x2_t){ { a.e[0], a.e[1] } };
2664930cef6SMatthias Ringwald }
2674930cef6SMatthias Ringwald 
2684930cef6SMatthias Ringwald __attribute__((unused))
vget_high_f32(float32x4_t a)2694930cef6SMatthias Ringwald static float32x2_t vget_high_f32(float32x4_t a)
2704930cef6SMatthias Ringwald {
2714930cef6SMatthias Ringwald     return (float32x2_t){ { a.e[2], a.e[3] } };
2724930cef6SMatthias Ringwald }
2734930cef6SMatthias Ringwald 
2744930cef6SMatthias Ringwald __attribute__((unused))
vmovq_n_f32(float v)2754930cef6SMatthias Ringwald static float32x4_t vmovq_n_f32(float v)
2764930cef6SMatthias Ringwald {
2774930cef6SMatthias Ringwald     return (float32x4_t){ { v, v, v, v } };
2784930cef6SMatthias Ringwald }
2794930cef6SMatthias Ringwald 
2804930cef6SMatthias Ringwald __attribute__((unused))
vrev64_f32(float32x2_t v)2814930cef6SMatthias Ringwald static float32x2_t vrev64_f32(float32x2_t v)
2824930cef6SMatthias Ringwald {
2834930cef6SMatthias Ringwald     return (float32x2_t){ { v.e[1], v.e[0] } };
2844930cef6SMatthias Ringwald }
2854930cef6SMatthias Ringwald 
2864930cef6SMatthias Ringwald __attribute__((unused))
vrev64q_f32(float32x4_t v)2874930cef6SMatthias Ringwald static float32x4_t vrev64q_f32(float32x4_t v)
2884930cef6SMatthias Ringwald {
2894930cef6SMatthias Ringwald     return (float32x4_t){ { v.e[1], v.e[0], v.e[3], v.e[2] } };
2904930cef6SMatthias Ringwald }
2914930cef6SMatthias Ringwald 
2924930cef6SMatthias Ringwald __attribute__((unused))
vtrn1_f32(float32x2_t a,float32x2_t b)2934930cef6SMatthias Ringwald static float32x2_t vtrn1_f32(float32x2_t a, float32x2_t b)
2944930cef6SMatthias Ringwald {
2954930cef6SMatthias Ringwald     return (float32x2_t){ { a.e[0], b.e[0] } };
2964930cef6SMatthias Ringwald }
2974930cef6SMatthias Ringwald 
2984930cef6SMatthias Ringwald __attribute__((unused))
vtrn2_f32(float32x2_t a,float32x2_t b)2994930cef6SMatthias Ringwald static float32x2_t vtrn2_f32(float32x2_t a, float32x2_t b)
3004930cef6SMatthias Ringwald {
3014930cef6SMatthias Ringwald     return (float32x2_t){ { a.e[1], b.e[1] } };
3024930cef6SMatthias Ringwald }
3034930cef6SMatthias Ringwald 
3044930cef6SMatthias Ringwald __attribute__((unused))
vtrn1q_f32(float32x4_t a,float32x4_t b)3054930cef6SMatthias Ringwald static float32x4_t vtrn1q_f32(float32x4_t a, float32x4_t b)
3064930cef6SMatthias Ringwald {
3074930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[0], b.e[0], a.e[2], b.e[2] } };
3084930cef6SMatthias Ringwald }
3094930cef6SMatthias Ringwald 
3104930cef6SMatthias Ringwald __attribute__((unused))
vtrn2q_f32(float32x4_t a,float32x4_t b)3114930cef6SMatthias Ringwald static float32x4_t vtrn2q_f32(float32x4_t a, float32x4_t b)
3124930cef6SMatthias Ringwald {
3134930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[1], b.e[1], a.e[3], b.e[3] } };
3144930cef6SMatthias Ringwald }
3154930cef6SMatthias Ringwald 
3164930cef6SMatthias Ringwald __attribute__((unused))
vzip1q_f32(float32x4_t a,float32x4_t b)3174930cef6SMatthias Ringwald static float32x4_t vzip1q_f32(float32x4_t a, float32x4_t b)
3184930cef6SMatthias Ringwald {
3194930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[0], b.e[0], a.e[1], b.e[1] } };
3204930cef6SMatthias Ringwald }
3214930cef6SMatthias Ringwald 
3224930cef6SMatthias Ringwald __attribute__((unused))
vzip2q_f32(float32x4_t a,float32x4_t b)3234930cef6SMatthias Ringwald static float32x4_t vzip2q_f32(float32x4_t a, float32x4_t b)
3244930cef6SMatthias Ringwald {
3254930cef6SMatthias Ringwald     return (float32x4_t){ { a.e[2], b.e[2], a.e[3], b.e[3] } };
3264930cef6SMatthias Ringwald }
3274930cef6SMatthias Ringwald 
3284930cef6SMatthias Ringwald 
#endif /* __ARM_NEON && __ARM_ARCH_ISA_A64 */
330