xref: /aosp_15_r20/external/pffft/simd/pf_neon_float.h (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1*3f1979aaSAndroid Build Coastguard Worker 
2*3f1979aaSAndroid Build Coastguard Worker /* Copyright (c) 2013  Julien Pommier ( [email protected] )
3*3f1979aaSAndroid Build Coastguard Worker 
4*3f1979aaSAndroid Build Coastguard Worker    Redistribution and use of the Software in source and binary forms,
5*3f1979aaSAndroid Build Coastguard Worker    with or without modification, is permitted provided that the
6*3f1979aaSAndroid Build Coastguard Worker    following conditions are met:
7*3f1979aaSAndroid Build Coastguard Worker 
8*3f1979aaSAndroid Build Coastguard Worker    - Neither the names of NCAR's Computational and Information Systems
9*3f1979aaSAndroid Build Coastguard Worker    Laboratory, the University Corporation for Atmospheric Research,
10*3f1979aaSAndroid Build Coastguard Worker    nor the names of its sponsors or contributors may be used to
11*3f1979aaSAndroid Build Coastguard Worker    endorse or promote products derived from this Software without
12*3f1979aaSAndroid Build Coastguard Worker    specific prior written permission.
13*3f1979aaSAndroid Build Coastguard Worker 
14*3f1979aaSAndroid Build Coastguard Worker    - Redistributions of source code must retain the above copyright
15*3f1979aaSAndroid Build Coastguard Worker    notices, this list of conditions, and the disclaimer below.
16*3f1979aaSAndroid Build Coastguard Worker 
17*3f1979aaSAndroid Build Coastguard Worker    - Redistributions in binary form must reproduce the above copyright
18*3f1979aaSAndroid Build Coastguard Worker    notice, this list of conditions, and the disclaimer below in the
19*3f1979aaSAndroid Build Coastguard Worker    documentation and/or other materials provided with the
20*3f1979aaSAndroid Build Coastguard Worker    distribution.
21*3f1979aaSAndroid Build Coastguard Worker 
22*3f1979aaSAndroid Build Coastguard Worker    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23*3f1979aaSAndroid Build Coastguard Worker    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
24*3f1979aaSAndroid Build Coastguard Worker    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25*3f1979aaSAndroid Build Coastguard Worker    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
26*3f1979aaSAndroid Build Coastguard Worker    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
27*3f1979aaSAndroid Build Coastguard Worker    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28*3f1979aaSAndroid Build Coastguard Worker    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29*3f1979aaSAndroid Build Coastguard Worker    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
30*3f1979aaSAndroid Build Coastguard Worker    SOFTWARE.
31*3f1979aaSAndroid Build Coastguard Worker */
32*3f1979aaSAndroid Build Coastguard Worker 
33*3f1979aaSAndroid Build Coastguard Worker #ifndef PF_NEON_FLT_H
34*3f1979aaSAndroid Build Coastguard Worker #define PF_NEON_FLT_H
35*3f1979aaSAndroid Build Coastguard Worker 
36*3f1979aaSAndroid Build Coastguard Worker /*
37*3f1979aaSAndroid Build Coastguard Worker   ARM NEON support macros
38*3f1979aaSAndroid Build Coastguard Worker */
39*3f1979aaSAndroid Build Coastguard Worker #if !defined(PFFFT_SIMD_DISABLE) && defined(PFFFT_ENABLE_NEON) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64__))
40*3f1979aaSAndroid Build Coastguard Worker #pragma message( __FILE__ ": ARM NEON macros are defined" )
41*3f1979aaSAndroid Build Coastguard Worker 
42*3f1979aaSAndroid Build Coastguard Worker #  include <arm_neon.h>
43*3f1979aaSAndroid Build Coastguard Worker typedef float32x4_t v4sf;
44*3f1979aaSAndroid Build Coastguard Worker 
45*3f1979aaSAndroid Build Coastguard Worker #  define SIMD_SZ 4
46*3f1979aaSAndroid Build Coastguard Worker 
47*3f1979aaSAndroid Build Coastguard Worker typedef union v4sf_union {
48*3f1979aaSAndroid Build Coastguard Worker   v4sf  v;
49*3f1979aaSAndroid Build Coastguard Worker   float f[SIMD_SZ];
50*3f1979aaSAndroid Build Coastguard Worker } v4sf_union;
51*3f1979aaSAndroid Build Coastguard Worker 
/* Name of the SIMD backend provided by this header. */
#  define VARCH "NEON"
#  define VREQUIRES_ALIGN 0  /* usually no alignment required */

/* Lane-wise arithmetic on v4sf operands. */
#  define VZERO() vdupq_n_f32(0)          /* every lane set to 0 */
#  define VMUL(a,b) vmulq_f32(a,b)        /* a * b */
#  define VADD(a,b) vaddq_f32(a,b)        /* a + b */
#  define VMADD(a,b,c) vmlaq_f32(c,a,b)   /* a * b + c; the intrinsic takes the addend first */
#  define VSUB(a,b) vsubq_f32(a,b)        /* a - b */

/* Load the float lvalue p and replicate it into all lanes. */
#  define LD_PS1(p) vld1q_dup_f32(&(p))

/*
 * Load SIMD_SZ consecutive floats starting at ptr.
 *
 * VLOAD_UNALIGNED goes through vld1q_f32, which only needs the address to be
 * aligned for a single float. Dereferencing a pointer cast to v4sf* instead
 * would promise vector alignment to the compiler, which is undefined
 * behaviour for a misaligned pointer and may be turned into an
 * alignment-hinted load that faults.
 * The macro yields a value, not an lvalue.
 */
#  define VLOAD_UNALIGNED(ptr)  vld1q_f32((const float *)(ptr))
/*
 * VLOAD_ALIGNED reads through a v4sf pointer, so ptr must satisfy the
 * alignment of v4sf.
 * NOTE(review): VALIGNED in this header only checks 4-byte alignment; confirm
 * that callers never select this macro on the strength of VALIGNED alone.
 */
#  define VLOAD_ALIGNED(ptr)    (*((v4sf*)(ptr)))
/*
 * INTERLEAVE2: zip the lanes of in1 and in2.
 *   out1 = { in1[0], in2[0], in1[1], in2[1] }
 *   out2 = { in1[2], in2[2], in1[3], in2[3] }
 * Both inputs are read before either output is written, so an output may be
 * the same variable as an input.
 */
#  define INTERLEAVE2(in1, in2, out1, out2) {       \
    float32x4x2_t zip2_ = vzipq_f32(in1,in2);       \
    out1 = zip2_.val[0];                            \
    out2 = zip2_.val[1];                            \
  }
/*
 * UNINTERLEAVE2: inverse of INTERLEAVE2 (unzip).
 *   out1 = { in1[0], in1[2], in2[0], in2[2] }
 *   out2 = { in1[1], in1[3], in2[1], in2[3] }
 */
#  define UNINTERLEAVE2(in1, in2, out1, out2) {     \
    float32x4x2_t uzp2_ = vuzpq_f32(in1,in2);       \
    out1 = uzp2_.val[0];                            \
    out2 = uzp2_.val[1];                            \
  }
/*
 * VTRANSPOSE4: in-place transpose of the 4x4 float matrix whose rows are
 * x0..x3. Afterwards xi holds what used to be lane i of x0, x1, x2, x3.
 *
 * Two rounds of zips are used: the first pairs rows (0,2) and (1,3), the
 * second zips those intermediate results together to form the columns.
 */
#  define VTRANSPOSE4(x0,x1,x2,x3) {                                    \
    float32x4x2_t r02_ = vzipq_f32(x0, x2);                             \
    float32x4x2_t r13_ = vzipq_f32(x1, x3);                             \
    float32x4x2_t lo_  = vzipq_f32(r02_.val[0], r13_.val[0]);           \
    float32x4x2_t hi_  = vzipq_f32(r02_.val[1], r13_.val[1]);           \
    x0 = lo_.val[0];                                                    \
    x1 = lo_.val[1];                                                    \
    x2 = hi_.val[0];                                                    \
    x3 = hi_.val[1];                                                    \
  }
// Alternative kept for reference only (inline assembly, noted by the original
// author as marginally faster):
//#  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); }
/* Combine the low half of b with the high half of a: { b[0], b[1], a[2], a[3] } */
#  define VSWAPHL(a,b) \
     vcombine_f32(vget_low_f32(b), vget_high_f32(a))

/* Reverse the order of all four floats: { a[3], a[2], a[1], a[0] } */
#  define VREV_S(a) \
     vcombine_f32(vrev64_f32(vget_high_f32(a)), vrev64_f32(vget_low_f32(a)))

/* Swap the two (re,im) pairs, keeping each pair intact: { a[2], a[3], a[0], a[1] } */
#  define VREV_C(a) \
     vextq_f32(a, a, 2)

/* Non-zero when ptr is a multiple of 4 bytes, i.e. its two lowest address bits are clear. */
#  define VALIGNED(ptr) (0 == (((uintptr_t)(ptr)) & 0x3))
81*3f1979aaSAndroid Build Coastguard Worker 
82*3f1979aaSAndroid Build Coastguard Worker #else
83*3f1979aaSAndroid Build Coastguard Worker /* #pragma message( __FILE__ ": ARM NEON macros are not defined" ) */
84*3f1979aaSAndroid Build Coastguard Worker #endif
85*3f1979aaSAndroid Build Coastguard Worker 
86*3f1979aaSAndroid Build Coastguard Worker #endif /* PF_NEON_FLT_H */
87*3f1979aaSAndroid Build Coastguard Worker 
88