/* Copyright (c) 2013  Julien Pommier ( [email protected] )

   Redistribution and use of the Software in source and binary forms,
   with or without modification, is permitted provided that the
   following conditions are met:

   - Neither the names of NCAR's Computational and Information Systems
   Laboratory, the University Corporation for Atmospheric Research,
   nor the names of its sponsors or contributors may be used to
   endorse or promote products derived from this Software without
   specific prior written permission.

   - Redistributions of source code must retain the above copyright
   notices, this list of conditions, and the disclaimer below.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions, and the disclaimer below in the
   documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
   SOFTWARE.
*/

#ifndef PF_NEON_FLT_H
#define PF_NEON_FLT_H

/*
   ARM NEON support macros

   Everything below is only defined when SIMD is not disabled
   (PFFFT_SIMD_DISABLE), NEON is explicitly requested (PFFFT_ENABLE_NEON)
   and the target is an ARM / AArch64 compiler.
*/
#if !defined(PFFFT_SIMD_DISABLE) && defined(PFFFT_ENABLE_NEON) && (defined(__arm__) || defined(__aarch64__) || defined(__arm64__))
#pragma message( __FILE__ ": ARM NEON macros are defined" )

#  include <arm_neon.h>
#  include <stdint.h>   /* uintptr_t, used by VALIGNED() */

/* vector of 4 single-precision floats */
typedef float32x4_t v4sf;

/* number of float lanes in one v4sf */
#  define SIMD_SZ 4

/* gives per-lane access to the floats of a v4sf */
typedef union v4sf_union {
  v4sf  v;
  float f[SIMD_SZ];
} v4sf_union;

#  define VARCH "NEON"
#  define VREQUIRES_ALIGN 0  /* usually no alignment required */

/* all lanes set to 0 */
#  define VZERO() vdupq_n_f32(0)
/* lane-wise a*b, a+b, c + a*b and a-b */
#  define VMUL(a,b) vmulq_f32(a,b)
#  define VADD(a,b) vaddq_f32(a,b)
#  define VMADD(a,b,c) vmlaq_f32(c,a,b)
#  define VSUB(a,b) vsubq_f32(a,b)

/* load the single float lvalue p and replicate it into all lanes */
#  define LD_PS1(p) vld1q_dup_f32(&(p))

/* Load 4 consecutive floats starting at ptr.
   The unaligned variant goes through the vld1q_f32 intrinsic instead of
   dereferencing a v4sf pointer: a plain dereference lets the compiler
   assume the natural alignment of the vector type, which an "unaligned"
   pointer does not guarantee. */
#  define VLOAD_UNALIGNED(ptr)  vld1q_f32((const float *)(ptr))
#  define VLOAD_ALIGNED(ptr)    (*((v4sf*)(ptr)))

/* out1/out2 receive the two result vectors of zipping (interleaving) in1 and in2 */
#  define INTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vzipq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }
/* out1/out2 receive the two result vectors of unzipping (de-interleaving) in1 and in2 */
#  define UNINTERLEAVE2(in1, in2, out1, out2) { float32x4x2_t tmp__ = vuzpq_f32(in1,in2); out1=tmp__.val[0]; out2=tmp__.val[1]; }

/* in-place transpose of the 4x4 float matrix whose rows are x0..x3,
   built from two rounds of zips */
#  define VTRANSPOSE4(x0,x1,x2,x3) {                                    \
    float32x4x2_t t0_ = vzipq_f32(x0, x2);                              \
    float32x4x2_t t1_ = vzipq_f32(x1, x3);                              \
    float32x4x2_t u0_ = vzipq_f32(t0_.val[0], t1_.val[0]);              \
    float32x4x2_t u1_ = vzipq_f32(t0_.val[1], t1_.val[1]);              \
    x0 = u0_.val[0]; x1 = u0_.val[1]; x2 = u1_.val[0]; x3 = u1_.val[1]; \
  }
// marginally faster version
//#  define VTRANSPOSE4(x0,x1,x2,x3) { asm("vtrn.32 %q0, %q1;\n vtrn.32 %q2,%q3\n vswp %f0,%e2\n vswp %f1,%e3" : "+w"(x0), "+w"(x1), "+w"(x2), "+w"(x3)::); }

/* result = { low half of b, high half of a } */
#  define VSWAPHL(a,b) vcombine_f32(vget_low_f32(b), vget_high_f32(a))

/* reverse/flip all floats */
#  define VREV_S(a)    vcombine_f32(vrev64_f32(vget_high_f32(a)), vrev64_f32(vget_low_f32(a)))
/* reverse/flip complex floats */
#  define VREV_C(a)    vextq_f32(a, a, 2)

/* non-zero when ptr is a multiple of 4 bytes (its two low address bits are clear) */
#  define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0x3) == 0)

#else
/* #pragma message( __FILE__ ": ARM NEON macros are not defined" ) */
#endif

#endif /* PF_NEON_FLT_H */