1*3f1979aaSAndroid Build Coastguard Worker 2*3f1979aaSAndroid Build Coastguard Worker /* Copyright (c) 2013 Julien Pommier ( [email protected] ) 3*3f1979aaSAndroid Build Coastguard Worker Copyright (c) 2020 Hayati Ayguen ( [email protected] ) 4*3f1979aaSAndroid Build Coastguard Worker 5*3f1979aaSAndroid Build Coastguard Worker Redistribution and use of the Software in source and binary forms, 6*3f1979aaSAndroid Build Coastguard Worker with or without modification, is permitted provided that the 7*3f1979aaSAndroid Build Coastguard Worker following conditions are met: 8*3f1979aaSAndroid Build Coastguard Worker 9*3f1979aaSAndroid Build Coastguard Worker - Neither the names of NCAR's Computational and Information Systems 10*3f1979aaSAndroid Build Coastguard Worker Laboratory, the University Corporation for Atmospheric Research, 11*3f1979aaSAndroid Build Coastguard Worker nor the names of its sponsors or contributors may be used to 12*3f1979aaSAndroid Build Coastguard Worker endorse or promote products derived from this Software without 13*3f1979aaSAndroid Build Coastguard Worker specific prior written permission. 14*3f1979aaSAndroid Build Coastguard Worker 15*3f1979aaSAndroid Build Coastguard Worker - Redistributions of source code must retain the above copyright 16*3f1979aaSAndroid Build Coastguard Worker notices, this list of conditions, and the disclaimer below. 17*3f1979aaSAndroid Build Coastguard Worker 18*3f1979aaSAndroid Build Coastguard Worker - Redistributions in binary form must reproduce the above copyright 19*3f1979aaSAndroid Build Coastguard Worker notice, this list of conditions, and the disclaimer below in the 20*3f1979aaSAndroid Build Coastguard Worker documentation and/or other materials provided with the 21*3f1979aaSAndroid Build Coastguard Worker distribution. 22*3f1979aaSAndroid Build Coastguard Worker 23*3f1979aaSAndroid Build Coastguard Worker THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24*3f1979aaSAndroid Build Coastguard Worker EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 25*3f1979aaSAndroid Build Coastguard Worker MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26*3f1979aaSAndroid Build Coastguard Worker NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 27*3f1979aaSAndroid Build Coastguard Worker HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 28*3f1979aaSAndroid Build Coastguard Worker EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29*3f1979aaSAndroid Build Coastguard Worker ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30*3f1979aaSAndroid Build Coastguard Worker CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 31*3f1979aaSAndroid Build Coastguard Worker SOFTWARE. 32*3f1979aaSAndroid Build Coastguard Worker */ 33*3f1979aaSAndroid Build Coastguard Worker 34*3f1979aaSAndroid Build Coastguard Worker #ifndef PF_SCAL_FLT_H 35*3f1979aaSAndroid Build Coastguard Worker #define PF_SCAL_FLT_H 36*3f1979aaSAndroid Build Coastguard Worker 37*3f1979aaSAndroid Build Coastguard Worker /* 38*3f1979aaSAndroid Build Coastguard Worker fallback mode(s) for situations where SSE/AVX/NEON/Altivec are not available, use scalar mode instead 39*3f1979aaSAndroid Build Coastguard Worker */ 40*3f1979aaSAndroid Build Coastguard Worker 41*3f1979aaSAndroid Build Coastguard Worker #if !defined(SIMD_SZ) && defined(PFFFT_SCALVEC_ENABLED) 42*3f1979aaSAndroid Build Coastguard Worker #pragma message( __FILE__ ": float SCALAR4 macros are defined" ) 43*3f1979aaSAndroid Build Coastguard Worker 44*3f1979aaSAndroid Build Coastguard Worker typedef struct { 45*3f1979aaSAndroid Build Coastguard Worker vsfscalar a; 46*3f1979aaSAndroid Build Coastguard Worker vsfscalar b; 47*3f1979aaSAndroid Build Coastguard Worker vsfscalar c; 48*3f1979aaSAndroid Build Coastguard Worker vsfscalar d; 49*3f1979aaSAndroid Build Coastguard Worker } v4sf; 50*3f1979aaSAndroid Build Coastguard Worker 51*3f1979aaSAndroid Build Coastguard Worker # define SIMD_SZ 4 52*3f1979aaSAndroid Build Coastguard Worker 53*3f1979aaSAndroid Build Coastguard Worker typedef union v4sf_union { 54*3f1979aaSAndroid Build Coastguard Worker v4sf v; 55*3f1979aaSAndroid Build Coastguard Worker vsfscalar f[SIMD_SZ]; 56*3f1979aaSAndroid Build Coastguard Worker } v4sf_union; 57*3f1979aaSAndroid Build Coastguard Worker 58*3f1979aaSAndroid Build Coastguard Worker # define VARCH "4xScalar" 59*3f1979aaSAndroid Build Coastguard Worker # define VREQUIRES_ALIGN 0 60*3f1979aaSAndroid Build Coastguard Worker VZERO()61*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VZERO() { 62*3f1979aaSAndroid Build Coastguard Worker v4sf r = { 0.f, 0.f, 0.f, 0.f }; 63*3f1979aaSAndroid Build Coastguard Worker return r; 64*3f1979aaSAndroid Build Coastguard Worker } 65*3f1979aaSAndroid Build Coastguard Worker VMUL(v4sf A,v4sf B)66*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VMUL(v4sf A, v4sf B) { 67*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.a * B.a, A.b * B.b, A.c * B.c, A.d * B.d }; 68*3f1979aaSAndroid Build Coastguard Worker return r; 69*3f1979aaSAndroid Build Coastguard Worker } 70*3f1979aaSAndroid Build Coastguard Worker VADD(v4sf A,v4sf B)71*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VADD(v4sf A, v4sf B) { 72*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.a + B.a, A.b + B.b, A.c + B.c, A.d + B.d }; 73*3f1979aaSAndroid Build Coastguard Worker return r; 74*3f1979aaSAndroid Build Coastguard Worker } 75*3f1979aaSAndroid Build Coastguard Worker VMADD(v4sf A,v4sf B,v4sf C)76*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VMADD(v4sf A, v4sf B, v4sf C) { 77*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.a * B.a + C.a, A.b * B.b + C.b, A.c * B.c + C.c, A.d * B.d + C.d }; 78*3f1979aaSAndroid Build Coastguard Worker return r; 79*3f1979aaSAndroid Build Coastguard Worker } 80*3f1979aaSAndroid Build Coastguard Worker VSUB(v4sf A,v4sf B)81*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VSUB(v4sf A, v4sf B) { 82*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.a - B.a, A.b - B.b, A.c - B.c, A.d - B.d }; 83*3f1979aaSAndroid Build Coastguard Worker return r; 84*3f1979aaSAndroid Build Coastguard Worker } 85*3f1979aaSAndroid Build Coastguard Worker LD_PS1(vsfscalar v)86*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) LD_PS1(vsfscalar v) { 87*3f1979aaSAndroid Build Coastguard Worker v4sf r = { v, v, v, v }; 88*3f1979aaSAndroid Build Coastguard Worker return r; 89*3f1979aaSAndroid Build Coastguard Worker } 90*3f1979aaSAndroid Build Coastguard Worker 91*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_UNALIGNED(ptr) (*((v4sf*)(ptr))) 92*3f1979aaSAndroid Build Coastguard Worker 93*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_ALIGNED(ptr) (*((v4sf*)(ptr))) 94*3f1979aaSAndroid Build Coastguard Worker 95*3f1979aaSAndroid Build Coastguard Worker # define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(v4sf)-1) ) == 0) 96*3f1979aaSAndroid Build Coastguard Worker 97*3f1979aaSAndroid Build Coastguard Worker 98*3f1979aaSAndroid Build Coastguard Worker /* INTERLEAVE2() */ 99*3f1979aaSAndroid Build Coastguard Worker #define INTERLEAVE2( A, B, C, D) \ 100*3f1979aaSAndroid Build Coastguard Worker do { \ 101*3f1979aaSAndroid Build Coastguard Worker v4sf Cr = { A.a, B.a, A.b, B.b }; \ 102*3f1979aaSAndroid Build Coastguard Worker v4sf Dr = { A.c, B.c, A.d, B.d }; \ 103*3f1979aaSAndroid Build Coastguard Worker C = Cr; \ 104*3f1979aaSAndroid Build Coastguard Worker D = Dr; \ 105*3f1979aaSAndroid Build Coastguard Worker } while (0) 106*3f1979aaSAndroid Build Coastguard Worker 107*3f1979aaSAndroid Build Coastguard Worker 108*3f1979aaSAndroid Build Coastguard Worker /* UNINTERLEAVE2() */ 109*3f1979aaSAndroid Build Coastguard Worker #define UNINTERLEAVE2(A, B, C, D) \ 110*3f1979aaSAndroid Build Coastguard Worker do { \ 111*3f1979aaSAndroid Build Coastguard Worker v4sf Cr = { A.a, A.c, B.a, B.c }; \ 112*3f1979aaSAndroid Build Coastguard Worker v4sf Dr = { A.b, A.d, B.b, B.d }; \ 113*3f1979aaSAndroid Build Coastguard Worker C = Cr; \ 114*3f1979aaSAndroid Build Coastguard Worker D = Dr; \ 115*3f1979aaSAndroid Build Coastguard Worker } while (0) 116*3f1979aaSAndroid Build Coastguard Worker 117*3f1979aaSAndroid Build Coastguard Worker 118*3f1979aaSAndroid Build Coastguard Worker /* VTRANSPOSE4() */ 119*3f1979aaSAndroid Build Coastguard Worker #define VTRANSPOSE4(A, B, C, D) \ 120*3f1979aaSAndroid Build Coastguard Worker do { \ 121*3f1979aaSAndroid Build Coastguard Worker v4sf Ar = { A.a, B.a, C.a, D.a }; \ 122*3f1979aaSAndroid Build Coastguard Worker v4sf Br = { A.b, B.b, C.b, D.b }; \ 123*3f1979aaSAndroid Build Coastguard Worker v4sf Cr = { A.c, B.c, C.c, D.c }; \ 124*3f1979aaSAndroid Build Coastguard Worker v4sf Dr = { A.d, B.d, C.d, D.d }; \ 125*3f1979aaSAndroid Build Coastguard Worker A = Ar; \ 126*3f1979aaSAndroid Build Coastguard Worker B = Br; \ 127*3f1979aaSAndroid Build Coastguard Worker C = Cr; \ 128*3f1979aaSAndroid Build Coastguard Worker D = Dr; \ 129*3f1979aaSAndroid Build Coastguard Worker } while (0) 130*3f1979aaSAndroid Build Coastguard Worker 131*3f1979aaSAndroid Build Coastguard Worker 132*3f1979aaSAndroid Build Coastguard Worker /* VSWAPHL() */ VSWAPHL(v4sf A,v4sf B)133*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VSWAPHL(v4sf A, v4sf B) { 134*3f1979aaSAndroid Build Coastguard Worker v4sf r = { B.a, B.b, A.c, A.d }; 135*3f1979aaSAndroid Build Coastguard Worker return r; 136*3f1979aaSAndroid Build Coastguard Worker } 137*3f1979aaSAndroid Build Coastguard Worker 138*3f1979aaSAndroid Build Coastguard Worker 139*3f1979aaSAndroid Build Coastguard Worker /* reverse/flip all floats */ VREV_S(v4sf A)140*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VREV_S(v4sf A) { 141*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.d, A.c, A.b, A.a }; 142*3f1979aaSAndroid Build Coastguard Worker return r; 143*3f1979aaSAndroid Build Coastguard Worker } 144*3f1979aaSAndroid Build Coastguard Worker 145*3f1979aaSAndroid Build Coastguard Worker /* reverse/flip complex floats */ VREV_C(v4sf A)146*3f1979aaSAndroid Build Coastguard Worker static ALWAYS_INLINE(v4sf) VREV_C(v4sf A) { 147*3f1979aaSAndroid Build Coastguard Worker v4sf r = { A.c, A.d, A.a, A.b }; 148*3f1979aaSAndroid Build Coastguard Worker return r; 149*3f1979aaSAndroid Build Coastguard Worker } 150*3f1979aaSAndroid Build Coastguard Worker 151*3f1979aaSAndroid Build Coastguard Worker #else 152*3f1979aaSAndroid Build Coastguard Worker /* #pragma message( __FILE__ ": float SCALAR4 macros are not defined" ) */ 153*3f1979aaSAndroid Build Coastguard Worker #endif 154*3f1979aaSAndroid Build Coastguard Worker 155*3f1979aaSAndroid Build Coastguard Worker 156*3f1979aaSAndroid Build Coastguard Worker #if !defined(SIMD_SZ) 157*3f1979aaSAndroid Build Coastguard Worker #pragma message( __FILE__ ": float SCALAR1 macros are defined" ) 158*3f1979aaSAndroid Build Coastguard Worker typedef vsfscalar v4sf; 159*3f1979aaSAndroid Build Coastguard Worker 160*3f1979aaSAndroid Build Coastguard Worker # define SIMD_SZ 1 161*3f1979aaSAndroid Build Coastguard Worker 162*3f1979aaSAndroid Build Coastguard Worker typedef union v4sf_union { 163*3f1979aaSAndroid Build Coastguard Worker v4sf v; 164*3f1979aaSAndroid Build Coastguard Worker vsfscalar f[SIMD_SZ]; 165*3f1979aaSAndroid Build Coastguard Worker } v4sf_union; 166*3f1979aaSAndroid Build Coastguard Worker 167*3f1979aaSAndroid Build Coastguard Worker # define VARCH "Scalar" 168*3f1979aaSAndroid Build Coastguard Worker # define VREQUIRES_ALIGN 0 169*3f1979aaSAndroid Build Coastguard Worker # define VZERO() 0.f 170*3f1979aaSAndroid Build Coastguard Worker # define VMUL(a,b) ((a)*(b)) 171*3f1979aaSAndroid Build Coastguard Worker # define VADD(a,b) ((a)+(b)) 172*3f1979aaSAndroid Build Coastguard Worker # define VMADD(a,b,c) ((a)*(b)+(c)) 173*3f1979aaSAndroid Build Coastguard Worker # define VSUB(a,b) ((a)-(b)) 174*3f1979aaSAndroid Build Coastguard Worker # define LD_PS1(p) (p) 175*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_UNALIGNED(ptr) (*(ptr)) 176*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_ALIGNED(ptr) (*(ptr)) 177*3f1979aaSAndroid Build Coastguard Worker # define VALIGNED(ptr) ((((uintptr_t)(ptr)) & (sizeof(vsfscalar)-1) ) == 0) 178*3f1979aaSAndroid Build Coastguard Worker 179*3f1979aaSAndroid Build Coastguard Worker #else 180*3f1979aaSAndroid Build Coastguard Worker /* #pragma message( __FILE__ ": float SCALAR1 macros are not defined" ) */ 181*3f1979aaSAndroid Build Coastguard Worker #endif 182*3f1979aaSAndroid Build Coastguard Worker 183*3f1979aaSAndroid Build Coastguard Worker 184*3f1979aaSAndroid Build Coastguard Worker #endif /* PF_SCAL_FLT_H */ 185*3f1979aaSAndroid Build Coastguard Worker 186