1*3f1979aaSAndroid Build Coastguard Worker 2*3f1979aaSAndroid Build Coastguard Worker /* Copyright (c) 2013 Julien Pommier ( [email protected] ) 3*3f1979aaSAndroid Build Coastguard Worker 4*3f1979aaSAndroid Build Coastguard Worker Redistribution and use of the Software in source and binary forms, 5*3f1979aaSAndroid Build Coastguard Worker with or without modification, is permitted provided that the 6*3f1979aaSAndroid Build Coastguard Worker following conditions are met: 7*3f1979aaSAndroid Build Coastguard Worker 8*3f1979aaSAndroid Build Coastguard Worker - Neither the names of NCAR's Computational and Information Systems 9*3f1979aaSAndroid Build Coastguard Worker Laboratory, the University Corporation for Atmospheric Research, 10*3f1979aaSAndroid Build Coastguard Worker nor the names of its sponsors or contributors may be used to 11*3f1979aaSAndroid Build Coastguard Worker endorse or promote products derived from this Software without 12*3f1979aaSAndroid Build Coastguard Worker specific prior written permission. 13*3f1979aaSAndroid Build Coastguard Worker 14*3f1979aaSAndroid Build Coastguard Worker - Redistributions of source code must retain the above copyright 15*3f1979aaSAndroid Build Coastguard Worker notices, this list of conditions, and the disclaimer below. 16*3f1979aaSAndroid Build Coastguard Worker 17*3f1979aaSAndroid Build Coastguard Worker - Redistributions in binary form must reproduce the above copyright 18*3f1979aaSAndroid Build Coastguard Worker notice, this list of conditions, and the disclaimer below in the 19*3f1979aaSAndroid Build Coastguard Worker documentation and/or other materials provided with the 20*3f1979aaSAndroid Build Coastguard Worker distribution. 21*3f1979aaSAndroid Build Coastguard Worker 22*3f1979aaSAndroid Build Coastguard Worker THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 23*3f1979aaSAndroid Build Coastguard Worker EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 24*3f1979aaSAndroid Build Coastguard Worker MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 25*3f1979aaSAndroid Build Coastguard Worker NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 26*3f1979aaSAndroid Build Coastguard Worker HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 27*3f1979aaSAndroid Build Coastguard Worker EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28*3f1979aaSAndroid Build Coastguard Worker ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29*3f1979aaSAndroid Build Coastguard Worker CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 30*3f1979aaSAndroid Build Coastguard Worker SOFTWARE. 31*3f1979aaSAndroid Build Coastguard Worker */ 32*3f1979aaSAndroid Build Coastguard Worker 33*3f1979aaSAndroid Build Coastguard Worker #ifndef PF_SSE1_FLT_H 34*3f1979aaSAndroid Build Coastguard Worker #define PF_SSE1_FLT_H 35*3f1979aaSAndroid Build Coastguard Worker 36*3f1979aaSAndroid Build Coastguard Worker /* 37*3f1979aaSAndroid Build Coastguard Worker SSE1 support macros 38*3f1979aaSAndroid Build Coastguard Worker */ 39*3f1979aaSAndroid Build Coastguard Worker #if !defined(SIMD_SZ) && !defined(PFFFT_SIMD_DISABLE) && (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86)) 40*3f1979aaSAndroid Build Coastguard Worker #pragma message( __FILE__ ": SSE1 float macros are defined" ) 41*3f1979aaSAndroid Build Coastguard Worker 42*3f1979aaSAndroid Build Coastguard Worker #include <xmmintrin.h> 43*3f1979aaSAndroid Build Coastguard Worker typedef __m128 v4sf; 44*3f1979aaSAndroid Build Coastguard Worker 45*3f1979aaSAndroid Build Coastguard Worker /* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions 46*3f1979aaSAndroid Build Coastguard Worker * anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */ 47*3f1979aaSAndroid Build Coastguard Worker # define SIMD_SZ 4 48*3f1979aaSAndroid Build Coastguard Worker 49*3f1979aaSAndroid Build Coastguard Worker typedef union v4sf_union { 50*3f1979aaSAndroid Build Coastguard Worker v4sf v; 51*3f1979aaSAndroid Build Coastguard Worker float f[SIMD_SZ]; 52*3f1979aaSAndroid Build Coastguard Worker } v4sf_union; 53*3f1979aaSAndroid Build Coastguard Worker 54*3f1979aaSAndroid Build Coastguard Worker # define VARCH "SSE1" 55*3f1979aaSAndroid Build Coastguard Worker # define VREQUIRES_ALIGN 1 56*3f1979aaSAndroid Build Coastguard Worker # define VZERO() _mm_setzero_ps() 57*3f1979aaSAndroid Build Coastguard Worker # define VMUL(a,b) _mm_mul_ps(a,b) 58*3f1979aaSAndroid Build Coastguard Worker # define VADD(a,b) _mm_add_ps(a,b) 59*3f1979aaSAndroid Build Coastguard Worker # define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c) 60*3f1979aaSAndroid Build Coastguard Worker # define VSUB(a,b) _mm_sub_ps(a,b) 61*3f1979aaSAndroid Build Coastguard Worker # define LD_PS1(p) _mm_set1_ps(p) 62*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_UNALIGNED(ptr) _mm_loadu_ps(ptr) 63*3f1979aaSAndroid Build Coastguard Worker # define VLOAD_ALIGNED(ptr) _mm_load_ps(ptr) 64*3f1979aaSAndroid Build Coastguard Worker 65*3f1979aaSAndroid Build Coastguard Worker # define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; } 66*3f1979aaSAndroid Build Coastguard Worker # define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; } 67*3f1979aaSAndroid Build Coastguard Worker # define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3) 68*3f1979aaSAndroid Build Coastguard Worker # define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0)) 69*3f1979aaSAndroid Build Coastguard Worker 70*3f1979aaSAndroid Build Coastguard Worker /* reverse/flip all floats */ 71*3f1979aaSAndroid Build Coastguard Worker # define VREV_S(a) _mm_shuffle_ps(a, a, _MM_SHUFFLE(0,1,2,3)) 72*3f1979aaSAndroid Build Coastguard Worker /* reverse/flip complex floats */ 73*3f1979aaSAndroid Build Coastguard Worker # define VREV_C(a) _mm_shuffle_ps(a, a, _MM_SHUFFLE(1,0,3,2)) 74*3f1979aaSAndroid Build Coastguard Worker 75*3f1979aaSAndroid Build Coastguard Worker # define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0) 76*3f1979aaSAndroid Build Coastguard Worker 77*3f1979aaSAndroid Build Coastguard Worker #else 78*3f1979aaSAndroid Build Coastguard Worker /* #pragma message( __FILE__ ": SSE1 float macros are not defined" ) */ 79*3f1979aaSAndroid Build Coastguard Worker #endif 80*3f1979aaSAndroid Build Coastguard Worker 81*3f1979aaSAndroid Build Coastguard Worker #endif /* PF_SSE1_FLT_H */ 82*3f1979aaSAndroid Build Coastguard Worker 83