xref: /aosp_15_r20/external/pffft/simd/pf_sse1_float.h (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1*3f1979aaSAndroid Build Coastguard Worker 
/* Copyright (c) 2013  Julien Pommier ( pommier@modartt.com )

   Redistribution and use of the Software in source and binary forms,
   with or without modification, is permitted provided that the
   following conditions are met:

   - Neither the names of NCAR's Computational and Information Systems
   Laboratory, the University Corporation for Atmospheric Research,
   nor the names of its sponsors or contributors may be used to
   endorse or promote products derived from this Software without
   specific prior written permission.

   - Redistributions of source code must retain the above copyright
   notices, this list of conditions, and the disclaimer below.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions, and the disclaimer below in the
   documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
   HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
   SOFTWARE.
*/
32*3f1979aaSAndroid Build Coastguard Worker 
#ifndef PF_SSE1_FLT_H
#define PF_SSE1_FLT_H

/*
  SSE1 support macros

  Maps the generic V*() vector primitives onto 128-bit SSE1 intrinsics
  (4 packed single-precision floats per vector).

  Nothing is defined here when SIMD_SZ is already defined, when
  PFFFT_SIMD_DISABLE is defined, or when the target is not recognised as
  x86 / x86-64.

  32-bit x86 detection: `i386` is not predefined by GCC/Clang in strict ISO
  mode (-std=c99, -std=c11, ...); only `__i386__` / `__i386` are.  Those
  spellings are therefore accepted as well, but only when the compiler also
  reports SSE as enabled (__SSE__), so that a build which previously fell
  back to the non-SIMD path because SSE was unavailable keeps doing so.
*/
#if !defined(SIMD_SZ) && !defined(PFFFT_SIMD_DISABLE) && \
    (defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86) || \
     ((defined(__i386__) || defined(__i386)) && defined(__SSE__)))
#pragma message( __FILE__ ": SSE1 float macros are defined" )

#include <stdint.h>     /* uintptr_t, needed by VALIGNED() */
#include <xmmintrin.h>

/* One SIMD vector: 4 packed floats in an SSE register. */
typedef __m128 v4sf;

/* 4 floats by simd vector -- this is pretty much hardcoded in the preprocess/finalize functions
 *  anyway so you will have to work if you want to enable AVX with its 256-bit vectors. */
#  define SIMD_SZ 4

/* Overlays a vector with a plain float array so individual lanes can be
 * read or written through f[0] .. f[SIMD_SZ-1]. */
typedef union v4sf_union {
  v4sf  v;
  float f[SIMD_SZ];
} v4sf_union;

/* Name of this backend, as a string literal. */
#  define VARCH "SSE1"
/* Non-zero: VLOAD_ALIGNED() maps to _mm_load_ps(), which requires a
 * 16-byte aligned address. */
#  define VREQUIRES_ALIGN 1

/* Lane-wise arithmetic. */
#  define VZERO() _mm_setzero_ps()                       /* all lanes 0.0f              */
#  define VMUL(a,b) _mm_mul_ps(a,b)                      /* a * b                       */
#  define VADD(a,b) _mm_add_ps(a,b)                      /* a + b                       */
#  define VMADD(a,b,c) _mm_add_ps(_mm_mul_ps(a,b), c)    /* a * b + c (separate mul+add) */
#  define VSUB(a,b) _mm_sub_ps(a,b)                      /* a - b                       */

/* Broadcast the scalar float p to all 4 lanes. */
#  define LD_PS1(p) _mm_set1_ps(p)
/* Load 4 consecutive floats from ptr; the aligned variant needs ptr to be
 * 16-byte aligned (see VALIGNED). */
#  define VLOAD_UNALIGNED(ptr)  _mm_loadu_ps(ptr)
#  define VLOAD_ALIGNED(ptr)    _mm_load_ps(ptr)

/* INTERLEAVE2:   out1 = { in1[0], in2[0], in1[1], in2[1] }
 *                out2 = { in1[2], in2[2], in1[3], in2[3] }
 * UNINTERLEAVE2: out1 = { in1[0], in1[2], in2[0], in2[2] }
 *                out2 = { in1[1], in1[3], in2[1], in2[3] }
 *
 * The first result is staged in a temporary and stored last, so the outputs
 * may be the same objects as the inputs.  Both macros expand to a brace
 * block (not an expression); each argument is evaluated twice. */
#  define INTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_unpacklo_ps(in1, in2); out2 = _mm_unpackhi_ps(in1, in2); out1 = tmp__; }
#  define UNINTERLEAVE2(in1, in2, out1, out2) { v4sf tmp__ = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(2,0,2,0)); out2 = _mm_shuffle_ps(in1, in2, _MM_SHUFFLE(3,1,3,1)); out1 = tmp__; }
/* In-place 4x4 transpose of the matrix whose rows are x0..x3 (all four
 * arguments must be modifiable v4sf lvalues). */
#  define VTRANSPOSE4(x0,x1,x2,x3) _MM_TRANSPOSE4_PS(x0,x1,x2,x3)
/* Result = { b[0], b[1], a[2], a[3] }: low half of b, high half of a. */
#  define VSWAPHL(a,b) _mm_shuffle_ps(b, a, _MM_SHUFFLE(3,2,1,0))

/* reverse/flip all floats: { a[3], a[2], a[1], a[0] } */
#  define VREV_S(a)    _mm_shuffle_ps(a, a, _MM_SHUFFLE(0,1,2,3))
/* reverse/flip complex floats (pairs of lanes): { a[2], a[3], a[0], a[1] } */
#  define VREV_C(a)    _mm_shuffle_ps(a, a, _MM_SHUFFLE(1,0,3,2))

/* Non-zero when ptr is aligned on a 16-byte boundary. */
#  define VALIGNED(ptr) ((((uintptr_t)(ptr)) & 0xF) == 0)

#else
/* #pragma message( __FILE__ ": SSE1 float macros are not defined" ) */
#endif

#endif /* PF_SSE1_FLT_H */
82*3f1979aaSAndroid Build Coastguard Worker 
83