xref: /aosp_15_r20/external/pffft/pffft_double.c (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1 /* Copyright (c) 2013  Julien Pommier ( [email protected] )
2    Copyright (c) 2020  Hayati Ayguen ( [email protected] )
3    Copyright (c) 2020  Dario Mambro ( [email protected] )
4 
5    Based on original fortran 77 code from FFTPACKv4 from NETLIB
6    (http://www.netlib.org/fftpack), authored by Dr Paul Swarztrauber
7    of NCAR, in 1985.
8 
9    As confirmed by the NCAR fftpack software curators, the following
10    FFTPACKv5 license applies to FFTPACKv4 sources. My changes are
11    released under the same terms.
12 
13    FFTPACK license:
14 
15    http://www.cisl.ucar.edu/css/software/fftpack5/ftpk.html
16 
17    Copyright (c) 2004 the University Corporation for Atmospheric
18    Research ("UCAR"). All rights reserved. Developed by NCAR's
19    Computational and Information Systems Laboratory, UCAR,
20    www.cisl.ucar.edu.
21 
22    Redistribution and use of the Software in source and binary forms,
23    with or without modification, is permitted provided that the
24    following conditions are met:
25 
26    - Neither the names of NCAR's Computational and Information Systems
27    Laboratory, the University Corporation for Atmospheric Research,
28    nor the names of its sponsors or contributors may be used to
29    endorse or promote products derived from this Software without
30    specific prior written permission.
31 
32    - Redistributions of source code must retain the above copyright
33    notices, this list of conditions, and the disclaimer below.
34 
35    - Redistributions in binary form must reproduce the above copyright
36    notice, this list of conditions, and the disclaimer below in the
37    documentation and/or other materials provided with the
38    distribution.
39 
40    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
41    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
42    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
43    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
44    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
45    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
46    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
47    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
48    SOFTWARE.
49 
50 
51    PFFFT : a Pretty Fast FFT.
52 
53    This file is largely based on the original FFTPACK implementation, modified in
54    order to take advantage of SIMD instructions of modern CPUs.
55 */
56 
57 /*
58    NOTE: This file is adapted from Julien Pommier's original PFFFT,
59    which works on 32 bit floating point precision using SSE instructions,
60    to work with 64 bit floating point precision using AVX instructions.
61    Author: Dario Mambro @ https://github.com/unevens/pffft
62 */
63 
64 #include "pffft_double.h"
65 
/*
   Compiler detection and portability macros.

   COMPILER_MSVC is defined when _MSC_VER is present, otherwise
   COMPILER_GCC is defined when __GNUC__ is present.  When neither is
   detected, none of the helper macros below are defined.
*/
#if defined(_MSC_VER)
#  define COMPILER_MSVC
#elif defined(__GNUC__)
#  define COMPILER_GCC
#endif

#ifdef COMPILER_MSVC
   /* must be defined before <math.h> is included so that MSVC exposes
      the M_PI family of constants */
#  define _USE_MATH_DEFINES
#  include <malloc.h>   /* _alloca, used by VLA_ARRAY_ON_STACK below */
#else
#  include <alloca.h>
#endif

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <assert.h>

/*
   ALWAYS_INLINE(T) / NEVER_INLINE(T): wrap the return type T of a
   function definition with the compiler-specific force-inline or
   no-inline annotation.

   RESTRICT: the compiler's spelling of the restrict qualifier.

   VLA_ARRAY_ON_STACK(type, name, count): declares `name` as stack
   storage for `count` elements of `type`.  The GCC form is a real
   array and already ends with ';'; the MSVC form is a pointer
   initialised from _alloca() and has no trailing ';'.  Call sites
   should therefore always write their own ';' after the macro.
*/
#if defined(COMPILER_GCC)
#  define ALWAYS_INLINE(return_type) inline return_type __attribute__ ((always_inline))
#  define NEVER_INLINE(return_type) return_type __attribute__ ((noinline))
#  define RESTRICT __restrict
#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ varname__[size__];
#elif defined(COMPILER_MSVC)
#  define ALWAYS_INLINE(return_type) __forceinline return_type
#  define NEVER_INLINE(return_type) __declspec(noinline) return_type
#  define RESTRICT __restrict
   /* size__ is parenthesised so that an expression argument such as
      `n + 1` is multiplied by the element size as a whole */
#  define VLA_ARRAY_ON_STACK(type__, varname__, size__) type__ *varname__ = (type__*)_alloca((size__) * sizeof(type__))
#endif


#ifdef COMPILER_MSVC
/*
   Silenced MSVC diagnostics:
     C4244 - conversion with possible loss of data
     C4305 - truncation from one type to another
     C4204 - nonstandard extension: non-constant aggregate initializer
     C4456 - declaration hides a previous local declaration
*/
#pragma warning( disable : 4244 4305 4204 4456 )
#endif
102 
103 /*
104    vector support macros: the rest of the code is independent of
105    AVX -- adding support for other platforms with 4-element
106    vectors should be limited to these macros
107 */
108 #include "simd/pf_double.h"
109 
/*
   Double-precision name bindings.

   `float` is redefined to `double`, and each generic SETUP_STRUCT /
   FUNC_* placeholder expands to its PFFFTD_* / pffftd_* identifier, so
   that code written against the placeholders stays comparable with
   this definition.
   NOTE(review): presumably consumed by "pffft_priv_impl.h", which is
   included right after these defines -- confirm.
*/
#define float                       double

/* setup object and its lifetime */
#define SETUP_STRUCT                PFFFTD_Setup
#define FUNC_NEW_SETUP              pffftd_new_setup
#define FUNC_DESTROY                pffftd_destroy_setup

/* transform, reorder and convolution routines */
#define FUNC_TRANSFORM_UNORDRD      pffftd_transform
#define FUNC_TRANSFORM_ORDERED      pffftd_transform_ordered
#define FUNC_ZREORDER               pffftd_zreorder
#define FUNC_ZCONVOLVE_ACCUMULATE   pffftd_zconvolve_accumulate
#define FUNC_ZCONVOLVE_NO_ACCU      pffftd_zconvolve_no_accu

/* aligned allocation and SIMD query / validation routines */
#define FUNC_ALIGNED_MALLOC         pffftd_aligned_malloc
#define FUNC_ALIGNED_FREE           pffftd_aligned_free
#define FUNC_SIMD_SIZE              pffftd_simd_size
#define FUNC_SIMD_ARCH              pffftd_simd_arch
#define FUNC_VALIDATE_SIMD_A        validate_pffftd_simd
#define FUNC_VALIDATE_SIMD_EX       validate_pffftd_simd_ex

/* complex / real preprocess and finalize stages, and the transform core */
#define FUNC_CPLX_FINALIZE          pffftd_cplx_finalize
#define FUNC_CPLX_PREPROCESS        pffftd_cplx_preprocess
#define FUNC_REAL_PREPROCESS_4X4    pffftd_real_preprocess_4x4
#define FUNC_REAL_PREPROCESS        pffftd_real_preprocess
#define FUNC_REAL_FINALIZE_4X4      pffftd_real_finalize_4x4
#define FUNC_REAL_FINALIZE          pffftd_real_finalize
#define FUNC_TRANSFORM_INTERNAL     pffftd_transform_internal

/* trigonometric functions: the double-argument variants from <math.h> */
#define FUNC_COS                    cos
#define FUNC_SIN                    sin
138 
139 
140 #include "pffft_priv_impl.h"
141 
142 
143