xref: /aosp_15_r20/external/libopus/celt/kiss_fft.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /*Copyright (c) 2003-2004, Mark Borgerding
2*a58d3d2aSXin Li   Lots of modifications by Jean-Marc Valin
3*a58d3d2aSXin Li   Copyright (c) 2005-2007, Xiph.Org Foundation
4*a58d3d2aSXin Li   Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5*a58d3d2aSXin Li 
6*a58d3d2aSXin Li   All rights reserved.
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li   Redistribution and use in source and binary forms, with or without
9*a58d3d2aSXin Li    modification, are permitted provided that the following conditions are met:
10*a58d3d2aSXin Li 
11*a58d3d2aSXin Li     * Redistributions of source code must retain the above copyright notice,
12*a58d3d2aSXin Li        this list of conditions and the following disclaimer.
13*a58d3d2aSXin Li     * Redistributions in binary form must reproduce the above copyright notice,
14*a58d3d2aSXin Li        this list of conditions and the following disclaimer in the
15*a58d3d2aSXin Li        documentation and/or other materials provided with the distribution.
16*a58d3d2aSXin Li 
17*a58d3d2aSXin Li   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18*a58d3d2aSXin Li   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19*a58d3d2aSXin Li   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20*a58d3d2aSXin Li   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21*a58d3d2aSXin Li   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22*a58d3d2aSXin Li   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23*a58d3d2aSXin Li   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24*a58d3d2aSXin Li   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25*a58d3d2aSXin Li   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26*a58d3d2aSXin Li   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27*a58d3d2aSXin Li   POSSIBILITY OF SUCH DAMAGE.*/
28*a58d3d2aSXin Li 
29*a58d3d2aSXin Li #ifndef KISS_FFT_H
30*a58d3d2aSXin Li #define KISS_FFT_H
31*a58d3d2aSXin Li 
32*a58d3d2aSXin Li #include <stdlib.h>
33*a58d3d2aSXin Li #include <math.h>
34*a58d3d2aSXin Li #include "arch.h"
35*a58d3d2aSXin Li #include "cpu_support.h"
36*a58d3d2aSXin Li 
37*a58d3d2aSXin Li #ifdef __cplusplus
38*a58d3d2aSXin Li extern "C" {
39*a58d3d2aSXin Li #endif
40*a58d3d2aSXin Li 
41*a58d3d2aSXin Li #ifdef USE_SIMD
42*a58d3d2aSXin Li # include <xmmintrin.h>
43*a58d3d2aSXin Li # define kiss_fft_scalar __m128
44*a58d3d2aSXin Li #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes)
45*a58d3d2aSXin Li #else
46*a58d3d2aSXin Li #define KISS_FFT_MALLOC opus_alloc
47*a58d3d2aSXin Li #endif
48*a58d3d2aSXin Li 
49*a58d3d2aSXin Li #ifdef FIXED_POINT
50*a58d3d2aSXin Li #include "arch.h"
51*a58d3d2aSXin Li 
52*a58d3d2aSXin Li #  define kiss_fft_scalar opus_int32
53*a58d3d2aSXin Li #  define kiss_twiddle_scalar opus_int16
54*a58d3d2aSXin Li 
55*a58d3d2aSXin Li /* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
56*a58d3d2aSXin Li  * access, and could benefit from additional alignment.
57*a58d3d2aSXin Li  */
58*a58d3d2aSXin Li #  define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
59*a58d3d2aSXin Li 
60*a58d3d2aSXin Li #else
61*a58d3d2aSXin Li # ifndef kiss_fft_scalar
62*a58d3d2aSXin Li /*  default is float */
63*a58d3d2aSXin Li #   define kiss_fft_scalar float
64*a58d3d2aSXin Li #   define kiss_twiddle_scalar float
65*a58d3d2aSXin Li #   define KF_SUFFIX _celt_single
66*a58d3d2aSXin Li # endif
67*a58d3d2aSXin Li #endif
68*a58d3d2aSXin Li 
69*a58d3d2aSXin Li #if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
70*a58d3d2aSXin Li #define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
71*a58d3d2aSXin Li #else
72*a58d3d2aSXin Li #define KISS_TWIDDLE_CPX_ALIGNED
73*a58d3d2aSXin Li #endif
74*a58d3d2aSXin Li 
75*a58d3d2aSXin Li typedef struct {
76*a58d3d2aSXin Li     kiss_fft_scalar r;
77*a58d3d2aSXin Li     kiss_fft_scalar i;
78*a58d3d2aSXin Li }kiss_fft_cpx;
79*a58d3d2aSXin Li 
80*a58d3d2aSXin Li typedef struct {
81*a58d3d2aSXin Li    kiss_twiddle_scalar r;
82*a58d3d2aSXin Li    kiss_twiddle_scalar i;
83*a58d3d2aSXin Li } KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
84*a58d3d2aSXin Li 
85*a58d3d2aSXin Li #define MAXFACTORS 8
86*a58d3d2aSXin Li /* e.g. an fft of length 128 has 4 factors
87*a58d3d2aSXin Li  as far as kissfft is concerned
88*a58d3d2aSXin Li  4*4*4*2
89*a58d3d2aSXin Li  */
90*a58d3d2aSXin Li 
91*a58d3d2aSXin Li typedef struct arch_fft_state{
92*a58d3d2aSXin Li    int is_supported;
93*a58d3d2aSXin Li    void *priv;
94*a58d3d2aSXin Li } arch_fft_state;
95*a58d3d2aSXin Li 
96*a58d3d2aSXin Li typedef struct kiss_fft_state{
97*a58d3d2aSXin Li     int nfft;
98*a58d3d2aSXin Li     opus_val16 scale;
99*a58d3d2aSXin Li #ifdef FIXED_POINT
100*a58d3d2aSXin Li     int scale_shift;
101*a58d3d2aSXin Li #endif
102*a58d3d2aSXin Li     int shift;
103*a58d3d2aSXin Li     opus_int16 factors[2*MAXFACTORS];
104*a58d3d2aSXin Li     const opus_int16 *bitrev;
105*a58d3d2aSXin Li     const kiss_twiddle_cpx *twiddles;
106*a58d3d2aSXin Li     arch_fft_state *arch_fft;
107*a58d3d2aSXin Li } kiss_fft_state;
108*a58d3d2aSXin Li 
109*a58d3d2aSXin Li #if defined(HAVE_ARM_NE10)
110*a58d3d2aSXin Li #include "arm/fft_arm.h"
111*a58d3d2aSXin Li #endif
112*a58d3d2aSXin Li 
113*a58d3d2aSXin Li /*typedef struct kiss_fft_state* kiss_fft_cfg;*/
114*a58d3d2aSXin Li 
115*a58d3d2aSXin Li /**
116*a58d3d2aSXin Li  *  opus_fft_alloc
117*a58d3d2aSXin Li  *
118*a58d3d2aSXin Li  *  Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
119*a58d3d2aSXin Li  *
120*a58d3d2aSXin Li  *  typical usage:      kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL);
121*a58d3d2aSXin Li  *
122*a58d3d2aSXin Li  *  The return value from fft_alloc is a cfg buffer used internally
123*a58d3d2aSXin Li  *  by the fft routine or NULL.
124*a58d3d2aSXin Li  *
125*a58d3d2aSXin Li  *  If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc.
126*a58d3d2aSXin Li  *  The returned value should be free()d when done to avoid memory leaks.
127*a58d3d2aSXin Li  *
128*a58d3d2aSXin Li  *  The state can be placed in a user supplied buffer 'mem':
129*a58d3d2aSXin Li  *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
130*a58d3d2aSXin Li  *      then the function places the cfg in mem and the size used in *lenmem
131*a58d3d2aSXin Li  *      and returns mem.
132*a58d3d2aSXin Li  *
133*a58d3d2aSXin Li  *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
134*a58d3d2aSXin Li  *      then the function returns NULL and places the minimum cfg
135*a58d3d2aSXin Li  *      buffer size in *lenmem.
136*a58d3d2aSXin Li  * */
137*a58d3d2aSXin Li 
138*a58d3d2aSXin Li kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
139*a58d3d2aSXin Li 
140*a58d3d2aSXin Li kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
141*a58d3d2aSXin Li 
142*a58d3d2aSXin Li /**
143*a58d3d2aSXin Li  * opus_fft(cfg,in_out_buf)
144*a58d3d2aSXin Li  *
145*a58d3d2aSXin Li  * Perform an FFT on a complex input buffer.
146*a58d3d2aSXin Li  * for a forward FFT,
147*a58d3d2aSXin Li  * fin should be  f[0] , f[1] , ... ,f[nfft-1]
148*a58d3d2aSXin Li  * fout will be   F[0] , F[1] , ... ,F[nfft-1]
149*a58d3d2aSXin Li  * Note that each element is complex and can be accessed like
150*a58d3d2aSXin Li     f[k].r and f[k].i
151*a58d3d2aSXin Li  * */
152*a58d3d2aSXin Li void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
153*a58d3d2aSXin Li void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
154*a58d3d2aSXin Li 
155*a58d3d2aSXin Li void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
156*a58d3d2aSXin Li void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
157*a58d3d2aSXin Li 
158*a58d3d2aSXin Li void opus_fft_free(const kiss_fft_state *cfg, int arch);
159*a58d3d2aSXin Li 
160*a58d3d2aSXin Li 
161*a58d3d2aSXin Li void opus_fft_free_arch_c(kiss_fft_state *st);
162*a58d3d2aSXin Li int opus_fft_alloc_arch_c(kiss_fft_state *st);
163*a58d3d2aSXin Li 
164*a58d3d2aSXin Li #if !defined(OVERRIDE_OPUS_FFT)
165*a58d3d2aSXin Li /* Is run-time CPU detection enabled on this platform? */
166*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
167*a58d3d2aSXin Li 
168*a58d3d2aSXin Li extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(
169*a58d3d2aSXin Li  kiss_fft_state *st);
170*a58d3d2aSXin Li 
171*a58d3d2aSXin Li #define opus_fft_alloc_arch(_st, arch) \
172*a58d3d2aSXin Li          ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
173*a58d3d2aSXin Li 
174*a58d3d2aSXin Li extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(
175*a58d3d2aSXin Li  kiss_fft_state *st);
176*a58d3d2aSXin Li #define opus_fft_free_arch(_st, arch) \
177*a58d3d2aSXin Li          ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
178*a58d3d2aSXin Li 
179*a58d3d2aSXin Li extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
180*a58d3d2aSXin Li  const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
181*a58d3d2aSXin Li #define opus_fft(_cfg, _fin, _fout, arch) \
182*a58d3d2aSXin Li    ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
183*a58d3d2aSXin Li 
184*a58d3d2aSXin Li extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
185*a58d3d2aSXin Li  const kiss_fft_cpx *fin, kiss_fft_cpx *fout);
186*a58d3d2aSXin Li #define opus_ifft(_cfg, _fin, _fout, arch) \
187*a58d3d2aSXin Li    ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
188*a58d3d2aSXin Li 
189*a58d3d2aSXin Li #else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
190*a58d3d2aSXin Li 
191*a58d3d2aSXin Li #define opus_fft_alloc_arch(_st, arch) \
192*a58d3d2aSXin Li          ((void)(arch), opus_fft_alloc_arch_c(_st))
193*a58d3d2aSXin Li 
194*a58d3d2aSXin Li #define opus_fft_free_arch(_st, arch) \
195*a58d3d2aSXin Li          ((void)(arch), opus_fft_free_arch_c(_st))
196*a58d3d2aSXin Li 
197*a58d3d2aSXin Li #define opus_fft(_cfg, _fin, _fout, arch) \
198*a58d3d2aSXin Li          ((void)(arch), opus_fft_c(_cfg, _fin, _fout))
199*a58d3d2aSXin Li 
200*a58d3d2aSXin Li #define opus_ifft(_cfg, _fin, _fout, arch) \
201*a58d3d2aSXin Li          ((void)(arch), opus_ifft_c(_cfg, _fin, _fout))
202*a58d3d2aSXin Li 
203*a58d3d2aSXin Li #endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
204*a58d3d2aSXin Li #endif /* end if !defined(OVERRIDE_OPUS_FFT) */
205*a58d3d2aSXin Li 
206*a58d3d2aSXin Li #ifdef __cplusplus
207*a58d3d2aSXin Li }
208*a58d3d2aSXin Li #endif
209*a58d3d2aSXin Li 
210*a58d3d2aSXin Li #endif
211