1*a58d3d2aSXin Li /*Copyright (c) 2003-2004, Mark Borgerding 2*a58d3d2aSXin Li Lots of modifications by Jean-Marc Valin 3*a58d3d2aSXin Li Copyright (c) 2005-2007, Xiph.Org Foundation 4*a58d3d2aSXin Li Copyright (c) 2008, Xiph.Org Foundation, CSIRO 5*a58d3d2aSXin Li 6*a58d3d2aSXin Li All rights reserved. 7*a58d3d2aSXin Li 8*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without 9*a58d3d2aSXin Li modification, are permitted provided that the following conditions are met: 10*a58d3d2aSXin Li 11*a58d3d2aSXin Li * Redistributions of source code must retain the above copyright notice, 12*a58d3d2aSXin Li this list of conditions and the following disclaimer. 13*a58d3d2aSXin Li * Redistributions in binary form must reproduce the above copyright notice, 14*a58d3d2aSXin Li this list of conditions and the following disclaimer in the 15*a58d3d2aSXin Li documentation and/or other materials provided with the distribution. 16*a58d3d2aSXin Li 17*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18*a58d3d2aSXin Li AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19*a58d3d2aSXin Li IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20*a58d3d2aSXin Li ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21*a58d3d2aSXin Li LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22*a58d3d2aSXin Li CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23*a58d3d2aSXin Li SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24*a58d3d2aSXin Li INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25*a58d3d2aSXin Li CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26*a58d3d2aSXin Li ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27*a58d3d2aSXin Li POSSIBILITY OF SUCH DAMAGE.*/ 28*a58d3d2aSXin Li 29*a58d3d2aSXin Li #ifndef KISS_FFT_H 30*a58d3d2aSXin Li #define KISS_FFT_H 31*a58d3d2aSXin Li 32*a58d3d2aSXin Li #include <stdlib.h> 33*a58d3d2aSXin Li #include <math.h> 34*a58d3d2aSXin Li #include "arch.h" 35*a58d3d2aSXin Li #include "cpu_support.h" 36*a58d3d2aSXin Li 37*a58d3d2aSXin Li #ifdef __cplusplus 38*a58d3d2aSXin Li extern "C" { 39*a58d3d2aSXin Li #endif 40*a58d3d2aSXin Li 41*a58d3d2aSXin Li #ifdef USE_SIMD 42*a58d3d2aSXin Li # include <xmmintrin.h> 43*a58d3d2aSXin Li # define kiss_fft_scalar __m128 44*a58d3d2aSXin Li #define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) 45*a58d3d2aSXin Li #else 46*a58d3d2aSXin Li #define KISS_FFT_MALLOC opus_alloc 47*a58d3d2aSXin Li #endif 48*a58d3d2aSXin Li 49*a58d3d2aSXin Li #ifdef FIXED_POINT 50*a58d3d2aSXin Li #include "arch.h" 51*a58d3d2aSXin Li 52*a58d3d2aSXin Li # define kiss_fft_scalar opus_int32 53*a58d3d2aSXin Li # define kiss_twiddle_scalar opus_int16 54*a58d3d2aSXin Li 55*a58d3d2aSXin Li /* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory 56*a58d3d2aSXin Li * access, and could benefit from additional alignment. 57*a58d3d2aSXin Li */ 58*a58d3d2aSXin Li # define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32)) 59*a58d3d2aSXin Li 60*a58d3d2aSXin Li #else 61*a58d3d2aSXin Li # ifndef kiss_fft_scalar 62*a58d3d2aSXin Li /* default is float */ 63*a58d3d2aSXin Li # define kiss_fft_scalar float 64*a58d3d2aSXin Li # define kiss_twiddle_scalar float 65*a58d3d2aSXin Li # define KF_SUFFIX _celt_single 66*a58d3d2aSXin Li # endif 67*a58d3d2aSXin Li #endif 68*a58d3d2aSXin Li 69*a58d3d2aSXin Li #if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT) 70*a58d3d2aSXin Li #define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT))) 71*a58d3d2aSXin Li #else 72*a58d3d2aSXin Li #define KISS_TWIDDLE_CPX_ALIGNED 73*a58d3d2aSXin Li #endif 74*a58d3d2aSXin Li 75*a58d3d2aSXin Li typedef struct { 76*a58d3d2aSXin Li kiss_fft_scalar r; 77*a58d3d2aSXin Li kiss_fft_scalar i; 78*a58d3d2aSXin Li }kiss_fft_cpx; 79*a58d3d2aSXin Li 80*a58d3d2aSXin Li typedef struct { 81*a58d3d2aSXin Li kiss_twiddle_scalar r; 82*a58d3d2aSXin Li kiss_twiddle_scalar i; 83*a58d3d2aSXin Li } KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx; 84*a58d3d2aSXin Li 85*a58d3d2aSXin Li #define MAXFACTORS 8 86*a58d3d2aSXin Li /* e.g. an fft of length 128 has 4 factors 87*a58d3d2aSXin Li as far as kissfft is concerned 88*a58d3d2aSXin Li 4*4*4*2 89*a58d3d2aSXin Li */ 90*a58d3d2aSXin Li 91*a58d3d2aSXin Li typedef struct arch_fft_state{ 92*a58d3d2aSXin Li int is_supported; 93*a58d3d2aSXin Li void *priv; 94*a58d3d2aSXin Li } arch_fft_state; 95*a58d3d2aSXin Li 96*a58d3d2aSXin Li typedef struct kiss_fft_state{ 97*a58d3d2aSXin Li int nfft; 98*a58d3d2aSXin Li opus_val16 scale; 99*a58d3d2aSXin Li #ifdef FIXED_POINT 100*a58d3d2aSXin Li int scale_shift; 101*a58d3d2aSXin Li #endif 102*a58d3d2aSXin Li int shift; 103*a58d3d2aSXin Li opus_int16 factors[2*MAXFACTORS]; 104*a58d3d2aSXin Li const opus_int16 *bitrev; 105*a58d3d2aSXin Li const kiss_twiddle_cpx *twiddles; 106*a58d3d2aSXin Li arch_fft_state *arch_fft; 107*a58d3d2aSXin Li } kiss_fft_state; 108*a58d3d2aSXin Li 109*a58d3d2aSXin Li #if defined(HAVE_ARM_NE10) 110*a58d3d2aSXin Li #include "arm/fft_arm.h" 111*a58d3d2aSXin Li #endif 112*a58d3d2aSXin Li 113*a58d3d2aSXin Li /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ 114*a58d3d2aSXin Li 115*a58d3d2aSXin Li /** 116*a58d3d2aSXin Li * opus_fft_alloc 117*a58d3d2aSXin Li * 118*a58d3d2aSXin Li * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. 119*a58d3d2aSXin Li * 120*a58d3d2aSXin Li * typical usage: kiss_fft_cfg mycfg=opus_fft_alloc(1024,0,NULL,NULL); 121*a58d3d2aSXin Li * 122*a58d3d2aSXin Li * The return value from fft_alloc is a cfg buffer used internally 123*a58d3d2aSXin Li * by the fft routine or NULL. 124*a58d3d2aSXin Li * 125*a58d3d2aSXin Li * If lenmem is NULL, then opus_fft_alloc will allocate a cfg buffer using malloc. 126*a58d3d2aSXin Li * The returned value should be free()d when done to avoid memory leaks. 127*a58d3d2aSXin Li * 128*a58d3d2aSXin Li * The state can be placed in a user supplied buffer 'mem': 129*a58d3d2aSXin Li * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, 130*a58d3d2aSXin Li * then the function places the cfg in mem and the size used in *lenmem 131*a58d3d2aSXin Li * and returns mem. 132*a58d3d2aSXin Li * 133*a58d3d2aSXin Li * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), 134*a58d3d2aSXin Li * then the function returns NULL and places the minimum cfg 135*a58d3d2aSXin Li * buffer size in *lenmem. 136*a58d3d2aSXin Li * */ 137*a58d3d2aSXin Li 138*a58d3d2aSXin Li kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); 139*a58d3d2aSXin Li 140*a58d3d2aSXin Li kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); 141*a58d3d2aSXin Li 142*a58d3d2aSXin Li /** 143*a58d3d2aSXin Li * opus_fft(cfg,in_out_buf) 144*a58d3d2aSXin Li * 145*a58d3d2aSXin Li * Perform an FFT on a complex input buffer. 146*a58d3d2aSXin Li * for a forward FFT, 147*a58d3d2aSXin Li * fin should be f[0] , f[1] , ... ,f[nfft-1] 148*a58d3d2aSXin Li * fout will be F[0] , F[1] , ... ,F[nfft-1] 149*a58d3d2aSXin Li * Note that each element is complex and can be accessed like 150*a58d3d2aSXin Li f[k].r and f[k].i 151*a58d3d2aSXin Li * */ 152*a58d3d2aSXin Li void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 153*a58d3d2aSXin Li void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 154*a58d3d2aSXin Li 155*a58d3d2aSXin Li void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 156*a58d3d2aSXin Li void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); 157*a58d3d2aSXin Li 158*a58d3d2aSXin Li void opus_fft_free(const kiss_fft_state *cfg, int arch); 159*a58d3d2aSXin Li 160*a58d3d2aSXin Li 161*a58d3d2aSXin Li void opus_fft_free_arch_c(kiss_fft_state *st); 162*a58d3d2aSXin Li int opus_fft_alloc_arch_c(kiss_fft_state *st); 163*a58d3d2aSXin Li 164*a58d3d2aSXin Li #if !defined(OVERRIDE_OPUS_FFT) 165*a58d3d2aSXin Li /* Is run-time CPU detection enabled on this platform? */ 166*a58d3d2aSXin Li #if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) 167*a58d3d2aSXin Li 168*a58d3d2aSXin Li extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( 169*a58d3d2aSXin Li kiss_fft_state *st); 170*a58d3d2aSXin Li 171*a58d3d2aSXin Li #define opus_fft_alloc_arch(_st, arch) \ 172*a58d3d2aSXin Li ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) 173*a58d3d2aSXin Li 174*a58d3d2aSXin Li extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( 175*a58d3d2aSXin Li kiss_fft_state *st); 176*a58d3d2aSXin Li #define opus_fft_free_arch(_st, arch) \ 177*a58d3d2aSXin Li ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) 178*a58d3d2aSXin Li 179*a58d3d2aSXin Li extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, 180*a58d3d2aSXin Li const kiss_fft_cpx *fin, kiss_fft_cpx *fout); 181*a58d3d2aSXin Li #define opus_fft(_cfg, _fin, _fout, arch) \ 182*a58d3d2aSXin Li ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) 183*a58d3d2aSXin Li 184*a58d3d2aSXin Li extern void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, 185*a58d3d2aSXin Li const kiss_fft_cpx *fin, kiss_fft_cpx *fout); 186*a58d3d2aSXin Li #define opus_ifft(_cfg, _fin, _fout, arch) \ 187*a58d3d2aSXin Li ((*OPUS_IFFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) 188*a58d3d2aSXin Li 189*a58d3d2aSXin Li #else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ 190*a58d3d2aSXin Li 191*a58d3d2aSXin Li #define opus_fft_alloc_arch(_st, arch) \ 192*a58d3d2aSXin Li ((void)(arch), opus_fft_alloc_arch_c(_st)) 193*a58d3d2aSXin Li 194*a58d3d2aSXin Li #define opus_fft_free_arch(_st, arch) \ 195*a58d3d2aSXin Li ((void)(arch), opus_fft_free_arch_c(_st)) 196*a58d3d2aSXin Li 197*a58d3d2aSXin Li #define opus_fft(_cfg, _fin, _fout, arch) \ 198*a58d3d2aSXin Li ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) 199*a58d3d2aSXin Li 200*a58d3d2aSXin Li #define opus_ifft(_cfg, _fin, _fout, arch) \ 201*a58d3d2aSXin Li ((void)(arch), opus_ifft_c(_cfg, _fin, _fout)) 202*a58d3d2aSXin Li 203*a58d3d2aSXin Li #endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ 204*a58d3d2aSXin Li #endif /* end if !defined(OVERRIDE_OPUS_FFT) */ 205*a58d3d2aSXin Li 206*a58d3d2aSXin Li #ifdef __cplusplus 207*a58d3d2aSXin Li } 208*a58d3d2aSXin Li #endif 209*a58d3d2aSXin Li 210*a58d3d2aSXin Li #endif 211