xref: /aosp_15_r20/external/rnnoise/src/kiss_fft.c (revision 1295d6828459cc82c3c29cc5d7d297215250a74b)
1*1295d682SXin Li /*Copyright (c) 2003-2004, Mark Borgerding
2*1295d682SXin Li   Lots of modifications by Jean-Marc Valin
3*1295d682SXin Li   Copyright (c) 2005-2007, Xiph.Org Foundation
4*1295d682SXin Li   Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
5*1295d682SXin Li 
6*1295d682SXin Li   All rights reserved.
7*1295d682SXin Li 
8*1295d682SXin Li   Redistribution and use in source and binary forms, with or without
9*1295d682SXin Li    modification, are permitted provided that the following conditions are met:
10*1295d682SXin Li 
11*1295d682SXin Li     * Redistributions of source code must retain the above copyright notice,
12*1295d682SXin Li        this list of conditions and the following disclaimer.
13*1295d682SXin Li     * Redistributions in binary form must reproduce the above copyright notice,
14*1295d682SXin Li        this list of conditions and the following disclaimer in the
15*1295d682SXin Li        documentation and/or other materials provided with the distribution.
16*1295d682SXin Li 
17*1295d682SXin Li   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18*1295d682SXin Li   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19*1295d682SXin Li   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20*1295d682SXin Li   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21*1295d682SXin Li   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22*1295d682SXin Li   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23*1295d682SXin Li   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24*1295d682SXin Li   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25*1295d682SXin Li   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26*1295d682SXin Li   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27*1295d682SXin Li   POSSIBILITY OF SUCH DAMAGE.*/
28*1295d682SXin Li 
29*1295d682SXin Li /* This code is originally from Mark Borgerding's KISS-FFT but has been
30*1295d682SXin Li    heavily modified to better suit Opus */
31*1295d682SXin Li 
32*1295d682SXin Li #ifndef SKIP_CONFIG_H
33*1295d682SXin Li #  ifdef HAVE_CONFIG_H
34*1295d682SXin Li #    include "config.h"
35*1295d682SXin Li #  endif
36*1295d682SXin Li #endif
37*1295d682SXin Li 
38*1295d682SXin Li #include "_kiss_fft_guts.h"
39*1295d682SXin Li #define CUSTOM_MODES
40*1295d682SXin Li 
41*1295d682SXin Li /* The guts header contains all the multiplication and addition macros that are defined for
42*1295d682SXin Li    complex numbers.  It also declares the kf_ internal functions.
43*1295d682SXin Li */
44*1295d682SXin Li 
kf_bfly2(kiss_fft_cpx * Fout,int m,int N)45*1295d682SXin Li static void kf_bfly2(
46*1295d682SXin Li                      kiss_fft_cpx * Fout,
47*1295d682SXin Li                      int m,
48*1295d682SXin Li                      int N
49*1295d682SXin Li                     )
50*1295d682SXin Li {
51*1295d682SXin Li    kiss_fft_cpx * Fout2;
52*1295d682SXin Li    int i;
53*1295d682SXin Li    (void)m;
54*1295d682SXin Li #ifdef CUSTOM_MODES
55*1295d682SXin Li    if (m==1)
56*1295d682SXin Li    {
57*1295d682SXin Li       celt_assert(m==1);
58*1295d682SXin Li       for (i=0;i<N;i++)
59*1295d682SXin Li       {
60*1295d682SXin Li          kiss_fft_cpx t;
61*1295d682SXin Li          Fout2 = Fout + 1;
62*1295d682SXin Li          t = *Fout2;
63*1295d682SXin Li          C_SUB( *Fout2 ,  *Fout , t );
64*1295d682SXin Li          C_ADDTO( *Fout ,  t );
65*1295d682SXin Li          Fout += 2;
66*1295d682SXin Li       }
67*1295d682SXin Li    } else
68*1295d682SXin Li #endif
69*1295d682SXin Li    {
70*1295d682SXin Li       opus_val16 tw;
71*1295d682SXin Li       tw = QCONST16(0.7071067812f, 15);
72*1295d682SXin Li       /* We know that m==4 here because the radix-2 is just after a radix-4 */
73*1295d682SXin Li       celt_assert(m==4);
74*1295d682SXin Li       for (i=0;i<N;i++)
75*1295d682SXin Li       {
76*1295d682SXin Li          kiss_fft_cpx t;
77*1295d682SXin Li          Fout2 = Fout + 4;
78*1295d682SXin Li          t = Fout2[0];
79*1295d682SXin Li          C_SUB( Fout2[0] ,  Fout[0] , t );
80*1295d682SXin Li          C_ADDTO( Fout[0] ,  t );
81*1295d682SXin Li 
82*1295d682SXin Li          t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
83*1295d682SXin Li          t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
84*1295d682SXin Li          C_SUB( Fout2[1] ,  Fout[1] , t );
85*1295d682SXin Li          C_ADDTO( Fout[1] ,  t );
86*1295d682SXin Li 
87*1295d682SXin Li          t.r = Fout2[2].i;
88*1295d682SXin Li          t.i = -Fout2[2].r;
89*1295d682SXin Li          C_SUB( Fout2[2] ,  Fout[2] , t );
90*1295d682SXin Li          C_ADDTO( Fout[2] ,  t );
91*1295d682SXin Li 
92*1295d682SXin Li          t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
93*1295d682SXin Li          t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
94*1295d682SXin Li          C_SUB( Fout2[3] ,  Fout[3] , t );
95*1295d682SXin Li          C_ADDTO( Fout[3] ,  t );
96*1295d682SXin Li          Fout += 8;
97*1295d682SXin Li       }
98*1295d682SXin Li    }
99*1295d682SXin Li }
100*1295d682SXin Li 
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)101*1295d682SXin Li static void kf_bfly4(
102*1295d682SXin Li                      kiss_fft_cpx * Fout,
103*1295d682SXin Li                      const size_t fstride,
104*1295d682SXin Li                      const kiss_fft_state *st,
105*1295d682SXin Li                      int m,
106*1295d682SXin Li                      int N,
107*1295d682SXin Li                      int mm
108*1295d682SXin Li                     )
109*1295d682SXin Li {
110*1295d682SXin Li    int i;
111*1295d682SXin Li 
112*1295d682SXin Li    if (m==1)
113*1295d682SXin Li    {
114*1295d682SXin Li       /* Degenerate case where all the twiddles are 1. */
115*1295d682SXin Li       for (i=0;i<N;i++)
116*1295d682SXin Li       {
117*1295d682SXin Li          kiss_fft_cpx scratch0, scratch1;
118*1295d682SXin Li 
119*1295d682SXin Li          C_SUB( scratch0 , *Fout, Fout[2] );
120*1295d682SXin Li          C_ADDTO(*Fout, Fout[2]);
121*1295d682SXin Li          C_ADD( scratch1 , Fout[1] , Fout[3] );
122*1295d682SXin Li          C_SUB( Fout[2], *Fout, scratch1 );
123*1295d682SXin Li          C_ADDTO( *Fout , scratch1 );
124*1295d682SXin Li          C_SUB( scratch1 , Fout[1] , Fout[3] );
125*1295d682SXin Li 
126*1295d682SXin Li          Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
127*1295d682SXin Li          Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
128*1295d682SXin Li          Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
129*1295d682SXin Li          Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
130*1295d682SXin Li          Fout+=4;
131*1295d682SXin Li       }
132*1295d682SXin Li    } else {
133*1295d682SXin Li       int j;
134*1295d682SXin Li       kiss_fft_cpx scratch[6];
135*1295d682SXin Li       const kiss_twiddle_cpx *tw1,*tw2,*tw3;
136*1295d682SXin Li       const int m2=2*m;
137*1295d682SXin Li       const int m3=3*m;
138*1295d682SXin Li       kiss_fft_cpx * Fout_beg = Fout;
139*1295d682SXin Li       for (i=0;i<N;i++)
140*1295d682SXin Li       {
141*1295d682SXin Li          Fout = Fout_beg + i*mm;
142*1295d682SXin Li          tw3 = tw2 = tw1 = st->twiddles;
143*1295d682SXin Li          /* m is guaranteed to be a multiple of 4. */
144*1295d682SXin Li          for (j=0;j<m;j++)
145*1295d682SXin Li          {
146*1295d682SXin Li             C_MUL(scratch[0],Fout[m] , *tw1 );
147*1295d682SXin Li             C_MUL(scratch[1],Fout[m2] , *tw2 );
148*1295d682SXin Li             C_MUL(scratch[2],Fout[m3] , *tw3 );
149*1295d682SXin Li 
150*1295d682SXin Li             C_SUB( scratch[5] , *Fout, scratch[1] );
151*1295d682SXin Li             C_ADDTO(*Fout, scratch[1]);
152*1295d682SXin Li             C_ADD( scratch[3] , scratch[0] , scratch[2] );
153*1295d682SXin Li             C_SUB( scratch[4] , scratch[0] , scratch[2] );
154*1295d682SXin Li             C_SUB( Fout[m2], *Fout, scratch[3] );
155*1295d682SXin Li             tw1 += fstride;
156*1295d682SXin Li             tw2 += fstride*2;
157*1295d682SXin Li             tw3 += fstride*3;
158*1295d682SXin Li             C_ADDTO( *Fout , scratch[3] );
159*1295d682SXin Li 
160*1295d682SXin Li             Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
161*1295d682SXin Li             Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
162*1295d682SXin Li             Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
163*1295d682SXin Li             Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
164*1295d682SXin Li             ++Fout;
165*1295d682SXin Li          }
166*1295d682SXin Li       }
167*1295d682SXin Li    }
168*1295d682SXin Li }
169*1295d682SXin Li 
170*1295d682SXin Li 
171*1295d682SXin Li #ifndef RADIX_TWO_ONLY
172*1295d682SXin Li 
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)173*1295d682SXin Li static void kf_bfly3(
174*1295d682SXin Li                      kiss_fft_cpx * Fout,
175*1295d682SXin Li                      const size_t fstride,
176*1295d682SXin Li                      const kiss_fft_state *st,
177*1295d682SXin Li                      int m,
178*1295d682SXin Li                      int N,
179*1295d682SXin Li                      int mm
180*1295d682SXin Li                     )
181*1295d682SXin Li {
182*1295d682SXin Li    int i;
183*1295d682SXin Li    size_t k;
184*1295d682SXin Li    const size_t m2 = 2*m;
185*1295d682SXin Li    const kiss_twiddle_cpx *tw1,*tw2;
186*1295d682SXin Li    kiss_fft_cpx scratch[5];
187*1295d682SXin Li    kiss_twiddle_cpx epi3;
188*1295d682SXin Li 
189*1295d682SXin Li    kiss_fft_cpx * Fout_beg = Fout;
190*1295d682SXin Li #ifdef FIXED_POINT
191*1295d682SXin Li    /*epi3.r = -16384;*/ /* Unused */
192*1295d682SXin Li    epi3.i = -28378;
193*1295d682SXin Li #else
194*1295d682SXin Li    epi3 = st->twiddles[fstride*m];
195*1295d682SXin Li #endif
196*1295d682SXin Li    for (i=0;i<N;i++)
197*1295d682SXin Li    {
198*1295d682SXin Li       Fout = Fout_beg + i*mm;
199*1295d682SXin Li       tw1=tw2=st->twiddles;
200*1295d682SXin Li       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
201*1295d682SXin Li       k=m;
202*1295d682SXin Li       do {
203*1295d682SXin Li 
204*1295d682SXin Li          C_MUL(scratch[1],Fout[m] , *tw1);
205*1295d682SXin Li          C_MUL(scratch[2],Fout[m2] , *tw2);
206*1295d682SXin Li 
207*1295d682SXin Li          C_ADD(scratch[3],scratch[1],scratch[2]);
208*1295d682SXin Li          C_SUB(scratch[0],scratch[1],scratch[2]);
209*1295d682SXin Li          tw1 += fstride;
210*1295d682SXin Li          tw2 += fstride*2;
211*1295d682SXin Li 
212*1295d682SXin Li          Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
213*1295d682SXin Li          Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
214*1295d682SXin Li 
215*1295d682SXin Li          C_MULBYSCALAR( scratch[0] , epi3.i );
216*1295d682SXin Li 
217*1295d682SXin Li          C_ADDTO(*Fout,scratch[3]);
218*1295d682SXin Li 
219*1295d682SXin Li          Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
220*1295d682SXin Li          Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
221*1295d682SXin Li 
222*1295d682SXin Li          Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
223*1295d682SXin Li          Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
224*1295d682SXin Li 
225*1295d682SXin Li          ++Fout;
226*1295d682SXin Li       } while(--k);
227*1295d682SXin Li    }
228*1295d682SXin Li }
229*1295d682SXin Li 
230*1295d682SXin Li 
231*1295d682SXin Li #ifndef OVERRIDE_kf_bfly5
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)232*1295d682SXin Li static void kf_bfly5(
233*1295d682SXin Li                      kiss_fft_cpx * Fout,
234*1295d682SXin Li                      const size_t fstride,
235*1295d682SXin Li                      const kiss_fft_state *st,
236*1295d682SXin Li                      int m,
237*1295d682SXin Li                      int N,
238*1295d682SXin Li                      int mm
239*1295d682SXin Li                     )
240*1295d682SXin Li {
241*1295d682SXin Li    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
242*1295d682SXin Li    int i, u;
243*1295d682SXin Li    kiss_fft_cpx scratch[13];
244*1295d682SXin Li    const kiss_twiddle_cpx *tw;
245*1295d682SXin Li    kiss_twiddle_cpx ya,yb;
246*1295d682SXin Li    kiss_fft_cpx * Fout_beg = Fout;
247*1295d682SXin Li 
248*1295d682SXin Li #ifdef FIXED_POINT
249*1295d682SXin Li    ya.r = 10126;
250*1295d682SXin Li    ya.i = -31164;
251*1295d682SXin Li    yb.r = -26510;
252*1295d682SXin Li    yb.i = -19261;
253*1295d682SXin Li #else
254*1295d682SXin Li    ya = st->twiddles[fstride*m];
255*1295d682SXin Li    yb = st->twiddles[fstride*2*m];
256*1295d682SXin Li #endif
257*1295d682SXin Li    tw=st->twiddles;
258*1295d682SXin Li 
259*1295d682SXin Li    for (i=0;i<N;i++)
260*1295d682SXin Li    {
261*1295d682SXin Li       Fout = Fout_beg + i*mm;
262*1295d682SXin Li       Fout0=Fout;
263*1295d682SXin Li       Fout1=Fout0+m;
264*1295d682SXin Li       Fout2=Fout0+2*m;
265*1295d682SXin Li       Fout3=Fout0+3*m;
266*1295d682SXin Li       Fout4=Fout0+4*m;
267*1295d682SXin Li 
268*1295d682SXin Li       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
269*1295d682SXin Li       for ( u=0; u<m; ++u ) {
270*1295d682SXin Li          scratch[0] = *Fout0;
271*1295d682SXin Li 
272*1295d682SXin Li          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
273*1295d682SXin Li          C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
274*1295d682SXin Li          C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
275*1295d682SXin Li          C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
276*1295d682SXin Li 
277*1295d682SXin Li          C_ADD( scratch[7],scratch[1],scratch[4]);
278*1295d682SXin Li          C_SUB( scratch[10],scratch[1],scratch[4]);
279*1295d682SXin Li          C_ADD( scratch[8],scratch[2],scratch[3]);
280*1295d682SXin Li          C_SUB( scratch[9],scratch[2],scratch[3]);
281*1295d682SXin Li 
282*1295d682SXin Li          Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
283*1295d682SXin Li          Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
284*1295d682SXin Li 
285*1295d682SXin Li          scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
286*1295d682SXin Li          scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
287*1295d682SXin Li 
288*1295d682SXin Li          scratch[6].r =  ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
289*1295d682SXin Li          scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
290*1295d682SXin Li 
291*1295d682SXin Li          C_SUB(*Fout1,scratch[5],scratch[6]);
292*1295d682SXin Li          C_ADD(*Fout4,scratch[5],scratch[6]);
293*1295d682SXin Li 
294*1295d682SXin Li          scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
295*1295d682SXin Li          scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
296*1295d682SXin Li          scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
297*1295d682SXin Li          scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
298*1295d682SXin Li 
299*1295d682SXin Li          C_ADD(*Fout2,scratch[11],scratch[12]);
300*1295d682SXin Li          C_SUB(*Fout3,scratch[11],scratch[12]);
301*1295d682SXin Li 
302*1295d682SXin Li          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
303*1295d682SXin Li       }
304*1295d682SXin Li    }
305*1295d682SXin Li }
306*1295d682SXin Li #endif /* OVERRIDE_kf_bfly5 */
307*1295d682SXin Li 
308*1295d682SXin Li 
309*1295d682SXin Li #endif
310*1295d682SXin Li 
311*1295d682SXin Li 
312*1295d682SXin Li #ifdef CUSTOM_MODES
313*1295d682SXin Li 
314*1295d682SXin Li static
compute_bitrev_table(int Fout,opus_int16 * f,const size_t fstride,int in_stride,opus_int16 * factors,const kiss_fft_state * st)315*1295d682SXin Li void compute_bitrev_table(
316*1295d682SXin Li          int Fout,
317*1295d682SXin Li          opus_int16 *f,
318*1295d682SXin Li          const size_t fstride,
319*1295d682SXin Li          int in_stride,
320*1295d682SXin Li          opus_int16 * factors,
321*1295d682SXin Li          const kiss_fft_state *st
322*1295d682SXin Li             )
323*1295d682SXin Li {
324*1295d682SXin Li    const int p=*factors++; /* the radix  */
325*1295d682SXin Li    const int m=*factors++; /* stage's fft length/p */
326*1295d682SXin Li 
327*1295d682SXin Li     /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
328*1295d682SXin Li    if (m==1)
329*1295d682SXin Li    {
330*1295d682SXin Li       int j;
331*1295d682SXin Li       for (j=0;j<p;j++)
332*1295d682SXin Li       {
333*1295d682SXin Li          *f = Fout+j;
334*1295d682SXin Li          f += fstride*in_stride;
335*1295d682SXin Li       }
336*1295d682SXin Li    } else {
337*1295d682SXin Li       int j;
338*1295d682SXin Li       for (j=0;j<p;j++)
339*1295d682SXin Li       {
340*1295d682SXin Li          compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
341*1295d682SXin Li          f += fstride*in_stride;
342*1295d682SXin Li          Fout += m;
343*1295d682SXin Li       }
344*1295d682SXin Li    }
345*1295d682SXin Li }
346*1295d682SXin Li 
347*1295d682SXin Li /*  facbuf is populated by p1,m1,p2,m2, ...
348*1295d682SXin Li     where
349*1295d682SXin Li     p[i] * m[i] = m[i-1]
350*1295d682SXin Li     m0 = n                  */
351*1295d682SXin Li static
kf_factor(int n,opus_int16 * facbuf)352*1295d682SXin Li int kf_factor(int n,opus_int16 * facbuf)
353*1295d682SXin Li {
354*1295d682SXin Li     int p=4;
355*1295d682SXin Li     int i;
356*1295d682SXin Li     int stages=0;
357*1295d682SXin Li     int nbak = n;
358*1295d682SXin Li 
359*1295d682SXin Li     /*factor out powers of 4, powers of 2, then any remaining primes */
360*1295d682SXin Li     do {
361*1295d682SXin Li         while (n % p) {
362*1295d682SXin Li             switch (p) {
363*1295d682SXin Li                 case 4: p = 2; break;
364*1295d682SXin Li                 case 2: p = 3; break;
365*1295d682SXin Li                 default: p += 2; break;
366*1295d682SXin Li             }
367*1295d682SXin Li             if (p>32000 || (opus_int32)p*(opus_int32)p > n)
368*1295d682SXin Li                 p = n;          /* no more factors, skip to end */
369*1295d682SXin Li         }
370*1295d682SXin Li         n /= p;
371*1295d682SXin Li #ifdef RADIX_TWO_ONLY
372*1295d682SXin Li         if (p!=2 && p != 4)
373*1295d682SXin Li #else
374*1295d682SXin Li         if (p>5)
375*1295d682SXin Li #endif
376*1295d682SXin Li         {
377*1295d682SXin Li            return 0;
378*1295d682SXin Li         }
379*1295d682SXin Li         facbuf[2*stages] = p;
380*1295d682SXin Li         if (p==2 && stages > 1)
381*1295d682SXin Li         {
382*1295d682SXin Li            facbuf[2*stages] = 4;
383*1295d682SXin Li            facbuf[2] = 2;
384*1295d682SXin Li         }
385*1295d682SXin Li         stages++;
386*1295d682SXin Li     } while (n > 1);
387*1295d682SXin Li     n = nbak;
388*1295d682SXin Li     /* Reverse the order to get the radix 4 at the end, so we can use the
389*1295d682SXin Li        fast degenerate case. It turns out that reversing the order also
390*1295d682SXin Li        improves the noise behaviour. */
391*1295d682SXin Li     for (i=0;i<stages/2;i++)
392*1295d682SXin Li     {
393*1295d682SXin Li        int tmp;
394*1295d682SXin Li        tmp = facbuf[2*i];
395*1295d682SXin Li        facbuf[2*i] = facbuf[2*(stages-i-1)];
396*1295d682SXin Li        facbuf[2*(stages-i-1)] = tmp;
397*1295d682SXin Li     }
398*1295d682SXin Li     for (i=0;i<stages;i++)
399*1295d682SXin Li     {
400*1295d682SXin Li         n /= facbuf[2*i];
401*1295d682SXin Li         facbuf[2*i+1] = n;
402*1295d682SXin Li     }
403*1295d682SXin Li     return 1;
404*1295d682SXin Li }
405*1295d682SXin Li 
compute_twiddles(kiss_twiddle_cpx * twiddles,int nfft)406*1295d682SXin Li static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
407*1295d682SXin Li {
408*1295d682SXin Li    int i;
409*1295d682SXin Li #ifdef FIXED_POINT
410*1295d682SXin Li    for (i=0;i<nfft;++i) {
411*1295d682SXin Li       opus_val32 phase = -i;
412*1295d682SXin Li       kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
413*1295d682SXin Li    }
414*1295d682SXin Li #else
415*1295d682SXin Li    for (i=0;i<nfft;++i) {
416*1295d682SXin Li       const double pi=3.14159265358979323846264338327;
417*1295d682SXin Li       double phase = ( -2*pi /nfft ) * i;
418*1295d682SXin Li       kf_cexp(twiddles+i, phase );
419*1295d682SXin Li    }
420*1295d682SXin Li #endif
421*1295d682SXin Li }
422*1295d682SXin Li 
opus_fft_alloc_arch_c(kiss_fft_state * st)423*1295d682SXin Li int opus_fft_alloc_arch_c(kiss_fft_state *st) {
424*1295d682SXin Li    (void)st;
425*1295d682SXin Li    return 0;
426*1295d682SXin Li }
427*1295d682SXin Li 
428*1295d682SXin Li /*
429*1295d682SXin Li  *
430*1295d682SXin Li  * Allocates all necessary storage space for the fft and ifft.
431*1295d682SXin Li  * The return value is a contiguous block of memory.  As such,
432*1295d682SXin Li  * It can be freed with free().
433*1295d682SXin Li  * */
opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,const kiss_fft_state * base,int arch)434*1295d682SXin Li kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
435*1295d682SXin Li                                         const kiss_fft_state *base, int arch)
436*1295d682SXin Li {
437*1295d682SXin Li     kiss_fft_state *st=NULL;
438*1295d682SXin Li     size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
439*1295d682SXin Li 
440*1295d682SXin Li     if ( lenmem==NULL ) {
441*1295d682SXin Li         st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
442*1295d682SXin Li     }else{
443*1295d682SXin Li         if (mem != NULL && *lenmem >= memneeded)
444*1295d682SXin Li             st = (kiss_fft_state*)mem;
445*1295d682SXin Li         *lenmem = memneeded;
446*1295d682SXin Li     }
447*1295d682SXin Li     if (st) {
448*1295d682SXin Li         opus_int16 *bitrev;
449*1295d682SXin Li         kiss_twiddle_cpx *twiddles;
450*1295d682SXin Li 
451*1295d682SXin Li         st->nfft=nfft;
452*1295d682SXin Li #ifdef FIXED_POINT
453*1295d682SXin Li         st->scale_shift = celt_ilog2(st->nfft);
454*1295d682SXin Li         if (st->nfft == 1<<st->scale_shift)
455*1295d682SXin Li            st->scale = Q15ONE;
456*1295d682SXin Li         else
457*1295d682SXin Li            st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
458*1295d682SXin Li #else
459*1295d682SXin Li         st->scale = 1.f/nfft;
460*1295d682SXin Li #endif
461*1295d682SXin Li         if (base != NULL)
462*1295d682SXin Li         {
463*1295d682SXin Li            st->twiddles = base->twiddles;
464*1295d682SXin Li            st->shift = 0;
465*1295d682SXin Li            while (st->shift < 32 && nfft<<st->shift != base->nfft)
466*1295d682SXin Li               st->shift++;
467*1295d682SXin Li            if (st->shift>=32)
468*1295d682SXin Li               goto fail;
469*1295d682SXin Li         } else {
470*1295d682SXin Li            st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
471*1295d682SXin Li            compute_twiddles(twiddles, nfft);
472*1295d682SXin Li            st->shift = -1;
473*1295d682SXin Li         }
474*1295d682SXin Li         if (!kf_factor(nfft,st->factors))
475*1295d682SXin Li         {
476*1295d682SXin Li            goto fail;
477*1295d682SXin Li         }
478*1295d682SXin Li 
479*1295d682SXin Li         /* bitrev */
480*1295d682SXin Li         st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
481*1295d682SXin Li         if (st->bitrev==NULL)
482*1295d682SXin Li             goto fail;
483*1295d682SXin Li         compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
484*1295d682SXin Li 
485*1295d682SXin Li         /* Initialize architecture specific fft parameters */
486*1295d682SXin Li         if (opus_fft_alloc_arch(st, arch))
487*1295d682SXin Li             goto fail;
488*1295d682SXin Li     }
489*1295d682SXin Li     return st;
490*1295d682SXin Li fail:
491*1295d682SXin Li     opus_fft_free(st, arch);
492*1295d682SXin Li     return NULL;
493*1295d682SXin Li }
494*1295d682SXin Li 
opus_fft_alloc(int nfft,void * mem,size_t * lenmem,int arch)495*1295d682SXin Li kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
496*1295d682SXin Li {
497*1295d682SXin Li    return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
498*1295d682SXin Li }
499*1295d682SXin Li 
opus_fft_free_arch_c(kiss_fft_state * st)500*1295d682SXin Li void opus_fft_free_arch_c(kiss_fft_state *st) {
501*1295d682SXin Li    (void)st;
502*1295d682SXin Li }
503*1295d682SXin Li 
opus_fft_free(const kiss_fft_state * cfg,int arch)504*1295d682SXin Li void opus_fft_free(const kiss_fft_state *cfg, int arch)
505*1295d682SXin Li {
506*1295d682SXin Li    if (cfg)
507*1295d682SXin Li    {
508*1295d682SXin Li       opus_fft_free_arch((kiss_fft_state *)cfg, arch);
509*1295d682SXin Li       opus_free((opus_int16*)cfg->bitrev);
510*1295d682SXin Li       if (cfg->shift < 0)
511*1295d682SXin Li          opus_free((kiss_twiddle_cpx*)cfg->twiddles);
512*1295d682SXin Li       opus_free((kiss_fft_state*)cfg);
513*1295d682SXin Li    }
514*1295d682SXin Li }
515*1295d682SXin Li 
516*1295d682SXin Li #endif /* CUSTOM_MODES */
517*1295d682SXin Li 
opus_fft_impl(const kiss_fft_state * st,kiss_fft_cpx * fout)518*1295d682SXin Li void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
519*1295d682SXin Li {
520*1295d682SXin Li     int m2, m;
521*1295d682SXin Li     int p;
522*1295d682SXin Li     int L;
523*1295d682SXin Li     int fstride[MAXFACTORS];
524*1295d682SXin Li     int i;
525*1295d682SXin Li     int shift;
526*1295d682SXin Li 
527*1295d682SXin Li     /* st->shift can be -1 */
528*1295d682SXin Li     shift = st->shift>0 ? st->shift : 0;
529*1295d682SXin Li 
530*1295d682SXin Li     fstride[0] = 1;
531*1295d682SXin Li     L=0;
532*1295d682SXin Li     do {
533*1295d682SXin Li        p = st->factors[2*L];
534*1295d682SXin Li        m = st->factors[2*L+1];
535*1295d682SXin Li        fstride[L+1] = fstride[L]*p;
536*1295d682SXin Li        L++;
537*1295d682SXin Li     } while(m!=1);
538*1295d682SXin Li     m = st->factors[2*L-1];
539*1295d682SXin Li     for (i=L-1;i>=0;i--)
540*1295d682SXin Li     {
541*1295d682SXin Li        if (i!=0)
542*1295d682SXin Li           m2 = st->factors[2*i-1];
543*1295d682SXin Li        else
544*1295d682SXin Li           m2 = 1;
545*1295d682SXin Li        switch (st->factors[2*i])
546*1295d682SXin Li        {
547*1295d682SXin Li        case 2:
548*1295d682SXin Li           kf_bfly2(fout, m, fstride[i]);
549*1295d682SXin Li           break;
550*1295d682SXin Li        case 4:
551*1295d682SXin Li           kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
552*1295d682SXin Li           break;
553*1295d682SXin Li  #ifndef RADIX_TWO_ONLY
554*1295d682SXin Li        case 3:
555*1295d682SXin Li           kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
556*1295d682SXin Li           break;
557*1295d682SXin Li        case 5:
558*1295d682SXin Li           kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
559*1295d682SXin Li           break;
560*1295d682SXin Li  #endif
561*1295d682SXin Li        }
562*1295d682SXin Li        m = m2;
563*1295d682SXin Li     }
564*1295d682SXin Li }
565*1295d682SXin Li 
opus_fft_c(const kiss_fft_state * st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)566*1295d682SXin Li void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
567*1295d682SXin Li {
568*1295d682SXin Li    int i;
569*1295d682SXin Li    opus_val16 scale;
570*1295d682SXin Li #ifdef FIXED_POINT
571*1295d682SXin Li    /* Allows us to scale with MULT16_32_Q16(), which is faster than
572*1295d682SXin Li       MULT16_32_Q15() on ARM. */
573*1295d682SXin Li    int scale_shift = st->scale_shift-1;
574*1295d682SXin Li #endif
575*1295d682SXin Li    scale = st->scale;
576*1295d682SXin Li 
577*1295d682SXin Li    celt_assert2 (fin != fout, "In-place FFT not supported");
578*1295d682SXin Li    /* Bit-reverse the input */
579*1295d682SXin Li    for (i=0;i<st->nfft;i++)
580*1295d682SXin Li    {
581*1295d682SXin Li       kiss_fft_cpx x = fin[i];
582*1295d682SXin Li       fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
583*1295d682SXin Li       fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
584*1295d682SXin Li    }
585*1295d682SXin Li    opus_fft_impl(st, fout);
586*1295d682SXin Li }
587*1295d682SXin Li 
588*1295d682SXin Li 
opus_ifft_c(const kiss_fft_state * st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)589*1295d682SXin Li void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
590*1295d682SXin Li {
591*1295d682SXin Li    int i;
592*1295d682SXin Li    celt_assert2 (fin != fout, "In-place FFT not supported");
593*1295d682SXin Li    /* Bit-reverse the input */
594*1295d682SXin Li    for (i=0;i<st->nfft;i++)
595*1295d682SXin Li       fout[st->bitrev[i]] = fin[i];
596*1295d682SXin Li    for (i=0;i<st->nfft;i++)
597*1295d682SXin Li       fout[i].i = -fout[i].i;
598*1295d682SXin Li    opus_fft_impl(st, fout);
599*1295d682SXin Li    for (i=0;i<st->nfft;i++)
600*1295d682SXin Li       fout[i].i = -fout[i].i;
601*1295d682SXin Li }
602