xref: /aosp_15_r20/external/libopus/celt/mips/kiss_fft_mipsr1.h (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /*Copyright (c) 2013, Xiph.Org Foundation and contributors.
2*a58d3d2aSXin Li 
3*a58d3d2aSXin Li   All rights reserved.
4*a58d3d2aSXin Li 
5*a58d3d2aSXin Li   Redistribution and use in source and binary forms, with or without
6*a58d3d2aSXin Li    modification, are permitted provided that the following conditions are met:
7*a58d3d2aSXin Li 
8*a58d3d2aSXin Li     * Redistributions of source code must retain the above copyright notice,
9*a58d3d2aSXin Li        this list of conditions and the following disclaimer.
10*a58d3d2aSXin Li     * Redistributions in binary form must reproduce the above copyright notice,
11*a58d3d2aSXin Li        this list of conditions and the following disclaimer in the
12*a58d3d2aSXin Li        documentation and/or other materials provided with the distribution.
13*a58d3d2aSXin Li 
14*a58d3d2aSXin Li   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15*a58d3d2aSXin Li   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16*a58d3d2aSXin Li   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17*a58d3d2aSXin Li   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18*a58d3d2aSXin Li   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19*a58d3d2aSXin Li   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20*a58d3d2aSXin Li   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21*a58d3d2aSXin Li   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22*a58d3d2aSXin Li   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23*a58d3d2aSXin Li   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24*a58d3d2aSXin Li   POSSIBILITY OF SUCH DAMAGE.*/
25*a58d3d2aSXin Li 
26*a58d3d2aSXin Li #ifndef KISS_FFT_MIPSR1_H
27*a58d3d2aSXin Li #define KISS_FFT_MIPSR1_H
28*a58d3d2aSXin Li 
29*a58d3d2aSXin Li #if !defined(KISS_FFT_GUTS_H)
30*a58d3d2aSXin Li #error "This file should only be included from _kiss_fft_guts.h"
31*a58d3d2aSXin Li #endif
32*a58d3d2aSXin Li 
33*a58d3d2aSXin Li #ifdef FIXED_POINT
34*a58d3d2aSXin Li 
/* Generic two-product forms built on S_MUL. Each one is #undef'd again
   further down in this header, before its first use, and replaced by the
   accumulator-based function of the same name. */
#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))

#undef S_MUL_ADD
/* Returns the low 32 bits of (a*b + c*d) >> 15.

   Both products are summed at 64-bit precision and shifted once, so the
   result can differ in the last bit from shifting each product separately.

   The three instructions communicate through DSP accumulator $ac1, so they
   are emitted as ONE asm statement with $ac1 listed as clobbered: the
   compiler gives no guarantee that separate asm statements stay adjacent,
   and without the clobber it may keep a value of its own in $ac1.

   When the compiler does not advertise the DSP ASE (__mips_dsp undefined),
   an equivalent portable C computation is used instead. */
static inline int S_MUL_ADD(int a, int b, int c, int d) {
#if defined(__mips_dsp)
    int m;
    __asm__ __volatile__(
        "MULT   $ac1, %1, %2    \n\t"   /* ac1  = a * b            */
        "madd   $ac1, %3, %4    \n\t"   /* ac1 += c * d            */
        "EXTR.W %0, $ac1, %5    \n\t"   /* m    = (int)(ac1 >> 15) */
        : "=r" (m)
        : "r" (a), "r" (b), "r" (c), "r" (d), "i" (15)
        : "$ac1hi", "$ac1lo");
    return m;
#else
    /* Unsigned 64-bit arithmetic wraps like the hardware accumulator and
       avoids signed-overflow UB; bits 15..46 of the sum are the result. */
    unsigned long long acc = (unsigned long long)((long long)a * b)
                           + (unsigned long long)((long long)c * d);
    return (int)(unsigned int)(acc >> 15);
#endif
}
46*a58d3d2aSXin Li 
#undef S_MUL_SUB
/* Returns the low 32 bits of (a*b - c*d) >> 15.

   The difference is formed at 64-bit precision and shifted once.

   The three instructions communicate through DSP accumulator $ac1, so they
   are emitted as ONE asm statement with $ac1 listed as clobbered: the
   compiler gives no guarantee that separate asm statements stay adjacent,
   and without the clobber it may keep a value of its own in $ac1.

   When the compiler does not advertise the DSP ASE (__mips_dsp undefined),
   an equivalent portable C computation is used instead. */
static inline int S_MUL_SUB(int a, int b, int c, int d) {
#if defined(__mips_dsp)
    int m;
    __asm__ __volatile__(
        "MULT   $ac1, %1, %2    \n\t"   /* ac1  = a * b            */
        "msub   $ac1, %3, %4    \n\t"   /* ac1 -= c * d            */
        "EXTR.W %0, $ac1, %5    \n\t"   /* m    = (int)(ac1 >> 15) */
        : "=r" (m)
        : "r" (a), "r" (b), "r" (c), "r" (d), "i" (15)
        : "$ac1hi", "$ac1lo");
    return m;
#else
    /* Unsigned 64-bit arithmetic wraps like the hardware accumulator and
       avoids signed-overflow UB; bits 15..46 of the result are returned. */
    unsigned long long acc = (unsigned long long)((long long)a * b)
                           - (unsigned long long)((long long)c * d);
    return (int)(unsigned int)(acc >> 15);
#endif
}
55*a58d3d2aSXin Li 
56*a58d3d2aSXin Li #undef C_MUL
57*a58d3d2aSXin Li #   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
C_MUL_fun(kiss_fft_cpx a,kiss_twiddle_cpx b)58*a58d3d2aSXin Li static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
59*a58d3d2aSXin Li     kiss_fft_cpx m;
60*a58d3d2aSXin Li 
61*a58d3d2aSXin Li     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
62*a58d3d2aSXin Li     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
63*a58d3d2aSXin Li     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
64*a58d3d2aSXin Li     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
65*a58d3d2aSXin Li     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
66*a58d3d2aSXin Li     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
67*a58d3d2aSXin Li 
68*a58d3d2aSXin Li     return m;
69*a58d3d2aSXin Li }
70*a58d3d2aSXin Li #undef C_MULC
71*a58d3d2aSXin Li #   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
C_MULC_fun(kiss_fft_cpx a,kiss_twiddle_cpx b)72*a58d3d2aSXin Li static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
73*a58d3d2aSXin Li     kiss_fft_cpx m;
74*a58d3d2aSXin Li 
75*a58d3d2aSXin Li     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
76*a58d3d2aSXin Li     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
77*a58d3d2aSXin Li     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
78*a58d3d2aSXin Li     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
79*a58d3d2aSXin Li     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
80*a58d3d2aSXin Li     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
81*a58d3d2aSXin Li 
82*a58d3d2aSXin Li     return m;
83*a58d3d2aSXin Li }
84*a58d3d2aSXin Li 
85*a58d3d2aSXin Li #endif /* FIXED_POINT */
86*a58d3d2aSXin Li 
87*a58d3d2aSXin Li #define OVERRIDE_kf_bfly5
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)88*a58d3d2aSXin Li static void kf_bfly5(
89*a58d3d2aSXin Li                      kiss_fft_cpx * Fout,
90*a58d3d2aSXin Li                      const size_t fstride,
91*a58d3d2aSXin Li                      const kiss_fft_state *st,
92*a58d3d2aSXin Li                      int m,
93*a58d3d2aSXin Li                      int N,
94*a58d3d2aSXin Li                      int mm
95*a58d3d2aSXin Li                     )
96*a58d3d2aSXin Li {
97*a58d3d2aSXin Li    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
98*a58d3d2aSXin Li    int i, u;
99*a58d3d2aSXin Li    kiss_fft_cpx scratch[13];
100*a58d3d2aSXin Li 
101*a58d3d2aSXin Li    const kiss_twiddle_cpx *tw;
102*a58d3d2aSXin Li    kiss_twiddle_cpx ya,yb;
103*a58d3d2aSXin Li    kiss_fft_cpx * Fout_beg = Fout;
104*a58d3d2aSXin Li 
105*a58d3d2aSXin Li #ifdef FIXED_POINT
106*a58d3d2aSXin Li    ya.r = 10126;
107*a58d3d2aSXin Li    ya.i = -31164;
108*a58d3d2aSXin Li    yb.r = -26510;
109*a58d3d2aSXin Li    yb.i = -19261;
110*a58d3d2aSXin Li #else
111*a58d3d2aSXin Li    ya = st->twiddles[fstride*m];
112*a58d3d2aSXin Li    yb = st->twiddles[fstride*2*m];
113*a58d3d2aSXin Li #endif
114*a58d3d2aSXin Li 
115*a58d3d2aSXin Li    tw=st->twiddles;
116*a58d3d2aSXin Li 
117*a58d3d2aSXin Li    for (i=0;i<N;i++)
118*a58d3d2aSXin Li    {
119*a58d3d2aSXin Li       Fout = Fout_beg + i*mm;
120*a58d3d2aSXin Li       Fout0=Fout;
121*a58d3d2aSXin Li       Fout1=Fout0+m;
122*a58d3d2aSXin Li       Fout2=Fout0+2*m;
123*a58d3d2aSXin Li       Fout3=Fout0+3*m;
124*a58d3d2aSXin Li       Fout4=Fout0+4*m;
125*a58d3d2aSXin Li 
126*a58d3d2aSXin Li       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
127*a58d3d2aSXin Li       for ( u=0; u<m; ++u ) {
128*a58d3d2aSXin Li          scratch[0] = *Fout0;
129*a58d3d2aSXin Li 
130*a58d3d2aSXin Li 
131*a58d3d2aSXin Li          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
132*a58d3d2aSXin Li          C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
133*a58d3d2aSXin Li          C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
134*a58d3d2aSXin Li          C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
135*a58d3d2aSXin Li 
136*a58d3d2aSXin Li          C_ADD( scratch[7],scratch[1],scratch[4]);
137*a58d3d2aSXin Li          C_SUB( scratch[10],scratch[1],scratch[4]);
138*a58d3d2aSXin Li          C_ADD( scratch[8],scratch[2],scratch[3]);
139*a58d3d2aSXin Li          C_SUB( scratch[9],scratch[2],scratch[3]);
140*a58d3d2aSXin Li 
141*a58d3d2aSXin Li          Fout0->r += scratch[7].r + scratch[8].r;
142*a58d3d2aSXin Li          Fout0->i += scratch[7].i + scratch[8].i;
143*a58d3d2aSXin Li          scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
144*a58d3d2aSXin Li          scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
145*a58d3d2aSXin Li 
146*a58d3d2aSXin Li          scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
147*a58d3d2aSXin Li          scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
148*a58d3d2aSXin Li 
149*a58d3d2aSXin Li          C_SUB(*Fout1,scratch[5],scratch[6]);
150*a58d3d2aSXin Li          C_ADD(*Fout4,scratch[5],scratch[6]);
151*a58d3d2aSXin Li 
152*a58d3d2aSXin Li          scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
153*a58d3d2aSXin Li          scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
154*a58d3d2aSXin Li 
155*a58d3d2aSXin Li          scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
156*a58d3d2aSXin Li          scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
157*a58d3d2aSXin Li 
158*a58d3d2aSXin Li          C_ADD(*Fout2,scratch[11],scratch[12]);
159*a58d3d2aSXin Li          C_SUB(*Fout3,scratch[11],scratch[12]);
160*a58d3d2aSXin Li 
161*a58d3d2aSXin Li          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
162*a58d3d2aSXin Li       }
163*a58d3d2aSXin Li    }
164*a58d3d2aSXin Li }
165*a58d3d2aSXin Li 
166*a58d3d2aSXin Li 
167*a58d3d2aSXin Li #endif /* KISS_FFT_MIPSR1_H */
168