xref: /aosp_15_r20/external/rnnoise/src/celt_lpc.c (revision 1295d6828459cc82c3c29cc5d7d297215250a74b)
1*1295d682SXin Li /* Copyright (c) 2009-2010 Xiph.Org Foundation
2*1295d682SXin Li    Written by Jean-Marc Valin */
3*1295d682SXin Li /*
4*1295d682SXin Li    Redistribution and use in source and binary forms, with or without
5*1295d682SXin Li    modification, are permitted provided that the following conditions
6*1295d682SXin Li    are met:
7*1295d682SXin Li 
8*1295d682SXin Li    - Redistributions of source code must retain the above copyright
9*1295d682SXin Li    notice, this list of conditions and the following disclaimer.
10*1295d682SXin Li 
11*1295d682SXin Li    - Redistributions in binary form must reproduce the above copyright
12*1295d682SXin Li    notice, this list of conditions and the following disclaimer in the
13*1295d682SXin Li    documentation and/or other materials provided with the distribution.
14*1295d682SXin Li 
15*1295d682SXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16*1295d682SXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17*1295d682SXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18*1295d682SXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19*1295d682SXin Li    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20*1295d682SXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21*1295d682SXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22*1295d682SXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23*1295d682SXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24*1295d682SXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*1295d682SXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*1295d682SXin Li */
27*1295d682SXin Li 
28*1295d682SXin Li #ifdef HAVE_CONFIG_H
29*1295d682SXin Li #include "config.h"
30*1295d682SXin Li #endif
31*1295d682SXin Li 
32*1295d682SXin Li #include "celt_lpc.h"
33*1295d682SXin Li #include "arch.h"
34*1295d682SXin Li #include "common.h"
35*1295d682SXin Li #include "pitch.h"
36*1295d682SXin Li 
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37*1295d682SXin Li void _celt_lpc(
38*1295d682SXin Li       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39*1295d682SXin Li const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40*1295d682SXin Li int          p
41*1295d682SXin Li )
42*1295d682SXin Li {
43*1295d682SXin Li    int i, j;
44*1295d682SXin Li    opus_val32 r;
45*1295d682SXin Li    opus_val32 error = ac[0];
46*1295d682SXin Li #ifdef FIXED_POINT
47*1295d682SXin Li    opus_val32 lpc[LPC_ORDER];
48*1295d682SXin Li #else
49*1295d682SXin Li    float *lpc = _lpc;
50*1295d682SXin Li #endif
51*1295d682SXin Li 
52*1295d682SXin Li    RNN_CLEAR(lpc, p);
53*1295d682SXin Li    if (ac[0] != 0)
54*1295d682SXin Li    {
55*1295d682SXin Li       for (i = 0; i < p; i++) {
56*1295d682SXin Li          /* Sum up this iteration's reflection coefficient */
57*1295d682SXin Li          opus_val32 rr = 0;
58*1295d682SXin Li          for (j = 0; j < i; j++)
59*1295d682SXin Li             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
60*1295d682SXin Li          rr += SHR32(ac[i + 1],3);
61*1295d682SXin Li          r = -SHL32(rr,3)/error;
62*1295d682SXin Li          /*  Update LPC coefficients and total error */
63*1295d682SXin Li          lpc[i] = SHR32(r,3);
64*1295d682SXin Li          for (j = 0; j < (i+1)>>1; j++)
65*1295d682SXin Li          {
66*1295d682SXin Li             opus_val32 tmp1, tmp2;
67*1295d682SXin Li             tmp1 = lpc[j];
68*1295d682SXin Li             tmp2 = lpc[i-1-j];
69*1295d682SXin Li             lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
70*1295d682SXin Li             lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
71*1295d682SXin Li          }
72*1295d682SXin Li 
73*1295d682SXin Li          error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
74*1295d682SXin Li          /* Bail out once we get 30 dB gain */
75*1295d682SXin Li #ifdef FIXED_POINT
76*1295d682SXin Li          if (error<SHR32(ac[0],10))
77*1295d682SXin Li             break;
78*1295d682SXin Li #else
79*1295d682SXin Li          if (error<.001f*ac[0])
80*1295d682SXin Li             break;
81*1295d682SXin Li #endif
82*1295d682SXin Li       }
83*1295d682SXin Li    }
84*1295d682SXin Li #ifdef FIXED_POINT
85*1295d682SXin Li    for (i=0;i<p;i++)
86*1295d682SXin Li       _lpc[i] = ROUND16(lpc[i],16);
87*1295d682SXin Li #endif
88*1295d682SXin Li }
89*1295d682SXin Li 
90*1295d682SXin Li 
celt_fir(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,int ord)91*1295d682SXin Li void celt_fir(
92*1295d682SXin Li          const opus_val16 *x,
93*1295d682SXin Li          const opus_val16 *num,
94*1295d682SXin Li          opus_val16 *y,
95*1295d682SXin Li          int N,
96*1295d682SXin Li          int ord)
97*1295d682SXin Li {
98*1295d682SXin Li    int i,j;
99*1295d682SXin Li    opus_val16 rnum[ord];
100*1295d682SXin Li    for(i=0;i<ord;i++)
101*1295d682SXin Li       rnum[i] = num[ord-i-1];
102*1295d682SXin Li    for (i=0;i<N-3;i+=4)
103*1295d682SXin Li    {
104*1295d682SXin Li       opus_val32 sum[4];
105*1295d682SXin Li       sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
106*1295d682SXin Li       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
107*1295d682SXin Li       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
108*1295d682SXin Li       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
109*1295d682SXin Li       xcorr_kernel(rnum, x+i-ord, sum, ord);
110*1295d682SXin Li       y[i  ] = ROUND16(sum[0], SIG_SHIFT);
111*1295d682SXin Li       y[i+1] = ROUND16(sum[1], SIG_SHIFT);
112*1295d682SXin Li       y[i+2] = ROUND16(sum[2], SIG_SHIFT);
113*1295d682SXin Li       y[i+3] = ROUND16(sum[3], SIG_SHIFT);
114*1295d682SXin Li    }
115*1295d682SXin Li    for (;i<N;i++)
116*1295d682SXin Li    {
117*1295d682SXin Li       opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
118*1295d682SXin Li       for (j=0;j<ord;j++)
119*1295d682SXin Li          sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
120*1295d682SXin Li       y[i] = ROUND16(sum, SIG_SHIFT);
121*1295d682SXin Li    }
122*1295d682SXin Li }
123*1295d682SXin Li 
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem)124*1295d682SXin Li void celt_iir(const opus_val32 *_x,
125*1295d682SXin Li          const opus_val16 *den,
126*1295d682SXin Li          opus_val32 *_y,
127*1295d682SXin Li          int N,
128*1295d682SXin Li          int ord,
129*1295d682SXin Li          opus_val16 *mem)
130*1295d682SXin Li {
131*1295d682SXin Li #ifdef SMALL_FOOTPRINT
132*1295d682SXin Li    int i,j;
133*1295d682SXin Li    for (i=0;i<N;i++)
134*1295d682SXin Li    {
135*1295d682SXin Li       opus_val32 sum = _x[i];
136*1295d682SXin Li       for (j=0;j<ord;j++)
137*1295d682SXin Li       {
138*1295d682SXin Li          sum -= MULT16_16(den[j],mem[j]);
139*1295d682SXin Li       }
140*1295d682SXin Li       for (j=ord-1;j>=1;j--)
141*1295d682SXin Li       {
142*1295d682SXin Li          mem[j]=mem[j-1];
143*1295d682SXin Li       }
144*1295d682SXin Li       mem[0] = SROUND16(sum, SIG_SHIFT);
145*1295d682SXin Li       _y[i] = sum;
146*1295d682SXin Li    }
147*1295d682SXin Li #else
148*1295d682SXin Li    int i,j;
149*1295d682SXin Li    celt_assert((ord&3)==0);
150*1295d682SXin Li    opus_val16 rden[ord];
151*1295d682SXin Li    opus_val16 y[N+ord];
152*1295d682SXin Li    for(i=0;i<ord;i++)
153*1295d682SXin Li       rden[i] = den[ord-i-1];
154*1295d682SXin Li    for(i=0;i<ord;i++)
155*1295d682SXin Li       y[i] = -mem[ord-i-1];
156*1295d682SXin Li    for(;i<N+ord;i++)
157*1295d682SXin Li       y[i]=0;
158*1295d682SXin Li    for (i=0;i<N-3;i+=4)
159*1295d682SXin Li    {
160*1295d682SXin Li       /* Unroll by 4 as if it were an FIR filter */
161*1295d682SXin Li       opus_val32 sum[4];
162*1295d682SXin Li       sum[0]=_x[i];
163*1295d682SXin Li       sum[1]=_x[i+1];
164*1295d682SXin Li       sum[2]=_x[i+2];
165*1295d682SXin Li       sum[3]=_x[i+3];
166*1295d682SXin Li       xcorr_kernel(rden, y+i, sum, ord);
167*1295d682SXin Li 
168*1295d682SXin Li       /* Patch up the result to compensate for the fact that this is an IIR */
169*1295d682SXin Li       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
170*1295d682SXin Li       _y[i  ] = sum[0];
171*1295d682SXin Li       sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
172*1295d682SXin Li       y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
173*1295d682SXin Li       _y[i+1] = sum[1];
174*1295d682SXin Li       sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
175*1295d682SXin Li       sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
176*1295d682SXin Li       y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
177*1295d682SXin Li       _y[i+2] = sum[2];
178*1295d682SXin Li 
179*1295d682SXin Li       sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
180*1295d682SXin Li       sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
181*1295d682SXin Li       sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
182*1295d682SXin Li       y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
183*1295d682SXin Li       _y[i+3] = sum[3];
184*1295d682SXin Li    }
185*1295d682SXin Li    for (;i<N;i++)
186*1295d682SXin Li    {
187*1295d682SXin Li       opus_val32 sum = _x[i];
188*1295d682SXin Li       for (j=0;j<ord;j++)
189*1295d682SXin Li          sum -= MULT16_16(rden[j],y[i+j]);
190*1295d682SXin Li       y[i+ord] = SROUND16(sum,SIG_SHIFT);
191*1295d682SXin Li       _y[i] = sum;
192*1295d682SXin Li    }
193*1295d682SXin Li    for(i=0;i<ord;i++)
194*1295d682SXin Li       mem[i] = _y[N-i-1];
195*1295d682SXin Li #endif
196*1295d682SXin Li }
197*1295d682SXin Li 
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n)198*1295d682SXin Li int _celt_autocorr(
199*1295d682SXin Li                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
200*1295d682SXin Li                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
201*1295d682SXin Li                    const opus_val16       *window,
202*1295d682SXin Li                    int          overlap,
203*1295d682SXin Li                    int          lag,
204*1295d682SXin Li                    int          n)
205*1295d682SXin Li {
206*1295d682SXin Li    opus_val32 d;
207*1295d682SXin Li    int i, k;
208*1295d682SXin Li    int fastN=n-lag;
209*1295d682SXin Li    int shift;
210*1295d682SXin Li    const opus_val16 *xptr;
211*1295d682SXin Li    opus_val16 xx[n];
212*1295d682SXin Li    celt_assert(n>0);
213*1295d682SXin Li    celt_assert(overlap>=0);
214*1295d682SXin Li    if (overlap == 0)
215*1295d682SXin Li    {
216*1295d682SXin Li       xptr = x;
217*1295d682SXin Li    } else {
218*1295d682SXin Li       for (i=0;i<n;i++)
219*1295d682SXin Li          xx[i] = x[i];
220*1295d682SXin Li       for (i=0;i<overlap;i++)
221*1295d682SXin Li       {
222*1295d682SXin Li          xx[i] = MULT16_16_Q15(x[i],window[i]);
223*1295d682SXin Li          xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
224*1295d682SXin Li       }
225*1295d682SXin Li       xptr = xx;
226*1295d682SXin Li    }
227*1295d682SXin Li    shift=0;
228*1295d682SXin Li #ifdef FIXED_POINT
229*1295d682SXin Li    {
230*1295d682SXin Li       opus_val32 ac0;
231*1295d682SXin Li       ac0 = 1+(n<<7);
232*1295d682SXin Li       if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
233*1295d682SXin Li       for(i=(n&1);i<n;i+=2)
234*1295d682SXin Li       {
235*1295d682SXin Li          ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
236*1295d682SXin Li          ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
237*1295d682SXin Li       }
238*1295d682SXin Li 
239*1295d682SXin Li       shift = celt_ilog2(ac0)-30+10;
240*1295d682SXin Li       shift = (shift)/2;
241*1295d682SXin Li       if (shift>0)
242*1295d682SXin Li       {
243*1295d682SXin Li          for(i=0;i<n;i++)
244*1295d682SXin Li             xx[i] = PSHR32(xptr[i], shift);
245*1295d682SXin Li          xptr = xx;
246*1295d682SXin Li       } else
247*1295d682SXin Li          shift = 0;
248*1295d682SXin Li    }
249*1295d682SXin Li #endif
250*1295d682SXin Li    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1);
251*1295d682SXin Li    for (k=0;k<=lag;k++)
252*1295d682SXin Li    {
253*1295d682SXin Li       for (i = k+fastN, d = 0; i < n; i++)
254*1295d682SXin Li          d = MAC16_16(d, xptr[i], xptr[i-k]);
255*1295d682SXin Li       ac[k] += d;
256*1295d682SXin Li    }
257*1295d682SXin Li #ifdef FIXED_POINT
258*1295d682SXin Li    shift = 2*shift;
259*1295d682SXin Li    if (shift<=0)
260*1295d682SXin Li       ac[0] += SHL32((opus_int32)1, -shift);
261*1295d682SXin Li    if (ac[0] < 268435456)
262*1295d682SXin Li    {
263*1295d682SXin Li       int shift2 = 29 - EC_ILOG(ac[0]);
264*1295d682SXin Li       for (i=0;i<=lag;i++)
265*1295d682SXin Li          ac[i] = SHL32(ac[i], shift2);
266*1295d682SXin Li       shift -= shift2;
267*1295d682SXin Li    } else if (ac[0] >= 536870912)
268*1295d682SXin Li    {
269*1295d682SXin Li       int shift2=1;
270*1295d682SXin Li       if (ac[0] >= 1073741824)
271*1295d682SXin Li          shift2++;
272*1295d682SXin Li       for (i=0;i<=lag;i++)
273*1295d682SXin Li          ac[i] = SHR32(ac[i], shift2);
274*1295d682SXin Li       shift += shift2;
275*1295d682SXin Li    }
276*1295d682SXin Li #endif
277*1295d682SXin Li 
278*1295d682SXin Li    return shift;
279*1295d682SXin Li }
280