xref: /aosp_15_r20/external/libopus/celt/celt_lpc.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
2    Written by Jean-Marc Valin */
3 /*
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 
32 #include "celt_lpc.h"
33 #include "stack_alloc.h"
34 #include "mathops.h"
35 #include "pitch.h"
36 
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37 void _celt_lpc(
38       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39 const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40 int          p
41 )
42 {
43    int i, j;
44    opus_val32 r;
45    opus_val32 error = ac[0];
46 #ifdef FIXED_POINT
47    opus_val32 lpc[CELT_LPC_ORDER];
48 #else
49    float *lpc = _lpc;
50 #endif
51 
52    OPUS_CLEAR(lpc, p);
53 #ifdef FIXED_POINT
54    if (ac[0] != 0)
55 #else
56    if (ac[0] > 1e-10f)
57 #endif
58    {
59       for (i = 0; i < p; i++) {
60          /* Sum up this iteration's reflection coefficient */
61          opus_val32 rr = 0;
62          for (j = 0; j < i; j++)
63             rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64          rr += SHR32(ac[i + 1],6);
65          r = -frac_div32(SHL32(rr,6), error);
66          /*  Update LPC coefficients and total error */
67          lpc[i] = SHR32(r,6);
68          for (j = 0; j < (i+1)>>1; j++)
69          {
70             opus_val32 tmp1, tmp2;
71             tmp1 = lpc[j];
72             tmp2 = lpc[i-1-j];
73             lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74             lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75          }
76 
77          error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78          /* Bail out once we get 30 dB gain */
79 #ifdef FIXED_POINT
80          if (error<=SHR32(ac[0],10))
81             break;
82 #else
83          if (error<=.001f*ac[0])
84             break;
85 #endif
86       }
87    }
88 #ifdef FIXED_POINT
89    {
90       /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91          This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92          fixes should also be applied there. */
93       int iter, idx = 0;
94       opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95 
96       for (iter = 0; iter < 10; iter++) {
97          maxabs = 0;
98          for (i = 0; i < p; i++) {
99             absval = ABS32(lpc[i]);
100             if (absval > maxabs) {
101                maxabs = absval;
102                idx = i;
103             }
104          }
105          maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106 
107          if (maxabs > 32767) {
108             maxabs = MIN32(maxabs, 163838);
109             chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110                                                     SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111             chirp_minus_one_Q16 = chirp_Q16 - 65536;
112 
113             /* Apply bandwidth expansion. */
114             for (i = 0; i < p - 1; i++) {
115                lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116                chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117             }
118             lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119          } else {
120             break;
121          }
122       }
123 
124       if (iter == 10) {
125          /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126             fall back to the A(z)=1 filter. */
127          OPUS_CLEAR(lpc, p);
128          _lpc[0] = 4096;  /* Q12 */
129       } else {
130          for (i = 0; i < p; i++) {
131             _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132          }
133       }
134    }
135 #endif
136 }
137 
138 
celt_fir_c(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,int ord,int arch)139 void celt_fir_c(
140          const opus_val16 *x,
141          const opus_val16 *num,
142          opus_val16 *y,
143          int N,
144          int ord,
145          int arch)
146 {
147    int i,j;
148    VARDECL(opus_val16, rnum);
149    SAVE_STACK;
150    celt_assert(x != y);
151    ALLOC(rnum, ord, opus_val16);
152    for(i=0;i<ord;i++)
153       rnum[i] = num[ord-i-1];
154    for (i=0;i<N-3;i+=4)
155    {
156       opus_val32 sum[4];
157       sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162       {
163          opus_val32 sum_c[4];
164          memcpy(sum_c, sum, sizeof(sum_c));
165          xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166 #endif
167          xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169          celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170       }
171 #endif
172       y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173       y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174       y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175       y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176    }
177    for (;i<N;i++)
178    {
179       opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180       for (j=0;j<ord;j++)
181          sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182       y[i] = SROUND16(sum, SIG_SHIFT);
183    }
184    RESTORE_STACK;
185 }
186 
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem,int arch)187 void celt_iir(const opus_val32 *_x,
188          const opus_val16 *den,
189          opus_val32 *_y,
190          int N,
191          int ord,
192          opus_val16 *mem,
193          int arch)
194 {
195 #ifdef SMALL_FOOTPRINT
196    int i,j;
197    (void)arch;
198    for (i=0;i<N;i++)
199    {
200       opus_val32 sum = _x[i];
201       for (j=0;j<ord;j++)
202       {
203          sum -= MULT16_16(den[j],mem[j]);
204       }
205       for (j=ord-1;j>=1;j--)
206       {
207          mem[j]=mem[j-1];
208       }
209       mem[0] = SROUND16(sum, SIG_SHIFT);
210       _y[i] = sum;
211    }
212 #else
213    int i,j;
214    VARDECL(opus_val16, rden);
215    VARDECL(opus_val16, y);
216    SAVE_STACK;
217 
218    celt_assert((ord&3)==0);
219    ALLOC(rden, ord, opus_val16);
220    ALLOC(y, N+ord, opus_val16);
221    for(i=0;i<ord;i++)
222       rden[i] = den[ord-i-1];
223    for(i=0;i<ord;i++)
224       y[i] = -mem[ord-i-1];
225    for(;i<N+ord;i++)
226       y[i]=0;
227    for (i=0;i<N-3;i+=4)
228    {
229       /* Unroll by 4 as if it were an FIR filter */
230       opus_val32 sum[4];
231       sum[0]=_x[i];
232       sum[1]=_x[i+1];
233       sum[2]=_x[i+2];
234       sum[3]=_x[i+3];
235 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236       {
237          opus_val32 sum_c[4];
238          memcpy(sum_c, sum, sizeof(sum_c));
239          xcorr_kernel_c(rden, y+i, sum_c, ord);
240 #endif
241          xcorr_kernel(rden, y+i, sum, ord, arch);
242 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243          celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244       }
245 #endif
246       /* Patch up the result to compensate for the fact that this is an IIR */
247       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248       _y[i  ] = sum[0];
249       sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250       y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251       _y[i+1] = sum[1];
252       sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253       sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254       y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255       _y[i+2] = sum[2];
256 
257       sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258       sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259       sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260       y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261       _y[i+3] = sum[3];
262    }
263    for (;i<N;i++)
264    {
265       opus_val32 sum = _x[i];
266       for (j=0;j<ord;j++)
267          sum -= MULT16_16(rden[j],y[i+j]);
268       y[i+ord] = SROUND16(sum,SIG_SHIFT);
269       _y[i] = sum;
270    }
271    for(i=0;i<ord;i++)
272       mem[i] = _y[N-i-1];
273    RESTORE_STACK;
274 #endif
275 }
276 
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)277 int _celt_autocorr(
278                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
279                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
280                    const opus_val16       *window,
281                    int          overlap,
282                    int          lag,
283                    int          n,
284                    int          arch
285                   )
286 {
287    opus_val32 d;
288    int i, k;
289    int fastN=n-lag;
290    int shift;
291    const opus_val16 *xptr;
292    VARDECL(opus_val16, xx);
293    SAVE_STACK;
294    ALLOC(xx, n, opus_val16);
295    celt_assert(n>0);
296    celt_assert(overlap>=0);
297    if (overlap == 0)
298    {
299       xptr = x;
300    } else {
301       for (i=0;i<n;i++)
302          xx[i] = x[i];
303       for (i=0;i<overlap;i++)
304       {
305          xx[i] = MULT16_16_Q15(x[i],window[i]);
306          xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
307       }
308       xptr = xx;
309    }
310    shift=0;
311 #ifdef FIXED_POINT
312    {
313       opus_val32 ac0;
314       ac0 = 1+(n<<7);
315       if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
316       for(i=(n&1);i<n;i+=2)
317       {
318          ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
319          ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
320       }
321 
322       shift = celt_ilog2(ac0)-30+10;
323       shift = (shift)/2;
324       if (shift>0)
325       {
326          for(i=0;i<n;i++)
327             xx[i] = PSHR32(xptr[i], shift);
328          xptr = xx;
329       } else
330          shift = 0;
331    }
332 #endif
333    celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
334    for (k=0;k<=lag;k++)
335    {
336       for (i = k+fastN, d = 0; i < n; i++)
337          d = MAC16_16(d, xptr[i], xptr[i-k]);
338       ac[k] += d;
339    }
340 #ifdef FIXED_POINT
341    shift = 2*shift;
342    if (shift<=0)
343       ac[0] += SHL32((opus_int32)1, -shift);
344    if (ac[0] < 268435456)
345    {
346       int shift2 = 29 - EC_ILOG(ac[0]);
347       for (i=0;i<=lag;i++)
348          ac[i] = SHL32(ac[i], shift2);
349       shift -= shift2;
350    } else if (ac[0] >= 536870912)
351    {
352       int shift2=1;
353       if (ac[0] >= 1073741824)
354          shift2++;
355       for (i=0;i<=lag;i++)
356          ac[i] = SHR32(ac[i], shift2);
357       shift += shift2;
358    }
359 #endif
360 
361    RESTORE_STACK;
362    return shift;
363 }
364