1 /* Copyright (c) 2009-2010 Xiph.Org Foundation
2 Written by Jean-Marc Valin */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include "celt_lpc.h"
33 #include "stack_alloc.h"
34 #include "mathops.h"
35 #include "pitch.h"
36
_celt_lpc(opus_val16 * _lpc,const opus_val32 * ac,int p)37 void _celt_lpc(
38 opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
39 const opus_val32 *ac, /* in: [0...p] autocorrelation values */
40 int p
41 )
42 {
43 int i, j;
44 opus_val32 r;
45 opus_val32 error = ac[0];
46 #ifdef FIXED_POINT
47 opus_val32 lpc[CELT_LPC_ORDER];
48 #else
49 float *lpc = _lpc;
50 #endif
51
52 OPUS_CLEAR(lpc, p);
53 #ifdef FIXED_POINT
54 if (ac[0] != 0)
55 #else
56 if (ac[0] > 1e-10f)
57 #endif
58 {
59 for (i = 0; i < p; i++) {
60 /* Sum up this iteration's reflection coefficient */
61 opus_val32 rr = 0;
62 for (j = 0; j < i; j++)
63 rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64 rr += SHR32(ac[i + 1],6);
65 r = -frac_div32(SHL32(rr,6), error);
66 /* Update LPC coefficients and total error */
67 lpc[i] = SHR32(r,6);
68 for (j = 0; j < (i+1)>>1; j++)
69 {
70 opus_val32 tmp1, tmp2;
71 tmp1 = lpc[j];
72 tmp2 = lpc[i-1-j];
73 lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
74 lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75 }
76
77 error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78 /* Bail out once we get 30 dB gain */
79 #ifdef FIXED_POINT
80 if (error<=SHR32(ac[0],10))
81 break;
82 #else
83 if (error<=.001f*ac[0])
84 break;
85 #endif
86 }
87 }
88 #ifdef FIXED_POINT
89 {
90 /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91 This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92 fixes should also be applied there. */
93 int iter, idx = 0;
94 opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96 for (iter = 0; iter < 10; iter++) {
97 maxabs = 0;
98 for (i = 0; i < p; i++) {
99 absval = ABS32(lpc[i]);
100 if (absval > maxabs) {
101 maxabs = absval;
102 idx = i;
103 }
104 }
105 maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */
106
107 if (maxabs > 32767) {
108 maxabs = MIN32(maxabs, 163838);
109 chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110 SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111 chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113 /* Apply bandwidth expansion. */
114 for (i = 0; i < p - 1; i++) {
115 lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116 chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117 }
118 lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119 } else {
120 break;
121 }
122 }
123
124 if (iter == 10) {
125 /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126 fall back to the A(z)=1 filter. */
127 OPUS_CLEAR(lpc, p);
128 _lpc[0] = 4096; /* Q12 */
129 } else {
130 for (i = 0; i < p; i++) {
131 _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */
132 }
133 }
134 }
135 #endif
136 }
137
138
celt_fir_c(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,int ord,int arch)139 void celt_fir_c(
140 const opus_val16 *x,
141 const opus_val16 *num,
142 opus_val16 *y,
143 int N,
144 int ord,
145 int arch)
146 {
147 int i,j;
148 VARDECL(opus_val16, rnum);
149 SAVE_STACK;
150 celt_assert(x != y);
151 ALLOC(rnum, ord, opus_val16);
152 for(i=0;i<ord;i++)
153 rnum[i] = num[ord-i-1];
154 for (i=0;i<N-3;i+=4)
155 {
156 opus_val32 sum[4];
157 sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT);
158 sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159 sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160 sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162 {
163 opus_val32 sum_c[4];
164 memcpy(sum_c, sum, sizeof(sum_c));
165 xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166 #endif
167 xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169 celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170 }
171 #endif
172 y[i ] = SROUND16(sum[0], SIG_SHIFT);
173 y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174 y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175 y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176 }
177 for (;i<N;i++)
178 {
179 opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180 for (j=0;j<ord;j++)
181 sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182 y[i] = SROUND16(sum, SIG_SHIFT);
183 }
184 RESTORE_STACK;
185 }
186
celt_iir(const opus_val32 * _x,const opus_val16 * den,opus_val32 * _y,int N,int ord,opus_val16 * mem,int arch)187 void celt_iir(const opus_val32 *_x,
188 const opus_val16 *den,
189 opus_val32 *_y,
190 int N,
191 int ord,
192 opus_val16 *mem,
193 int arch)
194 {
195 #ifdef SMALL_FOOTPRINT
196 int i,j;
197 (void)arch;
198 for (i=0;i<N;i++)
199 {
200 opus_val32 sum = _x[i];
201 for (j=0;j<ord;j++)
202 {
203 sum -= MULT16_16(den[j],mem[j]);
204 }
205 for (j=ord-1;j>=1;j--)
206 {
207 mem[j]=mem[j-1];
208 }
209 mem[0] = SROUND16(sum, SIG_SHIFT);
210 _y[i] = sum;
211 }
212 #else
213 int i,j;
214 VARDECL(opus_val16, rden);
215 VARDECL(opus_val16, y);
216 SAVE_STACK;
217
218 celt_assert((ord&3)==0);
219 ALLOC(rden, ord, opus_val16);
220 ALLOC(y, N+ord, opus_val16);
221 for(i=0;i<ord;i++)
222 rden[i] = den[ord-i-1];
223 for(i=0;i<ord;i++)
224 y[i] = -mem[ord-i-1];
225 for(;i<N+ord;i++)
226 y[i]=0;
227 for (i=0;i<N-3;i+=4)
228 {
229 /* Unroll by 4 as if it were an FIR filter */
230 opus_val32 sum[4];
231 sum[0]=_x[i];
232 sum[1]=_x[i+1];
233 sum[2]=_x[i+2];
234 sum[3]=_x[i+3];
235 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236 {
237 opus_val32 sum_c[4];
238 memcpy(sum_c, sum, sizeof(sum_c));
239 xcorr_kernel_c(rden, y+i, sum_c, ord);
240 #endif
241 xcorr_kernel(rden, y+i, sum, ord, arch);
242 #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243 celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244 }
245 #endif
246 /* Patch up the result to compensate for the fact that this is an IIR */
247 y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
248 _y[i ] = sum[0];
249 sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]);
250 y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251 _y[i+1] = sum[1];
252 sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253 sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]);
254 y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255 _y[i+2] = sum[2];
256
257 sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258 sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259 sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]);
260 y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261 _y[i+3] = sum[3];
262 }
263 for (;i<N;i++)
264 {
265 opus_val32 sum = _x[i];
266 for (j=0;j<ord;j++)
267 sum -= MULT16_16(rden[j],y[i+j]);
268 y[i+ord] = SROUND16(sum,SIG_SHIFT);
269 _y[i] = sum;
270 }
271 for(i=0;i<ord;i++)
272 mem[i] = _y[N-i-1];
273 RESTORE_STACK;
274 #endif
275 }
276
_celt_autocorr(const opus_val16 * x,opus_val32 * ac,const opus_val16 * window,int overlap,int lag,int n,int arch)277 int _celt_autocorr(
278 const opus_val16 *x, /* in: [0...n-1] samples x */
279 opus_val32 *ac, /* out: [0...lag-1] ac values */
280 const opus_val16 *window,
281 int overlap,
282 int lag,
283 int n,
284 int arch
285 )
286 {
287 opus_val32 d;
288 int i, k;
289 int fastN=n-lag;
290 int shift;
291 const opus_val16 *xptr;
292 VARDECL(opus_val16, xx);
293 SAVE_STACK;
294 ALLOC(xx, n, opus_val16);
295 celt_assert(n>0);
296 celt_assert(overlap>=0);
297 if (overlap == 0)
298 {
299 xptr = x;
300 } else {
301 for (i=0;i<n;i++)
302 xx[i] = x[i];
303 for (i=0;i<overlap;i++)
304 {
305 xx[i] = MULT16_16_Q15(x[i],window[i]);
306 xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
307 }
308 xptr = xx;
309 }
310 shift=0;
311 #ifdef FIXED_POINT
312 {
313 opus_val32 ac0;
314 ac0 = 1+(n<<7);
315 if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9);
316 for(i=(n&1);i<n;i+=2)
317 {
318 ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9);
319 ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9);
320 }
321
322 shift = celt_ilog2(ac0)-30+10;
323 shift = (shift)/2;
324 if (shift>0)
325 {
326 for(i=0;i<n;i++)
327 xx[i] = PSHR32(xptr[i], shift);
328 xptr = xx;
329 } else
330 shift = 0;
331 }
332 #endif
333 celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
334 for (k=0;k<=lag;k++)
335 {
336 for (i = k+fastN, d = 0; i < n; i++)
337 d = MAC16_16(d, xptr[i], xptr[i-k]);
338 ac[k] += d;
339 }
340 #ifdef FIXED_POINT
341 shift = 2*shift;
342 if (shift<=0)
343 ac[0] += SHL32((opus_int32)1, -shift);
344 if (ac[0] < 268435456)
345 {
346 int shift2 = 29 - EC_ILOG(ac[0]);
347 for (i=0;i<=lag;i++)
348 ac[i] = SHL32(ac[i], shift2);
349 shift -= shift2;
350 } else if (ac[0] >= 536870912)
351 {
352 int shift2=1;
353 if (ac[0] >= 1073741824)
354 shift2++;
355 for (i=0;i<=lag;i++)
356 ac[i] = SHR32(ac[i], shift2);
357 shift += shift2;
358 }
359 #endif
360
361 RESTORE_STACK;
362 return shift;
363 }
364