1*1295d682SXin Li /* Copyright (c) 2007-2008 CSIRO
2*1295d682SXin Li Copyright (c) 2007-2009 Xiph.Org Foundation
3*1295d682SXin Li Written by Jean-Marc Valin */
4*1295d682SXin Li /**
5*1295d682SXin Li @file pitch.c
6*1295d682SXin Li @brief Pitch analysis
7*1295d682SXin Li */
8*1295d682SXin Li
9*1295d682SXin Li /*
10*1295d682SXin Li Redistribution and use in source and binary forms, with or without
11*1295d682SXin Li modification, are permitted provided that the following conditions
12*1295d682SXin Li are met:
13*1295d682SXin Li
14*1295d682SXin Li - Redistributions of source code must retain the above copyright
15*1295d682SXin Li notice, this list of conditions and the following disclaimer.
16*1295d682SXin Li
17*1295d682SXin Li - Redistributions in binary form must reproduce the above copyright
18*1295d682SXin Li notice, this list of conditions and the following disclaimer in the
19*1295d682SXin Li documentation and/or other materials provided with the distribution.
20*1295d682SXin Li
21*1295d682SXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*1295d682SXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*1295d682SXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24*1295d682SXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25*1295d682SXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26*1295d682SXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27*1295d682SXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28*1295d682SXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29*1295d682SXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30*1295d682SXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31*1295d682SXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*1295d682SXin Li */
33*1295d682SXin Li
34*1295d682SXin Li #ifdef HAVE_CONFIG_H
35*1295d682SXin Li #include "config.h"
36*1295d682SXin Li #endif
37*1295d682SXin Li
38*1295d682SXin Li #include "pitch.h"
39*1295d682SXin Li #include "common.h"
40*1295d682SXin Li //#include "modes.h"
41*1295d682SXin Li //#include "stack_alloc.h"
42*1295d682SXin Li //#include "mathops.h"
43*1295d682SXin Li #include "celt_lpc.h"
44*1295d682SXin Li #include "math.h"
45*1295d682SXin Li
find_best_pitch(opus_val32 * xcorr,opus_val16 * y,int len,int max_pitch,int * best_pitch,int yshift,opus_val32 maxcorr)46*1295d682SXin Li static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
47*1295d682SXin Li int max_pitch, int *best_pitch
48*1295d682SXin Li #ifdef FIXED_POINT
49*1295d682SXin Li , int yshift, opus_val32 maxcorr
50*1295d682SXin Li #endif
51*1295d682SXin Li )
52*1295d682SXin Li {
53*1295d682SXin Li int i, j;
54*1295d682SXin Li opus_val32 Syy=1;
55*1295d682SXin Li opus_val16 best_num[2];
56*1295d682SXin Li opus_val32 best_den[2];
57*1295d682SXin Li #ifdef FIXED_POINT
58*1295d682SXin Li int xshift;
59*1295d682SXin Li
60*1295d682SXin Li xshift = celt_ilog2(maxcorr)-14;
61*1295d682SXin Li #endif
62*1295d682SXin Li
63*1295d682SXin Li best_num[0] = -1;
64*1295d682SXin Li best_num[1] = -1;
65*1295d682SXin Li best_den[0] = 0;
66*1295d682SXin Li best_den[1] = 0;
67*1295d682SXin Li best_pitch[0] = 0;
68*1295d682SXin Li best_pitch[1] = 1;
69*1295d682SXin Li for (j=0;j<len;j++)
70*1295d682SXin Li Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
71*1295d682SXin Li for (i=0;i<max_pitch;i++)
72*1295d682SXin Li {
73*1295d682SXin Li if (xcorr[i]>0)
74*1295d682SXin Li {
75*1295d682SXin Li opus_val16 num;
76*1295d682SXin Li opus_val32 xcorr16;
77*1295d682SXin Li xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
78*1295d682SXin Li #ifndef FIXED_POINT
79*1295d682SXin Li /* Considering the range of xcorr16, this should avoid both underflows
80*1295d682SXin Li and overflows (inf) when squaring xcorr16 */
81*1295d682SXin Li xcorr16 *= 1e-12f;
82*1295d682SXin Li #endif
83*1295d682SXin Li num = MULT16_16_Q15(xcorr16,xcorr16);
84*1295d682SXin Li if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
85*1295d682SXin Li {
86*1295d682SXin Li if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
87*1295d682SXin Li {
88*1295d682SXin Li best_num[1] = best_num[0];
89*1295d682SXin Li best_den[1] = best_den[0];
90*1295d682SXin Li best_pitch[1] = best_pitch[0];
91*1295d682SXin Li best_num[0] = num;
92*1295d682SXin Li best_den[0] = Syy;
93*1295d682SXin Li best_pitch[0] = i;
94*1295d682SXin Li } else {
95*1295d682SXin Li best_num[1] = num;
96*1295d682SXin Li best_den[1] = Syy;
97*1295d682SXin Li best_pitch[1] = i;
98*1295d682SXin Li }
99*1295d682SXin Li }
100*1295d682SXin Li }
101*1295d682SXin Li Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
102*1295d682SXin Li Syy = MAX32(1, Syy);
103*1295d682SXin Li }
104*1295d682SXin Li }
105*1295d682SXin Li
celt_fir5(const opus_val16 * x,const opus_val16 * num,opus_val16 * y,int N,opus_val16 * mem)106*1295d682SXin Li static void celt_fir5(const opus_val16 *x,
107*1295d682SXin Li const opus_val16 *num,
108*1295d682SXin Li opus_val16 *y,
109*1295d682SXin Li int N,
110*1295d682SXin Li opus_val16 *mem)
111*1295d682SXin Li {
112*1295d682SXin Li int i;
113*1295d682SXin Li opus_val16 num0, num1, num2, num3, num4;
114*1295d682SXin Li opus_val32 mem0, mem1, mem2, mem3, mem4;
115*1295d682SXin Li num0=num[0];
116*1295d682SXin Li num1=num[1];
117*1295d682SXin Li num2=num[2];
118*1295d682SXin Li num3=num[3];
119*1295d682SXin Li num4=num[4];
120*1295d682SXin Li mem0=mem[0];
121*1295d682SXin Li mem1=mem[1];
122*1295d682SXin Li mem2=mem[2];
123*1295d682SXin Li mem3=mem[3];
124*1295d682SXin Li mem4=mem[4];
125*1295d682SXin Li for (i=0;i<N;i++)
126*1295d682SXin Li {
127*1295d682SXin Li opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
128*1295d682SXin Li sum = MAC16_16(sum,num0,mem0);
129*1295d682SXin Li sum = MAC16_16(sum,num1,mem1);
130*1295d682SXin Li sum = MAC16_16(sum,num2,mem2);
131*1295d682SXin Li sum = MAC16_16(sum,num3,mem3);
132*1295d682SXin Li sum = MAC16_16(sum,num4,mem4);
133*1295d682SXin Li mem4 = mem3;
134*1295d682SXin Li mem3 = mem2;
135*1295d682SXin Li mem2 = mem1;
136*1295d682SXin Li mem1 = mem0;
137*1295d682SXin Li mem0 = x[i];
138*1295d682SXin Li y[i] = ROUND16(sum, SIG_SHIFT);
139*1295d682SXin Li }
140*1295d682SXin Li mem[0]=mem0;
141*1295d682SXin Li mem[1]=mem1;
142*1295d682SXin Li mem[2]=mem2;
143*1295d682SXin Li mem[3]=mem3;
144*1295d682SXin Li mem[4]=mem4;
145*1295d682SXin Li }
146*1295d682SXin Li
147*1295d682SXin Li
pitch_downsample(celt_sig * x[],opus_val16 * x_lp,int len,int C)148*1295d682SXin Li void pitch_downsample(celt_sig *x[], opus_val16 *x_lp,
149*1295d682SXin Li int len, int C)
150*1295d682SXin Li {
151*1295d682SXin Li int i;
152*1295d682SXin Li opus_val32 ac[5];
153*1295d682SXin Li opus_val16 tmp=Q15ONE;
154*1295d682SXin Li opus_val16 lpc[4], mem[5]={0,0,0,0,0};
155*1295d682SXin Li opus_val16 lpc2[5];
156*1295d682SXin Li opus_val16 c1 = QCONST16(.8f,15);
157*1295d682SXin Li #ifdef FIXED_POINT
158*1295d682SXin Li int shift;
159*1295d682SXin Li opus_val32 maxabs = celt_maxabs32(x[0], len);
160*1295d682SXin Li if (C==2)
161*1295d682SXin Li {
162*1295d682SXin Li opus_val32 maxabs_1 = celt_maxabs32(x[1], len);
163*1295d682SXin Li maxabs = MAX32(maxabs, maxabs_1);
164*1295d682SXin Li }
165*1295d682SXin Li if (maxabs<1)
166*1295d682SXin Li maxabs=1;
167*1295d682SXin Li shift = celt_ilog2(maxabs)-10;
168*1295d682SXin Li if (shift<0)
169*1295d682SXin Li shift=0;
170*1295d682SXin Li if (C==2)
171*1295d682SXin Li shift++;
172*1295d682SXin Li #endif
173*1295d682SXin Li for (i=1;i<len>>1;i++)
174*1295d682SXin Li x_lp[i] = SHR32(HALF32(HALF32(x[0][(2*i-1)]+x[0][(2*i+1)])+x[0][2*i]), shift);
175*1295d682SXin Li x_lp[0] = SHR32(HALF32(HALF32(x[0][1])+x[0][0]), shift);
176*1295d682SXin Li if (C==2)
177*1295d682SXin Li {
178*1295d682SXin Li for (i=1;i<len>>1;i++)
179*1295d682SXin Li x_lp[i] += SHR32(HALF32(HALF32(x[1][(2*i-1)]+x[1][(2*i+1)])+x[1][2*i]), shift);
180*1295d682SXin Li x_lp[0] += SHR32(HALF32(HALF32(x[1][1])+x[1][0]), shift);
181*1295d682SXin Li }
182*1295d682SXin Li
183*1295d682SXin Li _celt_autocorr(x_lp, ac, NULL, 0,
184*1295d682SXin Li 4, len>>1);
185*1295d682SXin Li
186*1295d682SXin Li /* Noise floor -40 dB */
187*1295d682SXin Li #ifdef FIXED_POINT
188*1295d682SXin Li ac[0] += SHR32(ac[0],13);
189*1295d682SXin Li #else
190*1295d682SXin Li ac[0] *= 1.0001f;
191*1295d682SXin Li #endif
192*1295d682SXin Li /* Lag windowing */
193*1295d682SXin Li for (i=1;i<=4;i++)
194*1295d682SXin Li {
195*1295d682SXin Li /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
196*1295d682SXin Li #ifdef FIXED_POINT
197*1295d682SXin Li ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
198*1295d682SXin Li #else
199*1295d682SXin Li ac[i] -= ac[i]*(.008f*i)*(.008f*i);
200*1295d682SXin Li #endif
201*1295d682SXin Li }
202*1295d682SXin Li
203*1295d682SXin Li _celt_lpc(lpc, ac, 4);
204*1295d682SXin Li for (i=0;i<4;i++)
205*1295d682SXin Li {
206*1295d682SXin Li tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
207*1295d682SXin Li lpc[i] = MULT16_16_Q15(lpc[i], tmp);
208*1295d682SXin Li }
209*1295d682SXin Li /* Add a zero */
210*1295d682SXin Li lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
211*1295d682SXin Li lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
212*1295d682SXin Li lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
213*1295d682SXin Li lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
214*1295d682SXin Li lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
215*1295d682SXin Li celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
216*1295d682SXin Li }
217*1295d682SXin Li
celt_pitch_xcorr(const opus_val16 * _x,const opus_val16 * _y,opus_val32 * xcorr,int len,int max_pitch)218*1295d682SXin Li void celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
219*1295d682SXin Li opus_val32 *xcorr, int len, int max_pitch)
220*1295d682SXin Li {
221*1295d682SXin Li
222*1295d682SXin Li #if 0 /* This is a simple version of the pitch correlation that should work
223*1295d682SXin Li well on DSPs like Blackfin and TI C5x/C6x */
224*1295d682SXin Li int i, j;
225*1295d682SXin Li #ifdef FIXED_POINT
226*1295d682SXin Li opus_val32 maxcorr=1;
227*1295d682SXin Li #endif
228*1295d682SXin Li for (i=0;i<max_pitch;i++)
229*1295d682SXin Li {
230*1295d682SXin Li opus_val32 sum = 0;
231*1295d682SXin Li for (j=0;j<len;j++)
232*1295d682SXin Li sum = MAC16_16(sum, _x[j], _y[i+j]);
233*1295d682SXin Li xcorr[i] = sum;
234*1295d682SXin Li #ifdef FIXED_POINT
235*1295d682SXin Li maxcorr = MAX32(maxcorr, sum);
236*1295d682SXin Li #endif
237*1295d682SXin Li }
238*1295d682SXin Li #ifdef FIXED_POINT
239*1295d682SXin Li return maxcorr;
240*1295d682SXin Li #endif
241*1295d682SXin Li
242*1295d682SXin Li #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
243*1295d682SXin Li int i;
244*1295d682SXin Li /*The EDSP version requires that max_pitch is at least 1, and that _x is
245*1295d682SXin Li 32-bit aligned.
246*1295d682SXin Li Since it's hard to put asserts in assembly, put them here.*/
247*1295d682SXin Li #ifdef FIXED_POINT
248*1295d682SXin Li opus_val32 maxcorr=1;
249*1295d682SXin Li #endif
250*1295d682SXin Li celt_assert(max_pitch>0);
251*1295d682SXin Li celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
252*1295d682SXin Li for (i=0;i<max_pitch-3;i+=4)
253*1295d682SXin Li {
254*1295d682SXin Li opus_val32 sum[4]={0,0,0,0};
255*1295d682SXin Li xcorr_kernel(_x, _y+i, sum, len);
256*1295d682SXin Li xcorr[i]=sum[0];
257*1295d682SXin Li xcorr[i+1]=sum[1];
258*1295d682SXin Li xcorr[i+2]=sum[2];
259*1295d682SXin Li xcorr[i+3]=sum[3];
260*1295d682SXin Li #ifdef FIXED_POINT
261*1295d682SXin Li sum[0] = MAX32(sum[0], sum[1]);
262*1295d682SXin Li sum[2] = MAX32(sum[2], sum[3]);
263*1295d682SXin Li sum[0] = MAX32(sum[0], sum[2]);
264*1295d682SXin Li maxcorr = MAX32(maxcorr, sum[0]);
265*1295d682SXin Li #endif
266*1295d682SXin Li }
267*1295d682SXin Li /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
268*1295d682SXin Li for (;i<max_pitch;i++)
269*1295d682SXin Li {
270*1295d682SXin Li opus_val32 sum;
271*1295d682SXin Li sum = celt_inner_prod(_x, _y+i, len);
272*1295d682SXin Li xcorr[i] = sum;
273*1295d682SXin Li #ifdef FIXED_POINT
274*1295d682SXin Li maxcorr = MAX32(maxcorr, sum);
275*1295d682SXin Li #endif
276*1295d682SXin Li }
277*1295d682SXin Li #ifdef FIXED_POINT
278*1295d682SXin Li return maxcorr;
279*1295d682SXin Li #endif
280*1295d682SXin Li #endif
281*1295d682SXin Li }
282*1295d682SXin Li
pitch_search(const opus_val16 * x_lp,opus_val16 * y,int len,int max_pitch,int * pitch)283*1295d682SXin Li void pitch_search(const opus_val16 *x_lp, opus_val16 *y,
284*1295d682SXin Li int len, int max_pitch, int *pitch)
285*1295d682SXin Li {
286*1295d682SXin Li int i, j;
287*1295d682SXin Li int lag;
288*1295d682SXin Li int best_pitch[2]={0,0};
289*1295d682SXin Li #ifdef FIXED_POINT
290*1295d682SXin Li opus_val32 maxcorr;
291*1295d682SXin Li opus_val32 xmax, ymax;
292*1295d682SXin Li int shift=0;
293*1295d682SXin Li #endif
294*1295d682SXin Li int offset;
295*1295d682SXin Li
296*1295d682SXin Li celt_assert(len>0);
297*1295d682SXin Li celt_assert(max_pitch>0);
298*1295d682SXin Li lag = len+max_pitch;
299*1295d682SXin Li
300*1295d682SXin Li opus_val16 x_lp4[len>>2];
301*1295d682SXin Li opus_val16 y_lp4[lag>>2];
302*1295d682SXin Li opus_val32 xcorr[max_pitch>>1];
303*1295d682SXin Li
304*1295d682SXin Li /* Downsample by 2 again */
305*1295d682SXin Li for (j=0;j<len>>2;j++)
306*1295d682SXin Li x_lp4[j] = x_lp[2*j];
307*1295d682SXin Li for (j=0;j<lag>>2;j++)
308*1295d682SXin Li y_lp4[j] = y[2*j];
309*1295d682SXin Li
310*1295d682SXin Li #ifdef FIXED_POINT
311*1295d682SXin Li xmax = celt_maxabs16(x_lp4, len>>2);
312*1295d682SXin Li ymax = celt_maxabs16(y_lp4, lag>>2);
313*1295d682SXin Li shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11;
314*1295d682SXin Li if (shift>0)
315*1295d682SXin Li {
316*1295d682SXin Li for (j=0;j<len>>2;j++)
317*1295d682SXin Li x_lp4[j] = SHR16(x_lp4[j], shift);
318*1295d682SXin Li for (j=0;j<lag>>2;j++)
319*1295d682SXin Li y_lp4[j] = SHR16(y_lp4[j], shift);
320*1295d682SXin Li /* Use double the shift for a MAC */
321*1295d682SXin Li shift *= 2;
322*1295d682SXin Li } else {
323*1295d682SXin Li shift = 0;
324*1295d682SXin Li }
325*1295d682SXin Li #endif
326*1295d682SXin Li
327*1295d682SXin Li /* Coarse search with 4x decimation */
328*1295d682SXin Li
329*1295d682SXin Li #ifdef FIXED_POINT
330*1295d682SXin Li maxcorr =
331*1295d682SXin Li #endif
332*1295d682SXin Li celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
333*1295d682SXin Li
334*1295d682SXin Li find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
335*1295d682SXin Li #ifdef FIXED_POINT
336*1295d682SXin Li , 0, maxcorr
337*1295d682SXin Li #endif
338*1295d682SXin Li );
339*1295d682SXin Li
340*1295d682SXin Li /* Finer search with 2x decimation */
341*1295d682SXin Li #ifdef FIXED_POINT
342*1295d682SXin Li maxcorr=1;
343*1295d682SXin Li #endif
344*1295d682SXin Li for (i=0;i<max_pitch>>1;i++)
345*1295d682SXin Li {
346*1295d682SXin Li opus_val32 sum;
347*1295d682SXin Li xcorr[i] = 0;
348*1295d682SXin Li if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
349*1295d682SXin Li continue;
350*1295d682SXin Li #ifdef FIXED_POINT
351*1295d682SXin Li sum = 0;
352*1295d682SXin Li for (j=0;j<len>>1;j++)
353*1295d682SXin Li sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
354*1295d682SXin Li #else
355*1295d682SXin Li sum = celt_inner_prod(x_lp, y+i, len>>1);
356*1295d682SXin Li #endif
357*1295d682SXin Li xcorr[i] = MAX32(-1, sum);
358*1295d682SXin Li #ifdef FIXED_POINT
359*1295d682SXin Li maxcorr = MAX32(maxcorr, sum);
360*1295d682SXin Li #endif
361*1295d682SXin Li }
362*1295d682SXin Li find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
363*1295d682SXin Li #ifdef FIXED_POINT
364*1295d682SXin Li , shift+1, maxcorr
365*1295d682SXin Li #endif
366*1295d682SXin Li );
367*1295d682SXin Li
368*1295d682SXin Li /* Refine by pseudo-interpolation */
369*1295d682SXin Li if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
370*1295d682SXin Li {
371*1295d682SXin Li opus_val32 a, b, c;
372*1295d682SXin Li a = xcorr[best_pitch[0]-1];
373*1295d682SXin Li b = xcorr[best_pitch[0]];
374*1295d682SXin Li c = xcorr[best_pitch[0]+1];
375*1295d682SXin Li if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
376*1295d682SXin Li offset = 1;
377*1295d682SXin Li else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
378*1295d682SXin Li offset = -1;
379*1295d682SXin Li else
380*1295d682SXin Li offset = 0;
381*1295d682SXin Li } else {
382*1295d682SXin Li offset = 0;
383*1295d682SXin Li }
384*1295d682SXin Li *pitch = 2*best_pitch[0]-offset;
385*1295d682SXin Li }
386*1295d682SXin Li
387*1295d682SXin Li #ifdef FIXED_POINT
compute_pitch_gain(opus_val32 xy,opus_val32 xx,opus_val32 yy)388*1295d682SXin Li static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
389*1295d682SXin Li {
390*1295d682SXin Li opus_val32 x2y2;
391*1295d682SXin Li int sx, sy, shift;
392*1295d682SXin Li opus_val32 g;
393*1295d682SXin Li opus_val16 den;
394*1295d682SXin Li if (xy == 0 || xx == 0 || yy == 0)
395*1295d682SXin Li return 0;
396*1295d682SXin Li sx = celt_ilog2(xx)-14;
397*1295d682SXin Li sy = celt_ilog2(yy)-14;
398*1295d682SXin Li shift = sx + sy;
399*1295d682SXin Li x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
400*1295d682SXin Li if (shift & 1) {
401*1295d682SXin Li if (x2y2 < 32768)
402*1295d682SXin Li {
403*1295d682SXin Li x2y2 <<= 1;
404*1295d682SXin Li shift--;
405*1295d682SXin Li } else {
406*1295d682SXin Li x2y2 >>= 1;
407*1295d682SXin Li shift++;
408*1295d682SXin Li }
409*1295d682SXin Li }
410*1295d682SXin Li den = celt_rsqrt_norm(x2y2);
411*1295d682SXin Li g = MULT16_32_Q15(den, xy);
412*1295d682SXin Li g = VSHR32(g, (shift>>1)-1);
413*1295d682SXin Li return EXTRACT16(MIN32(g, Q15ONE));
414*1295d682SXin Li }
415*1295d682SXin Li #else
compute_pitch_gain(opus_val32 xy,opus_val32 xx,opus_val32 yy)416*1295d682SXin Li static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
417*1295d682SXin Li {
418*1295d682SXin Li return xy/sqrt(1+xx*yy);
419*1295d682SXin Li }
420*1295d682SXin Li #endif
421*1295d682SXin Li
422*1295d682SXin Li static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
remove_doubling(opus_val16 * x,int maxperiod,int minperiod,int N,int * T0_,int prev_period,opus_val16 prev_gain)423*1295d682SXin Li opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
424*1295d682SXin Li int N, int *T0_, int prev_period, opus_val16 prev_gain)
425*1295d682SXin Li {
426*1295d682SXin Li int k, i, T, T0;
427*1295d682SXin Li opus_val16 g, g0;
428*1295d682SXin Li opus_val16 pg;
429*1295d682SXin Li opus_val32 xy,xx,yy,xy2;
430*1295d682SXin Li opus_val32 xcorr[3];
431*1295d682SXin Li opus_val32 best_xy, best_yy;
432*1295d682SXin Li int offset;
433*1295d682SXin Li int minperiod0;
434*1295d682SXin Li
435*1295d682SXin Li minperiod0 = minperiod;
436*1295d682SXin Li maxperiod /= 2;
437*1295d682SXin Li minperiod /= 2;
438*1295d682SXin Li *T0_ /= 2;
439*1295d682SXin Li prev_period /= 2;
440*1295d682SXin Li N /= 2;
441*1295d682SXin Li x += maxperiod;
442*1295d682SXin Li if (*T0_>=maxperiod)
443*1295d682SXin Li *T0_=maxperiod-1;
444*1295d682SXin Li
445*1295d682SXin Li T = T0 = *T0_;
446*1295d682SXin Li opus_val32 yy_lookup[maxperiod+1];
447*1295d682SXin Li dual_inner_prod(x, x, x-T0, N, &xx, &xy);
448*1295d682SXin Li yy_lookup[0] = xx;
449*1295d682SXin Li yy=xx;
450*1295d682SXin Li for (i=1;i<=maxperiod;i++)
451*1295d682SXin Li {
452*1295d682SXin Li yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
453*1295d682SXin Li yy_lookup[i] = MAX32(0, yy);
454*1295d682SXin Li }
455*1295d682SXin Li yy = yy_lookup[T0];
456*1295d682SXin Li best_xy = xy;
457*1295d682SXin Li best_yy = yy;
458*1295d682SXin Li g = g0 = compute_pitch_gain(xy, xx, yy);
459*1295d682SXin Li /* Look for any pitch at T/k */
460*1295d682SXin Li for (k=2;k<=15;k++)
461*1295d682SXin Li {
462*1295d682SXin Li int T1, T1b;
463*1295d682SXin Li opus_val16 g1;
464*1295d682SXin Li opus_val16 cont=0;
465*1295d682SXin Li opus_val16 thresh;
466*1295d682SXin Li T1 = (2*T0+k)/(2*k);
467*1295d682SXin Li if (T1 < minperiod)
468*1295d682SXin Li break;
469*1295d682SXin Li /* Look for another strong correlation at T1b */
470*1295d682SXin Li if (k==2)
471*1295d682SXin Li {
472*1295d682SXin Li if (T1+T0>maxperiod)
473*1295d682SXin Li T1b = T0;
474*1295d682SXin Li else
475*1295d682SXin Li T1b = T0+T1;
476*1295d682SXin Li } else
477*1295d682SXin Li {
478*1295d682SXin Li T1b = (2*second_check[k]*T0+k)/(2*k);
479*1295d682SXin Li }
480*1295d682SXin Li dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
481*1295d682SXin Li xy = HALF32(xy + xy2);
482*1295d682SXin Li yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
483*1295d682SXin Li g1 = compute_pitch_gain(xy, xx, yy);
484*1295d682SXin Li if (abs(T1-prev_period)<=1)
485*1295d682SXin Li cont = prev_gain;
486*1295d682SXin Li else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
487*1295d682SXin Li cont = HALF16(prev_gain);
488*1295d682SXin Li else
489*1295d682SXin Li cont = 0;
490*1295d682SXin Li thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
491*1295d682SXin Li /* Bias against very high pitch (very short period) to avoid false-positives
492*1295d682SXin Li due to short-term correlation */
493*1295d682SXin Li if (T1<3*minperiod)
494*1295d682SXin Li thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
495*1295d682SXin Li else if (T1<2*minperiod)
496*1295d682SXin Li thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
497*1295d682SXin Li if (g1 > thresh)
498*1295d682SXin Li {
499*1295d682SXin Li best_xy = xy;
500*1295d682SXin Li best_yy = yy;
501*1295d682SXin Li T = T1;
502*1295d682SXin Li g = g1;
503*1295d682SXin Li }
504*1295d682SXin Li }
505*1295d682SXin Li best_xy = MAX32(0, best_xy);
506*1295d682SXin Li if (best_yy <= best_xy)
507*1295d682SXin Li pg = Q15ONE;
508*1295d682SXin Li else
509*1295d682SXin Li pg = best_xy/(best_yy+1);
510*1295d682SXin Li
511*1295d682SXin Li for (k=0;k<3;k++)
512*1295d682SXin Li xcorr[k] = celt_inner_prod(x, x-(T+k-1), N);
513*1295d682SXin Li if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
514*1295d682SXin Li offset = 1;
515*1295d682SXin Li else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
516*1295d682SXin Li offset = -1;
517*1295d682SXin Li else
518*1295d682SXin Li offset = 0;
519*1295d682SXin Li if (pg > g)
520*1295d682SXin Li pg = g;
521*1295d682SXin Li *T0_ = 2*T+offset;
522*1295d682SXin Li
523*1295d682SXin Li if (*T0_<minperiod0)
524*1295d682SXin Li *T0_=minperiod0;
525*1295d682SXin Li return pg;
526*1295d682SXin Li }
527