1*a58d3d2aSXin Li /* Copyright (c) 2007-2008 CSIRO
2*a58d3d2aSXin Li Copyright (c) 2007-2009 Xiph.Org Foundation
3*a58d3d2aSXin Li Copyright (c) 2008-2009 Gregory Maxwell
4*a58d3d2aSXin Li Written by Jean-Marc Valin and Gregory Maxwell */
5*a58d3d2aSXin Li /*
6*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without
7*a58d3d2aSXin Li modification, are permitted provided that the following conditions
8*a58d3d2aSXin Li are met:
9*a58d3d2aSXin Li
10*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright
11*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer.
12*a58d3d2aSXin Li
13*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright
14*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the
15*a58d3d2aSXin Li documentation and/or other materials provided with the distribution.
16*a58d3d2aSXin Li
17*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21*a58d3d2aSXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*a58d3d2aSXin Li */
29*a58d3d2aSXin Li
30*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
31*a58d3d2aSXin Li #include "config.h"
32*a58d3d2aSXin Li #endif
33*a58d3d2aSXin Li
34*a58d3d2aSXin Li #include <math.h>
35*a58d3d2aSXin Li #include "bands.h"
36*a58d3d2aSXin Li #include "modes.h"
37*a58d3d2aSXin Li #include "vq.h"
38*a58d3d2aSXin Li #include "cwrs.h"
39*a58d3d2aSXin Li #include "stack_alloc.h"
40*a58d3d2aSXin Li #include "os_support.h"
41*a58d3d2aSXin Li #include "mathops.h"
42*a58d3d2aSXin Li #include "rate.h"
43*a58d3d2aSXin Li #include "quant_bands.h"
44*a58d3d2aSXin Li #include "pitch.h"
45*a58d3d2aSXin Li
hysteresis_decision(opus_val16 val,const opus_val16 * thresholds,const opus_val16 * hysteresis,int N,int prev)46*a58d3d2aSXin Li int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47*a58d3d2aSXin Li {
48*a58d3d2aSXin Li int i;
49*a58d3d2aSXin Li for (i=0;i<N;i++)
50*a58d3d2aSXin Li {
51*a58d3d2aSXin Li if (val < thresholds[i])
52*a58d3d2aSXin Li break;
53*a58d3d2aSXin Li }
54*a58d3d2aSXin Li if (i>prev && val < thresholds[prev]+hysteresis[prev])
55*a58d3d2aSXin Li i=prev;
56*a58d3d2aSXin Li if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57*a58d3d2aSXin Li i=prev;
58*a58d3d2aSXin Li return i;
59*a58d3d2aSXin Li }
60*a58d3d2aSXin Li
celt_lcg_rand(opus_uint32 seed)61*a58d3d2aSXin Li opus_uint32 celt_lcg_rand(opus_uint32 seed)
62*a58d3d2aSXin Li {
63*a58d3d2aSXin Li return 1664525 * seed + 1013904223;
64*a58d3d2aSXin Li }
65*a58d3d2aSXin Li
66*a58d3d2aSXin Li /* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67*a58d3d2aSXin Li with this approximation is important because it has an impact on the bit allocation */
bitexact_cos(opus_int16 x)68*a58d3d2aSXin Li opus_int16 bitexact_cos(opus_int16 x)
69*a58d3d2aSXin Li {
70*a58d3d2aSXin Li opus_int32 tmp;
71*a58d3d2aSXin Li opus_int16 x2;
72*a58d3d2aSXin Li tmp = (4096+((opus_int32)(x)*(x)))>>13;
73*a58d3d2aSXin Li celt_sig_assert(tmp<=32767);
74*a58d3d2aSXin Li x2 = tmp;
75*a58d3d2aSXin Li x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76*a58d3d2aSXin Li celt_sig_assert(x2<=32766);
77*a58d3d2aSXin Li return 1+x2;
78*a58d3d2aSXin Li }
79*a58d3d2aSXin Li
bitexact_log2tan(int isin,int icos)80*a58d3d2aSXin Li int bitexact_log2tan(int isin,int icos)
81*a58d3d2aSXin Li {
82*a58d3d2aSXin Li int lc;
83*a58d3d2aSXin Li int ls;
84*a58d3d2aSXin Li lc=EC_ILOG(icos);
85*a58d3d2aSXin Li ls=EC_ILOG(isin);
86*a58d3d2aSXin Li icos<<=15-lc;
87*a58d3d2aSXin Li isin<<=15-ls;
88*a58d3d2aSXin Li return (ls-lc)*(1<<11)
89*a58d3d2aSXin Li +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90*a58d3d2aSXin Li -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91*a58d3d2aSXin Li }
92*a58d3d2aSXin Li
93*a58d3d2aSXin Li #ifdef FIXED_POINT
94*a58d3d2aSXin Li /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)95*a58d3d2aSXin Li void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
96*a58d3d2aSXin Li {
97*a58d3d2aSXin Li int i, c, N;
98*a58d3d2aSXin Li const opus_int16 *eBands = m->eBands;
99*a58d3d2aSXin Li (void)arch;
100*a58d3d2aSXin Li N = m->shortMdctSize<<LM;
101*a58d3d2aSXin Li c=0; do {
102*a58d3d2aSXin Li for (i=0;i<end;i++)
103*a58d3d2aSXin Li {
104*a58d3d2aSXin Li int j;
105*a58d3d2aSXin Li opus_val32 maxval=0;
106*a58d3d2aSXin Li opus_val32 sum = 0;
107*a58d3d2aSXin Li
108*a58d3d2aSXin Li maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
109*a58d3d2aSXin Li if (maxval > 0)
110*a58d3d2aSXin Li {
111*a58d3d2aSXin Li int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
112*a58d3d2aSXin Li j=eBands[i]<<LM;
113*a58d3d2aSXin Li if (shift>0)
114*a58d3d2aSXin Li {
115*a58d3d2aSXin Li do {
116*a58d3d2aSXin Li sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
117*a58d3d2aSXin Li EXTRACT16(SHR32(X[j+c*N],shift)));
118*a58d3d2aSXin Li } while (++j<eBands[i+1]<<LM);
119*a58d3d2aSXin Li } else {
120*a58d3d2aSXin Li do {
121*a58d3d2aSXin Li sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
122*a58d3d2aSXin Li EXTRACT16(SHL32(X[j+c*N],-shift)));
123*a58d3d2aSXin Li } while (++j<eBands[i+1]<<LM);
124*a58d3d2aSXin Li }
125*a58d3d2aSXin Li /* We're adding one here to ensure the normalized band isn't larger than unity norm */
126*a58d3d2aSXin Li bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
127*a58d3d2aSXin Li } else {
128*a58d3d2aSXin Li bandE[i+c*m->nbEBands] = EPSILON;
129*a58d3d2aSXin Li }
130*a58d3d2aSXin Li /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
131*a58d3d2aSXin Li }
132*a58d3d2aSXin Li } while (++c<C);
133*a58d3d2aSXin Li /*printf ("\n");*/
134*a58d3d2aSXin Li }
135*a58d3d2aSXin Li
136*a58d3d2aSXin Li /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)137*a58d3d2aSXin Li void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
138*a58d3d2aSXin Li {
139*a58d3d2aSXin Li int i, c, N;
140*a58d3d2aSXin Li const opus_int16 *eBands = m->eBands;
141*a58d3d2aSXin Li N = M*m->shortMdctSize;
142*a58d3d2aSXin Li c=0; do {
143*a58d3d2aSXin Li i=0; do {
144*a58d3d2aSXin Li opus_val16 g;
145*a58d3d2aSXin Li int j,shift;
146*a58d3d2aSXin Li opus_val16 E;
147*a58d3d2aSXin Li shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
148*a58d3d2aSXin Li E = VSHR32(bandE[i+c*m->nbEBands], shift);
149*a58d3d2aSXin Li g = EXTRACT16(celt_rcp(SHL32(E,3)));
150*a58d3d2aSXin Li j=M*eBands[i]; do {
151*a58d3d2aSXin Li X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
152*a58d3d2aSXin Li } while (++j<M*eBands[i+1]);
153*a58d3d2aSXin Li } while (++i<end);
154*a58d3d2aSXin Li } while (++c<C);
155*a58d3d2aSXin Li }
156*a58d3d2aSXin Li
157*a58d3d2aSXin Li #else /* FIXED_POINT */
158*a58d3d2aSXin Li /* Compute the amplitude (sqrt energy) in each of the bands */
compute_band_energies(const CELTMode * m,const celt_sig * X,celt_ener * bandE,int end,int C,int LM,int arch)159*a58d3d2aSXin Li void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
160*a58d3d2aSXin Li {
161*a58d3d2aSXin Li int i, c, N;
162*a58d3d2aSXin Li const opus_int16 *eBands = m->eBands;
163*a58d3d2aSXin Li N = m->shortMdctSize<<LM;
164*a58d3d2aSXin Li c=0; do {
165*a58d3d2aSXin Li for (i=0;i<end;i++)
166*a58d3d2aSXin Li {
167*a58d3d2aSXin Li opus_val32 sum;
168*a58d3d2aSXin Li sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
169*a58d3d2aSXin Li bandE[i+c*m->nbEBands] = celt_sqrt(sum);
170*a58d3d2aSXin Li /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
171*a58d3d2aSXin Li }
172*a58d3d2aSXin Li } while (++c<C);
173*a58d3d2aSXin Li /*printf ("\n");*/
174*a58d3d2aSXin Li }
175*a58d3d2aSXin Li
176*a58d3d2aSXin Li /* Normalise each band such that the energy is one. */
normalise_bands(const CELTMode * m,const celt_sig * OPUS_RESTRICT freq,celt_norm * OPUS_RESTRICT X,const celt_ener * bandE,int end,int C,int M)177*a58d3d2aSXin Li void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
178*a58d3d2aSXin Li {
179*a58d3d2aSXin Li int i, c, N;
180*a58d3d2aSXin Li const opus_int16 *eBands = m->eBands;
181*a58d3d2aSXin Li N = M*m->shortMdctSize;
182*a58d3d2aSXin Li c=0; do {
183*a58d3d2aSXin Li for (i=0;i<end;i++)
184*a58d3d2aSXin Li {
185*a58d3d2aSXin Li int j;
186*a58d3d2aSXin Li opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
187*a58d3d2aSXin Li for (j=M*eBands[i];j<M*eBands[i+1];j++)
188*a58d3d2aSXin Li X[j+c*N] = freq[j+c*N]*g;
189*a58d3d2aSXin Li }
190*a58d3d2aSXin Li } while (++c<C);
191*a58d3d2aSXin Li }
192*a58d3d2aSXin Li
193*a58d3d2aSXin Li #endif /* FIXED_POINT */
194*a58d3d2aSXin Li
195*a58d3d2aSXin Li /* De-normalise the energy to produce the synthesis from the unit-energy bands */
denormalise_bands(const CELTMode * m,const celt_norm * OPUS_RESTRICT X,celt_sig * OPUS_RESTRICT freq,const opus_val16 * bandLogE,int start,int end,int M,int downsample,int silence)196*a58d3d2aSXin Li void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
197*a58d3d2aSXin Li celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
198*a58d3d2aSXin Li int end, int M, int downsample, int silence)
199*a58d3d2aSXin Li {
200*a58d3d2aSXin Li int i, N;
201*a58d3d2aSXin Li int bound;
202*a58d3d2aSXin Li celt_sig * OPUS_RESTRICT f;
203*a58d3d2aSXin Li const celt_norm * OPUS_RESTRICT x;
204*a58d3d2aSXin Li const opus_int16 *eBands = m->eBands;
205*a58d3d2aSXin Li N = M*m->shortMdctSize;
206*a58d3d2aSXin Li bound = M*eBands[end];
207*a58d3d2aSXin Li if (downsample!=1)
208*a58d3d2aSXin Li bound = IMIN(bound, N/downsample);
209*a58d3d2aSXin Li if (silence)
210*a58d3d2aSXin Li {
211*a58d3d2aSXin Li bound = 0;
212*a58d3d2aSXin Li start = end = 0;
213*a58d3d2aSXin Li }
214*a58d3d2aSXin Li f = freq;
215*a58d3d2aSXin Li x = X+M*eBands[start];
216*a58d3d2aSXin Li for (i=0;i<M*eBands[start];i++)
217*a58d3d2aSXin Li *f++ = 0;
218*a58d3d2aSXin Li for (i=start;i<end;i++)
219*a58d3d2aSXin Li {
220*a58d3d2aSXin Li int j, band_end;
221*a58d3d2aSXin Li opus_val16 g;
222*a58d3d2aSXin Li opus_val16 lg;
223*a58d3d2aSXin Li #ifdef FIXED_POINT
224*a58d3d2aSXin Li int shift;
225*a58d3d2aSXin Li #endif
226*a58d3d2aSXin Li j=M*eBands[i];
227*a58d3d2aSXin Li band_end = M*eBands[i+1];
228*a58d3d2aSXin Li lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
229*a58d3d2aSXin Li #ifndef FIXED_POINT
230*a58d3d2aSXin Li g = celt_exp2(MIN32(32.f, lg));
231*a58d3d2aSXin Li #else
232*a58d3d2aSXin Li /* Handle the integer part of the log energy */
233*a58d3d2aSXin Li shift = 16-(lg>>DB_SHIFT);
234*a58d3d2aSXin Li if (shift>31)
235*a58d3d2aSXin Li {
236*a58d3d2aSXin Li shift=0;
237*a58d3d2aSXin Li g=0;
238*a58d3d2aSXin Li } else {
239*a58d3d2aSXin Li /* Handle the fractional part. */
240*a58d3d2aSXin Li g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
241*a58d3d2aSXin Li }
242*a58d3d2aSXin Li /* Handle extreme gains with negative shift. */
243*a58d3d2aSXin Li if (shift<0)
244*a58d3d2aSXin Li {
245*a58d3d2aSXin Li /* For shift <= -2 and g > 16384 we'd be likely to overflow, so we're
246*a58d3d2aSXin Li capping the gain here, which is equivalent to a cap of 18 on lg.
247*a58d3d2aSXin Li This shouldn't trigger unless the bitstream is already corrupted. */
248*a58d3d2aSXin Li if (shift <= -2)
249*a58d3d2aSXin Li {
250*a58d3d2aSXin Li g = 16384;
251*a58d3d2aSXin Li shift = -2;
252*a58d3d2aSXin Li }
253*a58d3d2aSXin Li do {
254*a58d3d2aSXin Li *f++ = SHL32(MULT16_16(*x++, g), -shift);
255*a58d3d2aSXin Li } while (++j<band_end);
256*a58d3d2aSXin Li } else
257*a58d3d2aSXin Li #endif
258*a58d3d2aSXin Li /* Be careful of the fixed-point "else" just above when changing this code */
259*a58d3d2aSXin Li do {
260*a58d3d2aSXin Li *f++ = SHR32(MULT16_16(*x++, g), shift);
261*a58d3d2aSXin Li } while (++j<band_end);
262*a58d3d2aSXin Li }
263*a58d3d2aSXin Li celt_assert(start <= end);
264*a58d3d2aSXin Li OPUS_CLEAR(&freq[bound], N-bound);
265*a58d3d2aSXin Li }
266*a58d3d2aSXin Li
267*a58d3d2aSXin Li /* This prevents energy collapse for transients with multiple short MDCTs */
anti_collapse(const CELTMode * m,celt_norm * X_,unsigned char * collapse_masks,int LM,int C,int size,int start,int end,const opus_val16 * logE,const opus_val16 * prev1logE,const opus_val16 * prev2logE,const int * pulses,opus_uint32 seed,int arch)268*a58d3d2aSXin Li void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
269*a58d3d2aSXin Li int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
270*a58d3d2aSXin Li const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
271*a58d3d2aSXin Li {
272*a58d3d2aSXin Li int c, i, j, k;
273*a58d3d2aSXin Li for (i=start;i<end;i++)
274*a58d3d2aSXin Li {
275*a58d3d2aSXin Li int N0;
276*a58d3d2aSXin Li opus_val16 thresh, sqrt_1;
277*a58d3d2aSXin Li int depth;
278*a58d3d2aSXin Li #ifdef FIXED_POINT
279*a58d3d2aSXin Li int shift;
280*a58d3d2aSXin Li opus_val32 thresh32;
281*a58d3d2aSXin Li #endif
282*a58d3d2aSXin Li
283*a58d3d2aSXin Li N0 = m->eBands[i+1]-m->eBands[i];
284*a58d3d2aSXin Li /* depth in 1/8 bits */
285*a58d3d2aSXin Li celt_sig_assert(pulses[i]>=0);
286*a58d3d2aSXin Li depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
287*a58d3d2aSXin Li
288*a58d3d2aSXin Li #ifdef FIXED_POINT
289*a58d3d2aSXin Li thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
290*a58d3d2aSXin Li thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
291*a58d3d2aSXin Li {
292*a58d3d2aSXin Li opus_val32 t;
293*a58d3d2aSXin Li t = N0<<LM;
294*a58d3d2aSXin Li shift = celt_ilog2(t)>>1;
295*a58d3d2aSXin Li t = SHL32(t, (7-shift)<<1);
296*a58d3d2aSXin Li sqrt_1 = celt_rsqrt_norm(t);
297*a58d3d2aSXin Li }
298*a58d3d2aSXin Li #else
299*a58d3d2aSXin Li thresh = .5f*celt_exp2(-.125f*depth);
300*a58d3d2aSXin Li sqrt_1 = celt_rsqrt(N0<<LM);
301*a58d3d2aSXin Li #endif
302*a58d3d2aSXin Li
303*a58d3d2aSXin Li c=0; do
304*a58d3d2aSXin Li {
305*a58d3d2aSXin Li celt_norm *X;
306*a58d3d2aSXin Li opus_val16 prev1;
307*a58d3d2aSXin Li opus_val16 prev2;
308*a58d3d2aSXin Li opus_val32 Ediff;
309*a58d3d2aSXin Li opus_val16 r;
310*a58d3d2aSXin Li int renormalize=0;
311*a58d3d2aSXin Li prev1 = prev1logE[c*m->nbEBands+i];
312*a58d3d2aSXin Li prev2 = prev2logE[c*m->nbEBands+i];
313*a58d3d2aSXin Li if (C==1)
314*a58d3d2aSXin Li {
315*a58d3d2aSXin Li prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
316*a58d3d2aSXin Li prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
317*a58d3d2aSXin Li }
318*a58d3d2aSXin Li Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
319*a58d3d2aSXin Li Ediff = MAX32(0, Ediff);
320*a58d3d2aSXin Li
321*a58d3d2aSXin Li #ifdef FIXED_POINT
322*a58d3d2aSXin Li if (Ediff < 16384)
323*a58d3d2aSXin Li {
324*a58d3d2aSXin Li opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
325*a58d3d2aSXin Li r = 2*MIN16(16383,r32);
326*a58d3d2aSXin Li } else {
327*a58d3d2aSXin Li r = 0;
328*a58d3d2aSXin Li }
329*a58d3d2aSXin Li if (LM==3)
330*a58d3d2aSXin Li r = MULT16_16_Q14(23170, MIN32(23169, r));
331*a58d3d2aSXin Li r = SHR16(MIN16(thresh, r),1);
332*a58d3d2aSXin Li r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
333*a58d3d2aSXin Li #else
334*a58d3d2aSXin Li /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
335*a58d3d2aSXin Li short blocks don't have the same energy as long */
336*a58d3d2aSXin Li r = 2.f*celt_exp2(-Ediff);
337*a58d3d2aSXin Li if (LM==3)
338*a58d3d2aSXin Li r *= 1.41421356f;
339*a58d3d2aSXin Li r = MIN16(thresh, r);
340*a58d3d2aSXin Li r = r*sqrt_1;
341*a58d3d2aSXin Li #endif
342*a58d3d2aSXin Li X = X_+c*size+(m->eBands[i]<<LM);
343*a58d3d2aSXin Li for (k=0;k<1<<LM;k++)
344*a58d3d2aSXin Li {
345*a58d3d2aSXin Li /* Detect collapse */
346*a58d3d2aSXin Li if (!(collapse_masks[i*C+c]&1<<k))
347*a58d3d2aSXin Li {
348*a58d3d2aSXin Li /* Fill with noise */
349*a58d3d2aSXin Li for (j=0;j<N0;j++)
350*a58d3d2aSXin Li {
351*a58d3d2aSXin Li seed = celt_lcg_rand(seed);
352*a58d3d2aSXin Li X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
353*a58d3d2aSXin Li }
354*a58d3d2aSXin Li renormalize = 1;
355*a58d3d2aSXin Li }
356*a58d3d2aSXin Li }
357*a58d3d2aSXin Li /* We just added some energy, so we need to renormalise */
358*a58d3d2aSXin Li if (renormalize)
359*a58d3d2aSXin Li renormalise_vector(X, N0<<LM, Q15ONE, arch);
360*a58d3d2aSXin Li } while (++c<C);
361*a58d3d2aSXin Li }
362*a58d3d2aSXin Li }
363*a58d3d2aSXin Li
364*a58d3d2aSXin Li /* Compute the weights to use for optimizing normalized distortion across
365*a58d3d2aSXin Li channels. We use the amplitude to weight square distortion, which means
366*a58d3d2aSXin Li that we use the square root of the value we would have been using if we
367*a58d3d2aSXin Li wanted to minimize the MSE in the non-normalized domain. This roughly
368*a58d3d2aSXin Li corresponds to some quick-and-dirty perceptual experiments I ran to
369*a58d3d2aSXin Li measure inter-aural masking (there doesn't seem to be any published data
370*a58d3d2aSXin Li on the topic). */
compute_channel_weights(celt_ener Ex,celt_ener Ey,opus_val16 w[2])371*a58d3d2aSXin Li static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
372*a58d3d2aSXin Li {
373*a58d3d2aSXin Li celt_ener minE;
374*a58d3d2aSXin Li #ifdef FIXED_POINT
375*a58d3d2aSXin Li int shift;
376*a58d3d2aSXin Li #endif
377*a58d3d2aSXin Li minE = MIN32(Ex, Ey);
378*a58d3d2aSXin Li /* Adjustment to make the weights a bit more conservative. */
379*a58d3d2aSXin Li Ex = ADD32(Ex, minE/3);
380*a58d3d2aSXin Li Ey = ADD32(Ey, minE/3);
381*a58d3d2aSXin Li #ifdef FIXED_POINT
382*a58d3d2aSXin Li shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
383*a58d3d2aSXin Li #endif
384*a58d3d2aSXin Li w[0] = VSHR32(Ex, shift);
385*a58d3d2aSXin Li w[1] = VSHR32(Ey, shift);
386*a58d3d2aSXin Li }
387*a58d3d2aSXin Li
intensity_stereo(const CELTMode * m,celt_norm * OPUS_RESTRICT X,const celt_norm * OPUS_RESTRICT Y,const celt_ener * bandE,int bandID,int N)388*a58d3d2aSXin Li static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
389*a58d3d2aSXin Li {
390*a58d3d2aSXin Li int i = bandID;
391*a58d3d2aSXin Li int j;
392*a58d3d2aSXin Li opus_val16 a1, a2;
393*a58d3d2aSXin Li opus_val16 left, right;
394*a58d3d2aSXin Li opus_val16 norm;
395*a58d3d2aSXin Li #ifdef FIXED_POINT
396*a58d3d2aSXin Li int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
397*a58d3d2aSXin Li #endif
398*a58d3d2aSXin Li left = VSHR32(bandE[i],shift);
399*a58d3d2aSXin Li right = VSHR32(bandE[i+m->nbEBands],shift);
400*a58d3d2aSXin Li norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
401*a58d3d2aSXin Li a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
402*a58d3d2aSXin Li a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
403*a58d3d2aSXin Li for (j=0;j<N;j++)
404*a58d3d2aSXin Li {
405*a58d3d2aSXin Li celt_norm r, l;
406*a58d3d2aSXin Li l = X[j];
407*a58d3d2aSXin Li r = Y[j];
408*a58d3d2aSXin Li X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
409*a58d3d2aSXin Li /* Side is not encoded, no need to calculate */
410*a58d3d2aSXin Li }
411*a58d3d2aSXin Li }
412*a58d3d2aSXin Li
stereo_split(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,int N)413*a58d3d2aSXin Li static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
414*a58d3d2aSXin Li {
415*a58d3d2aSXin Li int j;
416*a58d3d2aSXin Li for (j=0;j<N;j++)
417*a58d3d2aSXin Li {
418*a58d3d2aSXin Li opus_val32 r, l;
419*a58d3d2aSXin Li l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
420*a58d3d2aSXin Li r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
421*a58d3d2aSXin Li X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
422*a58d3d2aSXin Li Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
423*a58d3d2aSXin Li }
424*a58d3d2aSXin Li }
425*a58d3d2aSXin Li
stereo_merge(celt_norm * OPUS_RESTRICT X,celt_norm * OPUS_RESTRICT Y,opus_val16 mid,int N,int arch)426*a58d3d2aSXin Li static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
427*a58d3d2aSXin Li {
428*a58d3d2aSXin Li int j;
429*a58d3d2aSXin Li opus_val32 xp=0, side=0;
430*a58d3d2aSXin Li opus_val32 El, Er;
431*a58d3d2aSXin Li opus_val16 mid2;
432*a58d3d2aSXin Li #ifdef FIXED_POINT
433*a58d3d2aSXin Li int kl, kr;
434*a58d3d2aSXin Li #endif
435*a58d3d2aSXin Li opus_val32 t, lgain, rgain;
436*a58d3d2aSXin Li
437*a58d3d2aSXin Li /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
438*a58d3d2aSXin Li dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
439*a58d3d2aSXin Li /* Compensating for the mid normalization */
440*a58d3d2aSXin Li xp = MULT16_32_Q15(mid, xp);
441*a58d3d2aSXin Li /* mid and side are in Q15, not Q14 like X and Y */
442*a58d3d2aSXin Li mid2 = SHR16(mid, 1);
443*a58d3d2aSXin Li El = MULT16_16(mid2, mid2) + side - 2*xp;
444*a58d3d2aSXin Li Er = MULT16_16(mid2, mid2) + side + 2*xp;
445*a58d3d2aSXin Li if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
446*a58d3d2aSXin Li {
447*a58d3d2aSXin Li OPUS_COPY(Y, X, N);
448*a58d3d2aSXin Li return;
449*a58d3d2aSXin Li }
450*a58d3d2aSXin Li
451*a58d3d2aSXin Li #ifdef FIXED_POINT
452*a58d3d2aSXin Li kl = celt_ilog2(El)>>1;
453*a58d3d2aSXin Li kr = celt_ilog2(Er)>>1;
454*a58d3d2aSXin Li #endif
455*a58d3d2aSXin Li t = VSHR32(El, (kl-7)<<1);
456*a58d3d2aSXin Li lgain = celt_rsqrt_norm(t);
457*a58d3d2aSXin Li t = VSHR32(Er, (kr-7)<<1);
458*a58d3d2aSXin Li rgain = celt_rsqrt_norm(t);
459*a58d3d2aSXin Li
460*a58d3d2aSXin Li #ifdef FIXED_POINT
461*a58d3d2aSXin Li if (kl < 7)
462*a58d3d2aSXin Li kl = 7;
463*a58d3d2aSXin Li if (kr < 7)
464*a58d3d2aSXin Li kr = 7;
465*a58d3d2aSXin Li #endif
466*a58d3d2aSXin Li
467*a58d3d2aSXin Li for (j=0;j<N;j++)
468*a58d3d2aSXin Li {
469*a58d3d2aSXin Li celt_norm r, l;
470*a58d3d2aSXin Li /* Apply mid scaling (side is already scaled) */
471*a58d3d2aSXin Li l = MULT16_16_P15(mid, X[j]);
472*a58d3d2aSXin Li r = Y[j];
473*a58d3d2aSXin Li X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
474*a58d3d2aSXin Li Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
475*a58d3d2aSXin Li }
476*a58d3d2aSXin Li }
477*a58d3d2aSXin Li
478*a58d3d2aSXin Li /* Decide whether we should spread the pulses in the current frame */
spreading_decision(const CELTMode * m,const celt_norm * X,int * average,int last_decision,int * hf_average,int * tapset_decision,int update_hf,int end,int C,int M,const int * spread_weight)479*a58d3d2aSXin Li int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
480*a58d3d2aSXin Li int last_decision, int *hf_average, int *tapset_decision, int update_hf,
481*a58d3d2aSXin Li int end, int C, int M, const int *spread_weight)
482*a58d3d2aSXin Li {
483*a58d3d2aSXin Li int i, c, N0;
484*a58d3d2aSXin Li int sum = 0, nbBands=0;
485*a58d3d2aSXin Li const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
486*a58d3d2aSXin Li int decision;
487*a58d3d2aSXin Li int hf_sum=0;
488*a58d3d2aSXin Li
489*a58d3d2aSXin Li celt_assert(end>0);
490*a58d3d2aSXin Li
491*a58d3d2aSXin Li N0 = M*m->shortMdctSize;
492*a58d3d2aSXin Li
493*a58d3d2aSXin Li if (M*(eBands[end]-eBands[end-1]) <= 8)
494*a58d3d2aSXin Li return SPREAD_NONE;
495*a58d3d2aSXin Li c=0; do {
496*a58d3d2aSXin Li for (i=0;i<end;i++)
497*a58d3d2aSXin Li {
498*a58d3d2aSXin Li int j, N, tmp=0;
499*a58d3d2aSXin Li int tcount[3] = {0,0,0};
500*a58d3d2aSXin Li const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
501*a58d3d2aSXin Li N = M*(eBands[i+1]-eBands[i]);
502*a58d3d2aSXin Li if (N<=8)
503*a58d3d2aSXin Li continue;
504*a58d3d2aSXin Li /* Compute rough CDF of |x[j]| */
505*a58d3d2aSXin Li for (j=0;j<N;j++)
506*a58d3d2aSXin Li {
507*a58d3d2aSXin Li opus_val32 x2N; /* Q13 */
508*a58d3d2aSXin Li
509*a58d3d2aSXin Li x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
510*a58d3d2aSXin Li if (x2N < QCONST16(0.25f,13))
511*a58d3d2aSXin Li tcount[0]++;
512*a58d3d2aSXin Li if (x2N < QCONST16(0.0625f,13))
513*a58d3d2aSXin Li tcount[1]++;
514*a58d3d2aSXin Li if (x2N < QCONST16(0.015625f,13))
515*a58d3d2aSXin Li tcount[2]++;
516*a58d3d2aSXin Li }
517*a58d3d2aSXin Li
518*a58d3d2aSXin Li /* Only include four last bands (8 kHz and up) */
519*a58d3d2aSXin Li if (i>m->nbEBands-4)
520*a58d3d2aSXin Li hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
521*a58d3d2aSXin Li tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
522*a58d3d2aSXin Li sum += tmp*spread_weight[i];
523*a58d3d2aSXin Li nbBands+=spread_weight[i];
524*a58d3d2aSXin Li }
525*a58d3d2aSXin Li } while (++c<C);
526*a58d3d2aSXin Li
527*a58d3d2aSXin Li if (update_hf)
528*a58d3d2aSXin Li {
529*a58d3d2aSXin Li if (hf_sum)
530*a58d3d2aSXin Li hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
531*a58d3d2aSXin Li *hf_average = (*hf_average+hf_sum)>>1;
532*a58d3d2aSXin Li hf_sum = *hf_average;
533*a58d3d2aSXin Li if (*tapset_decision==2)
534*a58d3d2aSXin Li hf_sum += 4;
535*a58d3d2aSXin Li else if (*tapset_decision==0)
536*a58d3d2aSXin Li hf_sum -= 4;
537*a58d3d2aSXin Li if (hf_sum > 22)
538*a58d3d2aSXin Li *tapset_decision=2;
539*a58d3d2aSXin Li else if (hf_sum > 18)
540*a58d3d2aSXin Li *tapset_decision=1;
541*a58d3d2aSXin Li else
542*a58d3d2aSXin Li *tapset_decision=0;
543*a58d3d2aSXin Li }
544*a58d3d2aSXin Li /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
545*a58d3d2aSXin Li celt_assert(nbBands>0); /* end has to be non-zero */
546*a58d3d2aSXin Li celt_assert(sum>=0);
547*a58d3d2aSXin Li sum = celt_udiv((opus_int32)sum<<8, nbBands);
548*a58d3d2aSXin Li /* Recursive averaging */
549*a58d3d2aSXin Li sum = (sum+*average)>>1;
550*a58d3d2aSXin Li *average = sum;
551*a58d3d2aSXin Li /* Hysteresis */
552*a58d3d2aSXin Li sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
553*a58d3d2aSXin Li if (sum < 80)
554*a58d3d2aSXin Li {
555*a58d3d2aSXin Li decision = SPREAD_AGGRESSIVE;
556*a58d3d2aSXin Li } else if (sum < 256)
557*a58d3d2aSXin Li {
558*a58d3d2aSXin Li decision = SPREAD_NORMAL;
559*a58d3d2aSXin Li } else if (sum < 384)
560*a58d3d2aSXin Li {
561*a58d3d2aSXin Li decision = SPREAD_LIGHT;
562*a58d3d2aSXin Li } else {
563*a58d3d2aSXin Li decision = SPREAD_NONE;
564*a58d3d2aSXin Li }
565*a58d3d2aSXin Li #ifdef FUZZING
566*a58d3d2aSXin Li decision = rand()&0x3;
567*a58d3d2aSXin Li *tapset_decision=rand()%3;
568*a58d3d2aSXin Li #endif
569*a58d3d2aSXin Li return decision;
570*a58d3d2aSXin Li }
571*a58d3d2aSXin Li
572*a58d3d2aSXin Li /* Indexing table for converting from natural Hadamard to ordery Hadamard
573*a58d3d2aSXin Li This is essentially a bit-reversed Gray, on top of which we've added
574*a58d3d2aSXin Li an inversion of the order because we want the DC at the end rather than
575*a58d3d2aSXin Li the beginning. The lines are for N=2, 4, 8, 16 */
576*a58d3d2aSXin Li static const int ordery_table[] = {
577*a58d3d2aSXin Li 1, 0,
578*a58d3d2aSXin Li 3, 0, 2, 1,
579*a58d3d2aSXin Li 7, 0, 4, 3, 6, 1, 5, 2,
580*a58d3d2aSXin Li 15, 0, 8, 7, 12, 3, 11, 4, 14, 1, 9, 6, 13, 2, 10, 5,
581*a58d3d2aSXin Li };
582*a58d3d2aSXin Li
deinterleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)583*a58d3d2aSXin Li static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
584*a58d3d2aSXin Li {
585*a58d3d2aSXin Li int i,j;
586*a58d3d2aSXin Li VARDECL(celt_norm, tmp);
587*a58d3d2aSXin Li int N;
588*a58d3d2aSXin Li SAVE_STACK;
589*a58d3d2aSXin Li N = N0*stride;
590*a58d3d2aSXin Li ALLOC(tmp, N, celt_norm);
591*a58d3d2aSXin Li celt_assert(stride>0);
592*a58d3d2aSXin Li if (hadamard)
593*a58d3d2aSXin Li {
594*a58d3d2aSXin Li const int *ordery = ordery_table+stride-2;
595*a58d3d2aSXin Li for (i=0;i<stride;i++)
596*a58d3d2aSXin Li {
597*a58d3d2aSXin Li for (j=0;j<N0;j++)
598*a58d3d2aSXin Li tmp[ordery[i]*N0+j] = X[j*stride+i];
599*a58d3d2aSXin Li }
600*a58d3d2aSXin Li } else {
601*a58d3d2aSXin Li for (i=0;i<stride;i++)
602*a58d3d2aSXin Li for (j=0;j<N0;j++)
603*a58d3d2aSXin Li tmp[i*N0+j] = X[j*stride+i];
604*a58d3d2aSXin Li }
605*a58d3d2aSXin Li OPUS_COPY(X, tmp, N);
606*a58d3d2aSXin Li RESTORE_STACK;
607*a58d3d2aSXin Li }
608*a58d3d2aSXin Li
interleave_hadamard(celt_norm * X,int N0,int stride,int hadamard)609*a58d3d2aSXin Li static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
610*a58d3d2aSXin Li {
611*a58d3d2aSXin Li int i,j;
612*a58d3d2aSXin Li VARDECL(celt_norm, tmp);
613*a58d3d2aSXin Li int N;
614*a58d3d2aSXin Li SAVE_STACK;
615*a58d3d2aSXin Li N = N0*stride;
616*a58d3d2aSXin Li ALLOC(tmp, N, celt_norm);
617*a58d3d2aSXin Li if (hadamard)
618*a58d3d2aSXin Li {
619*a58d3d2aSXin Li const int *ordery = ordery_table+stride-2;
620*a58d3d2aSXin Li for (i=0;i<stride;i++)
621*a58d3d2aSXin Li for (j=0;j<N0;j++)
622*a58d3d2aSXin Li tmp[j*stride+i] = X[ordery[i]*N0+j];
623*a58d3d2aSXin Li } else {
624*a58d3d2aSXin Li for (i=0;i<stride;i++)
625*a58d3d2aSXin Li for (j=0;j<N0;j++)
626*a58d3d2aSXin Li tmp[j*stride+i] = X[i*N0+j];
627*a58d3d2aSXin Li }
628*a58d3d2aSXin Li OPUS_COPY(X, tmp, N);
629*a58d3d2aSXin Li RESTORE_STACK;
630*a58d3d2aSXin Li }
631*a58d3d2aSXin Li
haar1(celt_norm * X,int N0,int stride)632*a58d3d2aSXin Li void haar1(celt_norm *X, int N0, int stride)
633*a58d3d2aSXin Li {
634*a58d3d2aSXin Li int i, j;
635*a58d3d2aSXin Li N0 >>= 1;
636*a58d3d2aSXin Li for (i=0;i<stride;i++)
637*a58d3d2aSXin Li for (j=0;j<N0;j++)
638*a58d3d2aSXin Li {
639*a58d3d2aSXin Li opus_val32 tmp1, tmp2;
640*a58d3d2aSXin Li tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
641*a58d3d2aSXin Li tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
642*a58d3d2aSXin Li X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
643*a58d3d2aSXin Li X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
644*a58d3d2aSXin Li }
645*a58d3d2aSXin Li }
646*a58d3d2aSXin Li
compute_qn(int N,int b,int offset,int pulse_cap,int stereo)647*a58d3d2aSXin Li static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
648*a58d3d2aSXin Li {
649*a58d3d2aSXin Li static const opus_int16 exp2_table8[8] =
650*a58d3d2aSXin Li {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
651*a58d3d2aSXin Li int qn, qb;
652*a58d3d2aSXin Li int N2 = 2*N-1;
653*a58d3d2aSXin Li if (stereo && N==2)
654*a58d3d2aSXin Li N2--;
655*a58d3d2aSXin Li /* The upper limit ensures that in a stereo split with itheta==16384, we'll
656*a58d3d2aSXin Li always have enough bits left over to code at least one pulse in the
657*a58d3d2aSXin Li side; otherwise it would collapse, since it doesn't get folded. */
658*a58d3d2aSXin Li qb = celt_sudiv(b+N2*offset, N2);
659*a58d3d2aSXin Li qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
660*a58d3d2aSXin Li
661*a58d3d2aSXin Li qb = IMIN(8<<BITRES, qb);
662*a58d3d2aSXin Li
663*a58d3d2aSXin Li if (qb<(1<<BITRES>>1)) {
664*a58d3d2aSXin Li qn = 1;
665*a58d3d2aSXin Li } else {
666*a58d3d2aSXin Li qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
667*a58d3d2aSXin Li qn = (qn+1)>>1<<1;
668*a58d3d2aSXin Li }
669*a58d3d2aSXin Li celt_assert(qn <= 256);
670*a58d3d2aSXin Li return qn;
671*a58d3d2aSXin Li }
672*a58d3d2aSXin Li
673*a58d3d2aSXin Li struct band_ctx {
674*a58d3d2aSXin Li int encode;
675*a58d3d2aSXin Li int resynth;
676*a58d3d2aSXin Li const CELTMode *m;
677*a58d3d2aSXin Li int i;
678*a58d3d2aSXin Li int intensity;
679*a58d3d2aSXin Li int spread;
680*a58d3d2aSXin Li int tf_change;
681*a58d3d2aSXin Li ec_ctx *ec;
682*a58d3d2aSXin Li opus_int32 remaining_bits;
683*a58d3d2aSXin Li const celt_ener *bandE;
684*a58d3d2aSXin Li opus_uint32 seed;
685*a58d3d2aSXin Li int arch;
686*a58d3d2aSXin Li int theta_round;
687*a58d3d2aSXin Li int disable_inv;
688*a58d3d2aSXin Li int avoid_split_noise;
689*a58d3d2aSXin Li };
690*a58d3d2aSXin Li
691*a58d3d2aSXin Li struct split_ctx {
692*a58d3d2aSXin Li int inv;
693*a58d3d2aSXin Li int imid;
694*a58d3d2aSXin Li int iside;
695*a58d3d2aSXin Li int delta;
696*a58d3d2aSXin Li int itheta;
697*a58d3d2aSXin Li int qalloc;
698*a58d3d2aSXin Li };
699*a58d3d2aSXin Li
compute_theta(struct band_ctx * ctx,struct split_ctx * sctx,celt_norm * X,celt_norm * Y,int N,int * b,int B,int B0,int LM,int stereo,int * fill)700*a58d3d2aSXin Li static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
701*a58d3d2aSXin Li celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
702*a58d3d2aSXin Li int LM,
703*a58d3d2aSXin Li int stereo, int *fill)
704*a58d3d2aSXin Li {
705*a58d3d2aSXin Li int qn;
706*a58d3d2aSXin Li int itheta=0;
707*a58d3d2aSXin Li int delta;
708*a58d3d2aSXin Li int imid, iside;
709*a58d3d2aSXin Li int qalloc;
710*a58d3d2aSXin Li int pulse_cap;
711*a58d3d2aSXin Li int offset;
712*a58d3d2aSXin Li opus_int32 tell;
713*a58d3d2aSXin Li int inv=0;
714*a58d3d2aSXin Li int encode;
715*a58d3d2aSXin Li const CELTMode *m;
716*a58d3d2aSXin Li int i;
717*a58d3d2aSXin Li int intensity;
718*a58d3d2aSXin Li ec_ctx *ec;
719*a58d3d2aSXin Li const celt_ener *bandE;
720*a58d3d2aSXin Li
721*a58d3d2aSXin Li encode = ctx->encode;
722*a58d3d2aSXin Li m = ctx->m;
723*a58d3d2aSXin Li i = ctx->i;
724*a58d3d2aSXin Li intensity = ctx->intensity;
725*a58d3d2aSXin Li ec = ctx->ec;
726*a58d3d2aSXin Li bandE = ctx->bandE;
727*a58d3d2aSXin Li
728*a58d3d2aSXin Li /* Decide on the resolution to give to the split parameter theta */
729*a58d3d2aSXin Li pulse_cap = m->logN[i]+LM*(1<<BITRES);
730*a58d3d2aSXin Li offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
731*a58d3d2aSXin Li qn = compute_qn(N, *b, offset, pulse_cap, stereo);
732*a58d3d2aSXin Li if (stereo && i>=intensity)
733*a58d3d2aSXin Li qn = 1;
734*a58d3d2aSXin Li if (encode)
735*a58d3d2aSXin Li {
736*a58d3d2aSXin Li /* theta is the atan() of the ratio between the (normalized)
737*a58d3d2aSXin Li side and mid. With just that parameter, we can re-scale both
738*a58d3d2aSXin Li mid and side because we know that 1) they have unit norm and
739*a58d3d2aSXin Li 2) they are orthogonal. */
740*a58d3d2aSXin Li itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
741*a58d3d2aSXin Li }
742*a58d3d2aSXin Li tell = ec_tell_frac(ec);
743*a58d3d2aSXin Li if (qn!=1)
744*a58d3d2aSXin Li {
745*a58d3d2aSXin Li if (encode)
746*a58d3d2aSXin Li {
747*a58d3d2aSXin Li if (!stereo || ctx->theta_round == 0)
748*a58d3d2aSXin Li {
749*a58d3d2aSXin Li itheta = (itheta*(opus_int32)qn+8192)>>14;
750*a58d3d2aSXin Li if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
751*a58d3d2aSXin Li {
752*a58d3d2aSXin Li /* Check if the selected value of theta will cause the bit allocation
753*a58d3d2aSXin Li to inject noise on one side. If so, make sure the energy of that side
754*a58d3d2aSXin Li is zero. */
755*a58d3d2aSXin Li int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
756*a58d3d2aSXin Li imid = bitexact_cos((opus_int16)unquantized);
757*a58d3d2aSXin Li iside = bitexact_cos((opus_int16)(16384-unquantized));
758*a58d3d2aSXin Li delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
759*a58d3d2aSXin Li if (delta > *b)
760*a58d3d2aSXin Li itheta = qn;
761*a58d3d2aSXin Li else if (delta < -*b)
762*a58d3d2aSXin Li itheta = 0;
763*a58d3d2aSXin Li }
764*a58d3d2aSXin Li } else {
765*a58d3d2aSXin Li int down;
766*a58d3d2aSXin Li /* Bias quantization towards itheta=0 and itheta=16384. */
767*a58d3d2aSXin Li int bias = itheta > 8192 ? 32767/qn : -32767/qn;
768*a58d3d2aSXin Li down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
769*a58d3d2aSXin Li if (ctx->theta_round < 0)
770*a58d3d2aSXin Li itheta = down;
771*a58d3d2aSXin Li else
772*a58d3d2aSXin Li itheta = down+1;
773*a58d3d2aSXin Li }
774*a58d3d2aSXin Li }
775*a58d3d2aSXin Li /* Entropy coding of the angle. We use a uniform pdf for the
776*a58d3d2aSXin Li time split, a step for stereo, and a triangular one for the rest. */
777*a58d3d2aSXin Li if (stereo && N>2)
778*a58d3d2aSXin Li {
779*a58d3d2aSXin Li int p0 = 3;
780*a58d3d2aSXin Li int x = itheta;
781*a58d3d2aSXin Li int x0 = qn/2;
782*a58d3d2aSXin Li int ft = p0*(x0+1) + x0;
783*a58d3d2aSXin Li /* Use a probability of p0 up to itheta=8192 and then use 1 after */
784*a58d3d2aSXin Li if (encode)
785*a58d3d2aSXin Li {
786*a58d3d2aSXin Li ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
787*a58d3d2aSXin Li } else {
788*a58d3d2aSXin Li int fs;
789*a58d3d2aSXin Li fs=ec_decode(ec,ft);
790*a58d3d2aSXin Li if (fs<(x0+1)*p0)
791*a58d3d2aSXin Li x=fs/p0;
792*a58d3d2aSXin Li else
793*a58d3d2aSXin Li x=x0+1+(fs-(x0+1)*p0);
794*a58d3d2aSXin Li ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
795*a58d3d2aSXin Li itheta = x;
796*a58d3d2aSXin Li }
797*a58d3d2aSXin Li } else if (B0>1 || stereo) {
798*a58d3d2aSXin Li /* Uniform pdf */
799*a58d3d2aSXin Li if (encode)
800*a58d3d2aSXin Li ec_enc_uint(ec, itheta, qn+1);
801*a58d3d2aSXin Li else
802*a58d3d2aSXin Li itheta = ec_dec_uint(ec, qn+1);
803*a58d3d2aSXin Li } else {
804*a58d3d2aSXin Li int fs=1, ft;
805*a58d3d2aSXin Li ft = ((qn>>1)+1)*((qn>>1)+1);
806*a58d3d2aSXin Li if (encode)
807*a58d3d2aSXin Li {
808*a58d3d2aSXin Li int fl;
809*a58d3d2aSXin Li
810*a58d3d2aSXin Li fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
811*a58d3d2aSXin Li fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
812*a58d3d2aSXin Li ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
813*a58d3d2aSXin Li
814*a58d3d2aSXin Li ec_encode(ec, fl, fl+fs, ft);
815*a58d3d2aSXin Li } else {
816*a58d3d2aSXin Li /* Triangular pdf */
817*a58d3d2aSXin Li int fl=0;
818*a58d3d2aSXin Li int fm;
819*a58d3d2aSXin Li fm = ec_decode(ec, ft);
820*a58d3d2aSXin Li
821*a58d3d2aSXin Li if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
822*a58d3d2aSXin Li {
823*a58d3d2aSXin Li itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
824*a58d3d2aSXin Li fs = itheta + 1;
825*a58d3d2aSXin Li fl = itheta*(itheta + 1)>>1;
826*a58d3d2aSXin Li }
827*a58d3d2aSXin Li else
828*a58d3d2aSXin Li {
829*a58d3d2aSXin Li itheta = (2*(qn + 1)
830*a58d3d2aSXin Li - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
831*a58d3d2aSXin Li fs = qn + 1 - itheta;
832*a58d3d2aSXin Li fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
833*a58d3d2aSXin Li }
834*a58d3d2aSXin Li
835*a58d3d2aSXin Li ec_dec_update(ec, fl, fl+fs, ft);
836*a58d3d2aSXin Li }
837*a58d3d2aSXin Li }
838*a58d3d2aSXin Li celt_assert(itheta>=0);
839*a58d3d2aSXin Li itheta = celt_udiv((opus_int32)itheta*16384, qn);
840*a58d3d2aSXin Li if (encode && stereo)
841*a58d3d2aSXin Li {
842*a58d3d2aSXin Li if (itheta==0)
843*a58d3d2aSXin Li intensity_stereo(m, X, Y, bandE, i, N);
844*a58d3d2aSXin Li else
845*a58d3d2aSXin Li stereo_split(X, Y, N);
846*a58d3d2aSXin Li }
847*a58d3d2aSXin Li /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
848*a58d3d2aSXin Li Let's do that at higher complexity */
849*a58d3d2aSXin Li } else if (stereo) {
850*a58d3d2aSXin Li if (encode)
851*a58d3d2aSXin Li {
852*a58d3d2aSXin Li inv = itheta > 8192 && !ctx->disable_inv;
853*a58d3d2aSXin Li if (inv)
854*a58d3d2aSXin Li {
855*a58d3d2aSXin Li int j;
856*a58d3d2aSXin Li for (j=0;j<N;j++)
857*a58d3d2aSXin Li Y[j] = -Y[j];
858*a58d3d2aSXin Li }
859*a58d3d2aSXin Li intensity_stereo(m, X, Y, bandE, i, N);
860*a58d3d2aSXin Li }
861*a58d3d2aSXin Li if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
862*a58d3d2aSXin Li {
863*a58d3d2aSXin Li if (encode)
864*a58d3d2aSXin Li ec_enc_bit_logp(ec, inv, 2);
865*a58d3d2aSXin Li else
866*a58d3d2aSXin Li inv = ec_dec_bit_logp(ec, 2);
867*a58d3d2aSXin Li } else
868*a58d3d2aSXin Li inv = 0;
869*a58d3d2aSXin Li /* inv flag override to avoid problems with downmixing. */
870*a58d3d2aSXin Li if (ctx->disable_inv)
871*a58d3d2aSXin Li inv = 0;
872*a58d3d2aSXin Li itheta = 0;
873*a58d3d2aSXin Li }
874*a58d3d2aSXin Li qalloc = ec_tell_frac(ec) - tell;
875*a58d3d2aSXin Li *b -= qalloc;
876*a58d3d2aSXin Li
877*a58d3d2aSXin Li if (itheta == 0)
878*a58d3d2aSXin Li {
879*a58d3d2aSXin Li imid = 32767;
880*a58d3d2aSXin Li iside = 0;
881*a58d3d2aSXin Li *fill &= (1<<B)-1;
882*a58d3d2aSXin Li delta = -16384;
883*a58d3d2aSXin Li } else if (itheta == 16384)
884*a58d3d2aSXin Li {
885*a58d3d2aSXin Li imid = 0;
886*a58d3d2aSXin Li iside = 32767;
887*a58d3d2aSXin Li *fill &= ((1<<B)-1)<<B;
888*a58d3d2aSXin Li delta = 16384;
889*a58d3d2aSXin Li } else {
890*a58d3d2aSXin Li imid = bitexact_cos((opus_int16)itheta);
891*a58d3d2aSXin Li iside = bitexact_cos((opus_int16)(16384-itheta));
892*a58d3d2aSXin Li /* This is the mid vs side allocation that minimizes squared error
893*a58d3d2aSXin Li in that band. */
894*a58d3d2aSXin Li delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
895*a58d3d2aSXin Li }
896*a58d3d2aSXin Li
897*a58d3d2aSXin Li sctx->inv = inv;
898*a58d3d2aSXin Li sctx->imid = imid;
899*a58d3d2aSXin Li sctx->iside = iside;
900*a58d3d2aSXin Li sctx->delta = delta;
901*a58d3d2aSXin Li sctx->itheta = itheta;
902*a58d3d2aSXin Li sctx->qalloc = qalloc;
903*a58d3d2aSXin Li }
quant_band_n1(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,celt_norm * lowband_out)904*a58d3d2aSXin Li static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
905*a58d3d2aSXin Li celt_norm *lowband_out)
906*a58d3d2aSXin Li {
907*a58d3d2aSXin Li int c;
908*a58d3d2aSXin Li int stereo;
909*a58d3d2aSXin Li celt_norm *x = X;
910*a58d3d2aSXin Li int encode;
911*a58d3d2aSXin Li ec_ctx *ec;
912*a58d3d2aSXin Li
913*a58d3d2aSXin Li encode = ctx->encode;
914*a58d3d2aSXin Li ec = ctx->ec;
915*a58d3d2aSXin Li
916*a58d3d2aSXin Li stereo = Y != NULL;
917*a58d3d2aSXin Li c=0; do {
918*a58d3d2aSXin Li int sign=0;
919*a58d3d2aSXin Li if (ctx->remaining_bits>=1<<BITRES)
920*a58d3d2aSXin Li {
921*a58d3d2aSXin Li if (encode)
922*a58d3d2aSXin Li {
923*a58d3d2aSXin Li sign = x[0]<0;
924*a58d3d2aSXin Li ec_enc_bits(ec, sign, 1);
925*a58d3d2aSXin Li } else {
926*a58d3d2aSXin Li sign = ec_dec_bits(ec, 1);
927*a58d3d2aSXin Li }
928*a58d3d2aSXin Li ctx->remaining_bits -= 1<<BITRES;
929*a58d3d2aSXin Li }
930*a58d3d2aSXin Li if (ctx->resynth)
931*a58d3d2aSXin Li x[0] = sign ? -NORM_SCALING : NORM_SCALING;
932*a58d3d2aSXin Li x = Y;
933*a58d3d2aSXin Li } while (++c<1+stereo);
934*a58d3d2aSXin Li if (lowband_out)
935*a58d3d2aSXin Li lowband_out[0] = SHR16(X[0],4);
936*a58d3d2aSXin Li return 1;
937*a58d3d2aSXin Li }
938*a58d3d2aSXin Li
939*a58d3d2aSXin Li /* This function is responsible for encoding and decoding a mono partition.
940*a58d3d2aSXin Li It can split the band in two and transmit the energy difference with
941*a58d3d2aSXin Li the two half-bands. It can be called recursively so bands can end up being
942*a58d3d2aSXin Li split in 8 parts. */
quant_partition(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,opus_val16 gain,int fill)943*a58d3d2aSXin Li static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
944*a58d3d2aSXin Li int N, int b, int B, celt_norm *lowband,
945*a58d3d2aSXin Li int LM,
946*a58d3d2aSXin Li opus_val16 gain, int fill)
947*a58d3d2aSXin Li {
948*a58d3d2aSXin Li const unsigned char *cache;
949*a58d3d2aSXin Li int q;
950*a58d3d2aSXin Li int curr_bits;
951*a58d3d2aSXin Li int imid=0, iside=0;
952*a58d3d2aSXin Li int B0=B;
953*a58d3d2aSXin Li opus_val16 mid=0, side=0;
954*a58d3d2aSXin Li unsigned cm=0;
955*a58d3d2aSXin Li celt_norm *Y=NULL;
956*a58d3d2aSXin Li int encode;
957*a58d3d2aSXin Li const CELTMode *m;
958*a58d3d2aSXin Li int i;
959*a58d3d2aSXin Li int spread;
960*a58d3d2aSXin Li ec_ctx *ec;
961*a58d3d2aSXin Li
962*a58d3d2aSXin Li encode = ctx->encode;
963*a58d3d2aSXin Li m = ctx->m;
964*a58d3d2aSXin Li i = ctx->i;
965*a58d3d2aSXin Li spread = ctx->spread;
966*a58d3d2aSXin Li ec = ctx->ec;
967*a58d3d2aSXin Li
968*a58d3d2aSXin Li /* If we need 1.5 more bit than we can produce, split the band in two. */
969*a58d3d2aSXin Li cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
970*a58d3d2aSXin Li if (LM != -1 && b > cache[cache[0]]+12 && N>2)
971*a58d3d2aSXin Li {
972*a58d3d2aSXin Li int mbits, sbits, delta;
973*a58d3d2aSXin Li int itheta;
974*a58d3d2aSXin Li int qalloc;
975*a58d3d2aSXin Li struct split_ctx sctx;
976*a58d3d2aSXin Li celt_norm *next_lowband2=NULL;
977*a58d3d2aSXin Li opus_int32 rebalance;
978*a58d3d2aSXin Li
979*a58d3d2aSXin Li N >>= 1;
980*a58d3d2aSXin Li Y = X+N;
981*a58d3d2aSXin Li LM -= 1;
982*a58d3d2aSXin Li if (B==1)
983*a58d3d2aSXin Li fill = (fill&1)|(fill<<1);
984*a58d3d2aSXin Li B = (B+1)>>1;
985*a58d3d2aSXin Li
986*a58d3d2aSXin Li compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill);
987*a58d3d2aSXin Li imid = sctx.imid;
988*a58d3d2aSXin Li iside = sctx.iside;
989*a58d3d2aSXin Li delta = sctx.delta;
990*a58d3d2aSXin Li itheta = sctx.itheta;
991*a58d3d2aSXin Li qalloc = sctx.qalloc;
992*a58d3d2aSXin Li #ifdef FIXED_POINT
993*a58d3d2aSXin Li mid = imid;
994*a58d3d2aSXin Li side = iside;
995*a58d3d2aSXin Li #else
996*a58d3d2aSXin Li mid = (1.f/32768)*imid;
997*a58d3d2aSXin Li side = (1.f/32768)*iside;
998*a58d3d2aSXin Li #endif
999*a58d3d2aSXin Li
1000*a58d3d2aSXin Li /* Give more bits to low-energy MDCTs than they would otherwise deserve */
1001*a58d3d2aSXin Li if (B0>1 && (itheta&0x3fff))
1002*a58d3d2aSXin Li {
1003*a58d3d2aSXin Li if (itheta > 8192)
1004*a58d3d2aSXin Li /* Rough approximation for pre-echo masking */
1005*a58d3d2aSXin Li delta -= delta>>(4-LM);
1006*a58d3d2aSXin Li else
1007*a58d3d2aSXin Li /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
1008*a58d3d2aSXin Li delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
1009*a58d3d2aSXin Li }
1010*a58d3d2aSXin Li mbits = IMAX(0, IMIN(b, (b-delta)/2));
1011*a58d3d2aSXin Li sbits = b-mbits;
1012*a58d3d2aSXin Li ctx->remaining_bits -= qalloc;
1013*a58d3d2aSXin Li
1014*a58d3d2aSXin Li if (lowband)
1015*a58d3d2aSXin Li next_lowband2 = lowband+N; /* >32-bit split case */
1016*a58d3d2aSXin Li
1017*a58d3d2aSXin Li rebalance = ctx->remaining_bits;
1018*a58d3d2aSXin Li if (mbits >= sbits)
1019*a58d3d2aSXin Li {
1020*a58d3d2aSXin Li cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
1021*a58d3d2aSXin Li MULT16_16_P15(gain,mid), fill);
1022*a58d3d2aSXin Li rebalance = mbits - (rebalance-ctx->remaining_bits);
1023*a58d3d2aSXin Li if (rebalance > 3<<BITRES && itheta!=0)
1024*a58d3d2aSXin Li sbits += rebalance - (3<<BITRES);
1025*a58d3d2aSXin Li cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1026*a58d3d2aSXin Li MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1027*a58d3d2aSXin Li } else {
1028*a58d3d2aSXin Li cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1029*a58d3d2aSXin Li MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
1030*a58d3d2aSXin Li rebalance = sbits - (rebalance-ctx->remaining_bits);
1031*a58d3d2aSXin Li if (rebalance > 3<<BITRES && itheta!=16384)
1032*a58d3d2aSXin Li mbits += rebalance - (3<<BITRES);
1033*a58d3d2aSXin Li cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
1034*a58d3d2aSXin Li MULT16_16_P15(gain,mid), fill);
1035*a58d3d2aSXin Li }
1036*a58d3d2aSXin Li } else {
1037*a58d3d2aSXin Li /* This is the basic no-split case */
1038*a58d3d2aSXin Li q = bits2pulses(m, i, LM, b);
1039*a58d3d2aSXin Li curr_bits = pulses2bits(m, i, LM, q);
1040*a58d3d2aSXin Li ctx->remaining_bits -= curr_bits;
1041*a58d3d2aSXin Li
1042*a58d3d2aSXin Li /* Ensures we can never bust the budget */
1043*a58d3d2aSXin Li while (ctx->remaining_bits < 0 && q > 0)
1044*a58d3d2aSXin Li {
1045*a58d3d2aSXin Li ctx->remaining_bits += curr_bits;
1046*a58d3d2aSXin Li q--;
1047*a58d3d2aSXin Li curr_bits = pulses2bits(m, i, LM, q);
1048*a58d3d2aSXin Li ctx->remaining_bits -= curr_bits;
1049*a58d3d2aSXin Li }
1050*a58d3d2aSXin Li
1051*a58d3d2aSXin Li if (q!=0)
1052*a58d3d2aSXin Li {
1053*a58d3d2aSXin Li int K = get_pulses(q);
1054*a58d3d2aSXin Li
1055*a58d3d2aSXin Li /* Finally do the actual quantization */
1056*a58d3d2aSXin Li if (encode)
1057*a58d3d2aSXin Li {
1058*a58d3d2aSXin Li cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth, ctx->arch);
1059*a58d3d2aSXin Li } else {
1060*a58d3d2aSXin Li cm = alg_unquant(X, N, K, spread, B, ec, gain);
1061*a58d3d2aSXin Li }
1062*a58d3d2aSXin Li } else {
1063*a58d3d2aSXin Li /* If there's no pulse, fill the band anyway */
1064*a58d3d2aSXin Li int j;
1065*a58d3d2aSXin Li if (ctx->resynth)
1066*a58d3d2aSXin Li {
1067*a58d3d2aSXin Li unsigned cm_mask;
1068*a58d3d2aSXin Li /* B can be as large as 16, so this shift might overflow an int on a
1069*a58d3d2aSXin Li 16-bit platform; use a long to get defined behavior.*/
1070*a58d3d2aSXin Li cm_mask = (unsigned)(1UL<<B)-1;
1071*a58d3d2aSXin Li fill &= cm_mask;
1072*a58d3d2aSXin Li if (!fill)
1073*a58d3d2aSXin Li {
1074*a58d3d2aSXin Li OPUS_CLEAR(X, N);
1075*a58d3d2aSXin Li } else {
1076*a58d3d2aSXin Li if (lowband == NULL)
1077*a58d3d2aSXin Li {
1078*a58d3d2aSXin Li /* Noise */
1079*a58d3d2aSXin Li for (j=0;j<N;j++)
1080*a58d3d2aSXin Li {
1081*a58d3d2aSXin Li ctx->seed = celt_lcg_rand(ctx->seed);
1082*a58d3d2aSXin Li X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
1083*a58d3d2aSXin Li }
1084*a58d3d2aSXin Li cm = cm_mask;
1085*a58d3d2aSXin Li } else {
1086*a58d3d2aSXin Li /* Folded spectrum */
1087*a58d3d2aSXin Li for (j=0;j<N;j++)
1088*a58d3d2aSXin Li {
1089*a58d3d2aSXin Li opus_val16 tmp;
1090*a58d3d2aSXin Li ctx->seed = celt_lcg_rand(ctx->seed);
1091*a58d3d2aSXin Li /* About 48 dB below the "normal" folding level */
1092*a58d3d2aSXin Li tmp = QCONST16(1.0f/256, 10);
1093*a58d3d2aSXin Li tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1094*a58d3d2aSXin Li X[j] = lowband[j]+tmp;
1095*a58d3d2aSXin Li }
1096*a58d3d2aSXin Li cm = fill;
1097*a58d3d2aSXin Li }
1098*a58d3d2aSXin Li renormalise_vector(X, N, gain, ctx->arch);
1099*a58d3d2aSXin Li }
1100*a58d3d2aSXin Li }
1101*a58d3d2aSXin Li }
1102*a58d3d2aSXin Li }
1103*a58d3d2aSXin Li
1104*a58d3d2aSXin Li return cm;
1105*a58d3d2aSXin Li }
1106*a58d3d2aSXin Li
1107*a58d3d2aSXin Li
1108*a58d3d2aSXin Li /* This function is responsible for encoding and decoding a band for the mono case. */
quant_band(struct band_ctx * ctx,celt_norm * X,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,opus_val16 gain,celt_norm * lowband_scratch,int fill)1109*a58d3d2aSXin Li static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1110*a58d3d2aSXin Li int N, int b, int B, celt_norm *lowband,
1111*a58d3d2aSXin Li int LM, celt_norm *lowband_out,
1112*a58d3d2aSXin Li opus_val16 gain, celt_norm *lowband_scratch, int fill)
1113*a58d3d2aSXin Li {
1114*a58d3d2aSXin Li int N0=N;
1115*a58d3d2aSXin Li int N_B=N;
1116*a58d3d2aSXin Li int N_B0;
1117*a58d3d2aSXin Li int B0=B;
1118*a58d3d2aSXin Li int time_divide=0;
1119*a58d3d2aSXin Li int recombine=0;
1120*a58d3d2aSXin Li int longBlocks;
1121*a58d3d2aSXin Li unsigned cm=0;
1122*a58d3d2aSXin Li int k;
1123*a58d3d2aSXin Li int encode;
1124*a58d3d2aSXin Li int tf_change;
1125*a58d3d2aSXin Li
1126*a58d3d2aSXin Li encode = ctx->encode;
1127*a58d3d2aSXin Li tf_change = ctx->tf_change;
1128*a58d3d2aSXin Li
1129*a58d3d2aSXin Li longBlocks = B0==1;
1130*a58d3d2aSXin Li
1131*a58d3d2aSXin Li N_B = celt_udiv(N_B, B);
1132*a58d3d2aSXin Li
1133*a58d3d2aSXin Li /* Special case for one sample */
1134*a58d3d2aSXin Li if (N==1)
1135*a58d3d2aSXin Li {
1136*a58d3d2aSXin Li return quant_band_n1(ctx, X, NULL, lowband_out);
1137*a58d3d2aSXin Li }
1138*a58d3d2aSXin Li
1139*a58d3d2aSXin Li if (tf_change>0)
1140*a58d3d2aSXin Li recombine = tf_change;
1141*a58d3d2aSXin Li /* Band recombining to increase frequency resolution */
1142*a58d3d2aSXin Li
1143*a58d3d2aSXin Li if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1144*a58d3d2aSXin Li {
1145*a58d3d2aSXin Li OPUS_COPY(lowband_scratch, lowband, N);
1146*a58d3d2aSXin Li lowband = lowband_scratch;
1147*a58d3d2aSXin Li }
1148*a58d3d2aSXin Li
1149*a58d3d2aSXin Li for (k=0;k<recombine;k++)
1150*a58d3d2aSXin Li {
1151*a58d3d2aSXin Li static const unsigned char bit_interleave_table[16]={
1152*a58d3d2aSXin Li 0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1153*a58d3d2aSXin Li };
1154*a58d3d2aSXin Li if (encode)
1155*a58d3d2aSXin Li haar1(X, N>>k, 1<<k);
1156*a58d3d2aSXin Li if (lowband)
1157*a58d3d2aSXin Li haar1(lowband, N>>k, 1<<k);
1158*a58d3d2aSXin Li fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1159*a58d3d2aSXin Li }
1160*a58d3d2aSXin Li B>>=recombine;
1161*a58d3d2aSXin Li N_B<<=recombine;
1162*a58d3d2aSXin Li
1163*a58d3d2aSXin Li /* Increasing the time resolution */
1164*a58d3d2aSXin Li while ((N_B&1) == 0 && tf_change<0)
1165*a58d3d2aSXin Li {
1166*a58d3d2aSXin Li if (encode)
1167*a58d3d2aSXin Li haar1(X, N_B, B);
1168*a58d3d2aSXin Li if (lowband)
1169*a58d3d2aSXin Li haar1(lowband, N_B, B);
1170*a58d3d2aSXin Li fill |= fill<<B;
1171*a58d3d2aSXin Li B <<= 1;
1172*a58d3d2aSXin Li N_B >>= 1;
1173*a58d3d2aSXin Li time_divide++;
1174*a58d3d2aSXin Li tf_change++;
1175*a58d3d2aSXin Li }
1176*a58d3d2aSXin Li B0=B;
1177*a58d3d2aSXin Li N_B0 = N_B;
1178*a58d3d2aSXin Li
1179*a58d3d2aSXin Li /* Reorganize the samples in time order instead of frequency order */
1180*a58d3d2aSXin Li if (B0>1)
1181*a58d3d2aSXin Li {
1182*a58d3d2aSXin Li if (encode)
1183*a58d3d2aSXin Li deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1184*a58d3d2aSXin Li if (lowband)
1185*a58d3d2aSXin Li deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1186*a58d3d2aSXin Li }
1187*a58d3d2aSXin Li
1188*a58d3d2aSXin Li cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill);
1189*a58d3d2aSXin Li
1190*a58d3d2aSXin Li /* This code is used by the decoder and by the resynthesis-enabled encoder */
1191*a58d3d2aSXin Li if (ctx->resynth)
1192*a58d3d2aSXin Li {
1193*a58d3d2aSXin Li /* Undo the sample reorganization going from time order to frequency order */
1194*a58d3d2aSXin Li if (B0>1)
1195*a58d3d2aSXin Li interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1196*a58d3d2aSXin Li
1197*a58d3d2aSXin Li /* Undo time-freq changes that we did earlier */
1198*a58d3d2aSXin Li N_B = N_B0;
1199*a58d3d2aSXin Li B = B0;
1200*a58d3d2aSXin Li for (k=0;k<time_divide;k++)
1201*a58d3d2aSXin Li {
1202*a58d3d2aSXin Li B >>= 1;
1203*a58d3d2aSXin Li N_B <<= 1;
1204*a58d3d2aSXin Li cm |= cm>>B;
1205*a58d3d2aSXin Li haar1(X, N_B, B);
1206*a58d3d2aSXin Li }
1207*a58d3d2aSXin Li
1208*a58d3d2aSXin Li for (k=0;k<recombine;k++)
1209*a58d3d2aSXin Li {
1210*a58d3d2aSXin Li static const unsigned char bit_deinterleave_table[16]={
1211*a58d3d2aSXin Li 0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1212*a58d3d2aSXin Li 0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1213*a58d3d2aSXin Li };
1214*a58d3d2aSXin Li cm = bit_deinterleave_table[cm];
1215*a58d3d2aSXin Li haar1(X, N0>>k, 1<<k);
1216*a58d3d2aSXin Li }
1217*a58d3d2aSXin Li B<<=recombine;
1218*a58d3d2aSXin Li
1219*a58d3d2aSXin Li /* Scale output for later folding */
1220*a58d3d2aSXin Li if (lowband_out)
1221*a58d3d2aSXin Li {
1222*a58d3d2aSXin Li int j;
1223*a58d3d2aSXin Li opus_val16 n;
1224*a58d3d2aSXin Li n = celt_sqrt(SHL32(EXTEND32(N0),22));
1225*a58d3d2aSXin Li for (j=0;j<N0;j++)
1226*a58d3d2aSXin Li lowband_out[j] = MULT16_16_Q15(n,X[j]);
1227*a58d3d2aSXin Li }
1228*a58d3d2aSXin Li cm &= (1<<B)-1;
1229*a58d3d2aSXin Li }
1230*a58d3d2aSXin Li return cm;
1231*a58d3d2aSXin Li }
1232*a58d3d2aSXin Li
1233*a58d3d2aSXin Li
1234*a58d3d2aSXin Li /* This function is responsible for encoding and decoding a band for the stereo case. */
quant_band_stereo(struct band_ctx * ctx,celt_norm * X,celt_norm * Y,int N,int b,int B,celt_norm * lowband,int LM,celt_norm * lowband_out,celt_norm * lowband_scratch,int fill)1235*a58d3d2aSXin Li static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1236*a58d3d2aSXin Li int N, int b, int B, celt_norm *lowband,
1237*a58d3d2aSXin Li int LM, celt_norm *lowband_out,
1238*a58d3d2aSXin Li celt_norm *lowband_scratch, int fill)
1239*a58d3d2aSXin Li {
1240*a58d3d2aSXin Li int imid=0, iside=0;
1241*a58d3d2aSXin Li int inv = 0;
1242*a58d3d2aSXin Li opus_val16 mid=0, side=0;
1243*a58d3d2aSXin Li unsigned cm=0;
1244*a58d3d2aSXin Li int mbits, sbits, delta;
1245*a58d3d2aSXin Li int itheta;
1246*a58d3d2aSXin Li int qalloc;
1247*a58d3d2aSXin Li struct split_ctx sctx;
1248*a58d3d2aSXin Li int orig_fill;
1249*a58d3d2aSXin Li int encode;
1250*a58d3d2aSXin Li ec_ctx *ec;
1251*a58d3d2aSXin Li
1252*a58d3d2aSXin Li encode = ctx->encode;
1253*a58d3d2aSXin Li ec = ctx->ec;
1254*a58d3d2aSXin Li
1255*a58d3d2aSXin Li /* Special case for one sample */
1256*a58d3d2aSXin Li if (N==1)
1257*a58d3d2aSXin Li {
1258*a58d3d2aSXin Li return quant_band_n1(ctx, X, Y, lowband_out);
1259*a58d3d2aSXin Li }
1260*a58d3d2aSXin Li
1261*a58d3d2aSXin Li orig_fill = fill;
1262*a58d3d2aSXin Li
1263*a58d3d2aSXin Li compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill);
1264*a58d3d2aSXin Li inv = sctx.inv;
1265*a58d3d2aSXin Li imid = sctx.imid;
1266*a58d3d2aSXin Li iside = sctx.iside;
1267*a58d3d2aSXin Li delta = sctx.delta;
1268*a58d3d2aSXin Li itheta = sctx.itheta;
1269*a58d3d2aSXin Li qalloc = sctx.qalloc;
1270*a58d3d2aSXin Li #ifdef FIXED_POINT
1271*a58d3d2aSXin Li mid = imid;
1272*a58d3d2aSXin Li side = iside;
1273*a58d3d2aSXin Li #else
1274*a58d3d2aSXin Li mid = (1.f/32768)*imid;
1275*a58d3d2aSXin Li side = (1.f/32768)*iside;
1276*a58d3d2aSXin Li #endif
1277*a58d3d2aSXin Li
1278*a58d3d2aSXin Li /* This is a special case for N=2 that only works for stereo and takes
1279*a58d3d2aSXin Li advantage of the fact that mid and side are orthogonal to encode
1280*a58d3d2aSXin Li the side with just one bit. */
1281*a58d3d2aSXin Li if (N==2)
1282*a58d3d2aSXin Li {
1283*a58d3d2aSXin Li int c;
1284*a58d3d2aSXin Li int sign=0;
1285*a58d3d2aSXin Li celt_norm *x2, *y2;
1286*a58d3d2aSXin Li mbits = b;
1287*a58d3d2aSXin Li sbits = 0;
1288*a58d3d2aSXin Li /* Only need one bit for the side. */
1289*a58d3d2aSXin Li if (itheta != 0 && itheta != 16384)
1290*a58d3d2aSXin Li sbits = 1<<BITRES;
1291*a58d3d2aSXin Li mbits -= sbits;
1292*a58d3d2aSXin Li c = itheta > 8192;
1293*a58d3d2aSXin Li ctx->remaining_bits -= qalloc+sbits;
1294*a58d3d2aSXin Li
1295*a58d3d2aSXin Li x2 = c ? Y : X;
1296*a58d3d2aSXin Li y2 = c ? X : Y;
1297*a58d3d2aSXin Li if (sbits)
1298*a58d3d2aSXin Li {
1299*a58d3d2aSXin Li if (encode)
1300*a58d3d2aSXin Li {
1301*a58d3d2aSXin Li /* Here we only need to encode a sign for the side. */
1302*a58d3d2aSXin Li sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
1303*a58d3d2aSXin Li ec_enc_bits(ec, sign, 1);
1304*a58d3d2aSXin Li } else {
1305*a58d3d2aSXin Li sign = ec_dec_bits(ec, 1);
1306*a58d3d2aSXin Li }
1307*a58d3d2aSXin Li }
1308*a58d3d2aSXin Li sign = 1-2*sign;
1309*a58d3d2aSXin Li /* We use orig_fill here because we want to fold the side, but if
1310*a58d3d2aSXin Li itheta==16384, we'll have cleared the low bits of fill. */
1311*a58d3d2aSXin Li cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1312*a58d3d2aSXin Li lowband_scratch, orig_fill);
1313*a58d3d2aSXin Li /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1314*a58d3d2aSXin Li and there's no need to worry about mixing with the other channel. */
1315*a58d3d2aSXin Li y2[0] = -sign*x2[1];
1316*a58d3d2aSXin Li y2[1] = sign*x2[0];
1317*a58d3d2aSXin Li if (ctx->resynth)
1318*a58d3d2aSXin Li {
1319*a58d3d2aSXin Li celt_norm tmp;
1320*a58d3d2aSXin Li X[0] = MULT16_16_Q15(mid, X[0]);
1321*a58d3d2aSXin Li X[1] = MULT16_16_Q15(mid, X[1]);
1322*a58d3d2aSXin Li Y[0] = MULT16_16_Q15(side, Y[0]);
1323*a58d3d2aSXin Li Y[1] = MULT16_16_Q15(side, Y[1]);
1324*a58d3d2aSXin Li tmp = X[0];
1325*a58d3d2aSXin Li X[0] = SUB16(tmp,Y[0]);
1326*a58d3d2aSXin Li Y[0] = ADD16(tmp,Y[0]);
1327*a58d3d2aSXin Li tmp = X[1];
1328*a58d3d2aSXin Li X[1] = SUB16(tmp,Y[1]);
1329*a58d3d2aSXin Li Y[1] = ADD16(tmp,Y[1]);
1330*a58d3d2aSXin Li }
1331*a58d3d2aSXin Li } else {
1332*a58d3d2aSXin Li /* "Normal" split code */
1333*a58d3d2aSXin Li opus_int32 rebalance;
1334*a58d3d2aSXin Li
1335*a58d3d2aSXin Li mbits = IMAX(0, IMIN(b, (b-delta)/2));
1336*a58d3d2aSXin Li sbits = b-mbits;
1337*a58d3d2aSXin Li ctx->remaining_bits -= qalloc;
1338*a58d3d2aSXin Li
1339*a58d3d2aSXin Li rebalance = ctx->remaining_bits;
1340*a58d3d2aSXin Li if (mbits >= sbits)
1341*a58d3d2aSXin Li {
1342*a58d3d2aSXin Li /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1343*a58d3d2aSXin Li mid for folding later. */
1344*a58d3d2aSXin Li cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1345*a58d3d2aSXin Li lowband_scratch, fill);
1346*a58d3d2aSXin Li rebalance = mbits - (rebalance-ctx->remaining_bits);
1347*a58d3d2aSXin Li if (rebalance > 3<<BITRES && itheta!=0)
1348*a58d3d2aSXin Li sbits += rebalance - (3<<BITRES);
1349*a58d3d2aSXin Li
1350*a58d3d2aSXin Li /* For a stereo split, the high bits of fill are always zero, so no
1351*a58d3d2aSXin Li folding will be done to the side. */
1352*a58d3d2aSXin Li cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1353*a58d3d2aSXin Li } else {
1354*a58d3d2aSXin Li /* For a stereo split, the high bits of fill are always zero, so no
1355*a58d3d2aSXin Li folding will be done to the side. */
1356*a58d3d2aSXin Li cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B);
1357*a58d3d2aSXin Li rebalance = sbits - (rebalance-ctx->remaining_bits);
1358*a58d3d2aSXin Li if (rebalance > 3<<BITRES && itheta!=16384)
1359*a58d3d2aSXin Li mbits += rebalance - (3<<BITRES);
1360*a58d3d2aSXin Li /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1361*a58d3d2aSXin Li mid for folding later. */
1362*a58d3d2aSXin Li cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
1363*a58d3d2aSXin Li lowband_scratch, fill);
1364*a58d3d2aSXin Li }
1365*a58d3d2aSXin Li }
1366*a58d3d2aSXin Li
1367*a58d3d2aSXin Li
1368*a58d3d2aSXin Li /* This code is used by the decoder and by the resynthesis-enabled encoder */
1369*a58d3d2aSXin Li if (ctx->resynth)
1370*a58d3d2aSXin Li {
1371*a58d3d2aSXin Li if (N!=2)
1372*a58d3d2aSXin Li stereo_merge(X, Y, mid, N, ctx->arch);
1373*a58d3d2aSXin Li if (inv)
1374*a58d3d2aSXin Li {
1375*a58d3d2aSXin Li int j;
1376*a58d3d2aSXin Li for (j=0;j<N;j++)
1377*a58d3d2aSXin Li Y[j] = -Y[j];
1378*a58d3d2aSXin Li }
1379*a58d3d2aSXin Li }
1380*a58d3d2aSXin Li return cm;
1381*a58d3d2aSXin Li }
1382*a58d3d2aSXin Li
1383*a58d3d2aSXin Li #ifndef DISABLE_UPDATE_DRAFT
special_hybrid_folding(const CELTMode * m,celt_norm * norm,celt_norm * norm2,int start,int M,int dual_stereo)1384*a58d3d2aSXin Li static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1385*a58d3d2aSXin Li {
1386*a58d3d2aSXin Li int n1, n2;
1387*a58d3d2aSXin Li const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1388*a58d3d2aSXin Li n1 = M*(eBands[start+1]-eBands[start]);
1389*a58d3d2aSXin Li n2 = M*(eBands[start+2]-eBands[start+1]);
1390*a58d3d2aSXin Li /* Duplicate enough of the first band folding data to be able to fold the second band.
1391*a58d3d2aSXin Li Copies no data for CELT-only mode. */
1392*a58d3d2aSXin Li OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1393*a58d3d2aSXin Li if (dual_stereo)
1394*a58d3d2aSXin Li OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1395*a58d3d2aSXin Li }
1396*a58d3d2aSXin Li #endif
1397*a58d3d2aSXin Li
quant_all_bands(int encode,const CELTMode * m,int start,int end,celt_norm * X_,celt_norm * Y_,unsigned char * collapse_masks,const celt_ener * bandE,int * pulses,int shortBlocks,int spread,int dual_stereo,int intensity,int * tf_res,opus_int32 total_bits,opus_int32 balance,ec_ctx * ec,int LM,int codedBands,opus_uint32 * seed,int complexity,int arch,int disable_inv)1398*a58d3d2aSXin Li void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1399*a58d3d2aSXin Li celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1400*a58d3d2aSXin Li const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1401*a58d3d2aSXin Li int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1402*a58d3d2aSXin Li opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1403*a58d3d2aSXin Li opus_uint32 *seed, int complexity, int arch, int disable_inv)
1404*a58d3d2aSXin Li {
1405*a58d3d2aSXin Li int i;
1406*a58d3d2aSXin Li opus_int32 remaining_bits;
1407*a58d3d2aSXin Li const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1408*a58d3d2aSXin Li celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1409*a58d3d2aSXin Li VARDECL(celt_norm, _norm);
1410*a58d3d2aSXin Li VARDECL(celt_norm, _lowband_scratch);
1411*a58d3d2aSXin Li VARDECL(celt_norm, X_save);
1412*a58d3d2aSXin Li VARDECL(celt_norm, Y_save);
1413*a58d3d2aSXin Li VARDECL(celt_norm, X_save2);
1414*a58d3d2aSXin Li VARDECL(celt_norm, Y_save2);
1415*a58d3d2aSXin Li VARDECL(celt_norm, norm_save2);
1416*a58d3d2aSXin Li int resynth_alloc;
1417*a58d3d2aSXin Li celt_norm *lowband_scratch;
1418*a58d3d2aSXin Li int B;
1419*a58d3d2aSXin Li int M;
1420*a58d3d2aSXin Li int lowband_offset;
1421*a58d3d2aSXin Li int update_lowband = 1;
1422*a58d3d2aSXin Li int C = Y_ != NULL ? 2 : 1;
1423*a58d3d2aSXin Li int norm_offset;
1424*a58d3d2aSXin Li int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1425*a58d3d2aSXin Li #ifdef RESYNTH
1426*a58d3d2aSXin Li int resynth = 1;
1427*a58d3d2aSXin Li #else
1428*a58d3d2aSXin Li int resynth = !encode || theta_rdo;
1429*a58d3d2aSXin Li #endif
1430*a58d3d2aSXin Li struct band_ctx ctx;
1431*a58d3d2aSXin Li SAVE_STACK;
1432*a58d3d2aSXin Li
1433*a58d3d2aSXin Li M = 1<<LM;
1434*a58d3d2aSXin Li B = shortBlocks ? M : 1;
1435*a58d3d2aSXin Li norm_offset = M*eBands[start];
1436*a58d3d2aSXin Li /* No need to allocate norm for the last band because we don't need an
1437*a58d3d2aSXin Li output in that band. */
1438*a58d3d2aSXin Li ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1439*a58d3d2aSXin Li norm = _norm;
1440*a58d3d2aSXin Li norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1441*a58d3d2aSXin Li
1442*a58d3d2aSXin Li /* For decoding, we can use the last band as scratch space because we don't need that
1443*a58d3d2aSXin Li scratch space for the last band and we don't care about the data there until we're
1444*a58d3d2aSXin Li decoding the last band. */
1445*a58d3d2aSXin Li if (encode && resynth)
1446*a58d3d2aSXin Li resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1447*a58d3d2aSXin Li else
1448*a58d3d2aSXin Li resynth_alloc = ALLOC_NONE;
1449*a58d3d2aSXin Li ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1450*a58d3d2aSXin Li if (encode && resynth)
1451*a58d3d2aSXin Li lowband_scratch = _lowband_scratch;
1452*a58d3d2aSXin Li else
1453*a58d3d2aSXin Li lowband_scratch = X_+M*eBands[m->effEBands-1];
1454*a58d3d2aSXin Li ALLOC(X_save, resynth_alloc, celt_norm);
1455*a58d3d2aSXin Li ALLOC(Y_save, resynth_alloc, celt_norm);
1456*a58d3d2aSXin Li ALLOC(X_save2, resynth_alloc, celt_norm);
1457*a58d3d2aSXin Li ALLOC(Y_save2, resynth_alloc, celt_norm);
1458*a58d3d2aSXin Li ALLOC(norm_save2, resynth_alloc, celt_norm);
1459*a58d3d2aSXin Li
1460*a58d3d2aSXin Li lowband_offset = 0;
1461*a58d3d2aSXin Li ctx.bandE = bandE;
1462*a58d3d2aSXin Li ctx.ec = ec;
1463*a58d3d2aSXin Li ctx.encode = encode;
1464*a58d3d2aSXin Li ctx.intensity = intensity;
1465*a58d3d2aSXin Li ctx.m = m;
1466*a58d3d2aSXin Li ctx.seed = *seed;
1467*a58d3d2aSXin Li ctx.spread = spread;
1468*a58d3d2aSXin Li ctx.arch = arch;
1469*a58d3d2aSXin Li ctx.disable_inv = disable_inv;
1470*a58d3d2aSXin Li ctx.resynth = resynth;
1471*a58d3d2aSXin Li ctx.theta_round = 0;
1472*a58d3d2aSXin Li /* Avoid injecting noise in the first band on transients. */
1473*a58d3d2aSXin Li ctx.avoid_split_noise = B > 1;
1474*a58d3d2aSXin Li for (i=start;i<end;i++)
1475*a58d3d2aSXin Li {
1476*a58d3d2aSXin Li opus_int32 tell;
1477*a58d3d2aSXin Li int b;
1478*a58d3d2aSXin Li int N;
1479*a58d3d2aSXin Li opus_int32 curr_balance;
1480*a58d3d2aSXin Li int effective_lowband=-1;
1481*a58d3d2aSXin Li celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1482*a58d3d2aSXin Li int tf_change=0;
1483*a58d3d2aSXin Li unsigned x_cm;
1484*a58d3d2aSXin Li unsigned y_cm;
1485*a58d3d2aSXin Li int last;
1486*a58d3d2aSXin Li
1487*a58d3d2aSXin Li ctx.i = i;
1488*a58d3d2aSXin Li last = (i==end-1);
1489*a58d3d2aSXin Li
1490*a58d3d2aSXin Li X = X_+M*eBands[i];
1491*a58d3d2aSXin Li if (Y_!=NULL)
1492*a58d3d2aSXin Li Y = Y_+M*eBands[i];
1493*a58d3d2aSXin Li else
1494*a58d3d2aSXin Li Y = NULL;
1495*a58d3d2aSXin Li N = M*eBands[i+1]-M*eBands[i];
1496*a58d3d2aSXin Li celt_assert(N > 0);
1497*a58d3d2aSXin Li tell = ec_tell_frac(ec);
1498*a58d3d2aSXin Li
1499*a58d3d2aSXin Li /* Compute how many bits we want to allocate to this band */
1500*a58d3d2aSXin Li if (i != start)
1501*a58d3d2aSXin Li balance -= tell;
1502*a58d3d2aSXin Li remaining_bits = total_bits-tell-1;
1503*a58d3d2aSXin Li ctx.remaining_bits = remaining_bits;
1504*a58d3d2aSXin Li if (i <= codedBands-1)
1505*a58d3d2aSXin Li {
1506*a58d3d2aSXin Li curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1507*a58d3d2aSXin Li b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1508*a58d3d2aSXin Li } else {
1509*a58d3d2aSXin Li b = 0;
1510*a58d3d2aSXin Li }
1511*a58d3d2aSXin Li
1512*a58d3d2aSXin Li #ifndef DISABLE_UPDATE_DRAFT
1513*a58d3d2aSXin Li if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1514*a58d3d2aSXin Li lowband_offset = i;
1515*a58d3d2aSXin Li if (i == start+1)
1516*a58d3d2aSXin Li special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1517*a58d3d2aSXin Li #else
1518*a58d3d2aSXin Li if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1519*a58d3d2aSXin Li lowband_offset = i;
1520*a58d3d2aSXin Li #endif
1521*a58d3d2aSXin Li
1522*a58d3d2aSXin Li tf_change = tf_res[i];
1523*a58d3d2aSXin Li ctx.tf_change = tf_change;
1524*a58d3d2aSXin Li if (i>=m->effEBands)
1525*a58d3d2aSXin Li {
1526*a58d3d2aSXin Li X=norm;
1527*a58d3d2aSXin Li if (Y_!=NULL)
1528*a58d3d2aSXin Li Y = norm;
1529*a58d3d2aSXin Li lowband_scratch = NULL;
1530*a58d3d2aSXin Li }
1531*a58d3d2aSXin Li if (last && !theta_rdo)
1532*a58d3d2aSXin Li lowband_scratch = NULL;
1533*a58d3d2aSXin Li
1534*a58d3d2aSXin Li /* Get a conservative estimate of the collapse_mask's for the bands we're
1535*a58d3d2aSXin Li going to be folding from. */
1536*a58d3d2aSXin Li if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1537*a58d3d2aSXin Li {
1538*a58d3d2aSXin Li int fold_start;
1539*a58d3d2aSXin Li int fold_end;
1540*a58d3d2aSXin Li int fold_i;
1541*a58d3d2aSXin Li /* This ensures we never repeat spectral content within one band */
1542*a58d3d2aSXin Li effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1543*a58d3d2aSXin Li fold_start = lowband_offset;
1544*a58d3d2aSXin Li while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1545*a58d3d2aSXin Li fold_end = lowband_offset-1;
1546*a58d3d2aSXin Li #ifndef DISABLE_UPDATE_DRAFT
1547*a58d3d2aSXin Li while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1548*a58d3d2aSXin Li #else
1549*a58d3d2aSXin Li while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1550*a58d3d2aSXin Li #endif
1551*a58d3d2aSXin Li x_cm = y_cm = 0;
1552*a58d3d2aSXin Li fold_i = fold_start; do {
1553*a58d3d2aSXin Li x_cm |= collapse_masks[fold_i*C+0];
1554*a58d3d2aSXin Li y_cm |= collapse_masks[fold_i*C+C-1];
1555*a58d3d2aSXin Li } while (++fold_i<fold_end);
1556*a58d3d2aSXin Li }
1557*a58d3d2aSXin Li /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1558*a58d3d2aSXin Li always) be non-zero. */
1559*a58d3d2aSXin Li else
1560*a58d3d2aSXin Li x_cm = y_cm = (1<<B)-1;
1561*a58d3d2aSXin Li
1562*a58d3d2aSXin Li if (dual_stereo && i==intensity)
1563*a58d3d2aSXin Li {
1564*a58d3d2aSXin Li int j;
1565*a58d3d2aSXin Li
1566*a58d3d2aSXin Li /* Switch off dual stereo to do intensity. */
1567*a58d3d2aSXin Li dual_stereo = 0;
1568*a58d3d2aSXin Li if (resynth)
1569*a58d3d2aSXin Li for (j=0;j<M*eBands[i]-norm_offset;j++)
1570*a58d3d2aSXin Li norm[j] = HALF32(norm[j]+norm2[j]);
1571*a58d3d2aSXin Li }
1572*a58d3d2aSXin Li if (dual_stereo)
1573*a58d3d2aSXin Li {
1574*a58d3d2aSXin Li x_cm = quant_band(&ctx, X, N, b/2, B,
1575*a58d3d2aSXin Li effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1576*a58d3d2aSXin Li last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
1577*a58d3d2aSXin Li y_cm = quant_band(&ctx, Y, N, b/2, B,
1578*a58d3d2aSXin Li effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1579*a58d3d2aSXin Li last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
1580*a58d3d2aSXin Li } else {
1581*a58d3d2aSXin Li if (Y!=NULL)
1582*a58d3d2aSXin Li {
1583*a58d3d2aSXin Li if (theta_rdo && i < intensity)
1584*a58d3d2aSXin Li {
1585*a58d3d2aSXin Li ec_ctx ec_save, ec_save2;
1586*a58d3d2aSXin Li struct band_ctx ctx_save, ctx_save2;
1587*a58d3d2aSXin Li opus_val32 dist0, dist1;
1588*a58d3d2aSXin Li unsigned cm, cm2;
1589*a58d3d2aSXin Li int nstart_bytes, nend_bytes, save_bytes;
1590*a58d3d2aSXin Li unsigned char *bytes_buf;
1591*a58d3d2aSXin Li unsigned char bytes_save[1275];
1592*a58d3d2aSXin Li opus_val16 w[2];
1593*a58d3d2aSXin Li compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1594*a58d3d2aSXin Li /* Make a copy. */
1595*a58d3d2aSXin Li cm = x_cm|y_cm;
1596*a58d3d2aSXin Li ec_save = *ec;
1597*a58d3d2aSXin Li ctx_save = ctx;
1598*a58d3d2aSXin Li OPUS_COPY(X_save, X, N);
1599*a58d3d2aSXin Li OPUS_COPY(Y_save, Y, N);
1600*a58d3d2aSXin Li /* Encode and round down. */
1601*a58d3d2aSXin Li ctx.theta_round = -1;
1602*a58d3d2aSXin Li x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1603*a58d3d2aSXin Li effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1604*a58d3d2aSXin Li last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1605*a58d3d2aSXin Li dist0 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1606*a58d3d2aSXin Li
1607*a58d3d2aSXin Li /* Save first result. */
1608*a58d3d2aSXin Li cm2 = x_cm;
1609*a58d3d2aSXin Li ec_save2 = *ec;
1610*a58d3d2aSXin Li ctx_save2 = ctx;
1611*a58d3d2aSXin Li OPUS_COPY(X_save2, X, N);
1612*a58d3d2aSXin Li OPUS_COPY(Y_save2, Y, N);
1613*a58d3d2aSXin Li if (!last)
1614*a58d3d2aSXin Li OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1615*a58d3d2aSXin Li nstart_bytes = ec_save.offs;
1616*a58d3d2aSXin Li nend_bytes = ec_save.storage;
1617*a58d3d2aSXin Li bytes_buf = ec_save.buf+nstart_bytes;
1618*a58d3d2aSXin Li save_bytes = nend_bytes-nstart_bytes;
1619*a58d3d2aSXin Li OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1620*a58d3d2aSXin Li
1621*a58d3d2aSXin Li /* Restore */
1622*a58d3d2aSXin Li *ec = ec_save;
1623*a58d3d2aSXin Li ctx = ctx_save;
1624*a58d3d2aSXin Li OPUS_COPY(X, X_save, N);
1625*a58d3d2aSXin Li OPUS_COPY(Y, Y_save, N);
1626*a58d3d2aSXin Li #ifndef DISABLE_UPDATE_DRAFT
1627*a58d3d2aSXin Li if (i == start+1)
1628*a58d3d2aSXin Li special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1629*a58d3d2aSXin Li #endif
1630*a58d3d2aSXin Li /* Encode and round up. */
1631*a58d3d2aSXin Li ctx.theta_round = 1;
1632*a58d3d2aSXin Li x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1633*a58d3d2aSXin Li effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1634*a58d3d2aSXin Li last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm);
1635*a58d3d2aSXin Li dist1 = MULT16_32_Q15(w[0], celt_inner_prod(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod(Y_save, Y, N, arch));
1636*a58d3d2aSXin Li if (dist0 >= dist1) {
1637*a58d3d2aSXin Li x_cm = cm2;
1638*a58d3d2aSXin Li *ec = ec_save2;
1639*a58d3d2aSXin Li ctx = ctx_save2;
1640*a58d3d2aSXin Li OPUS_COPY(X, X_save2, N);
1641*a58d3d2aSXin Li OPUS_COPY(Y, Y_save2, N);
1642*a58d3d2aSXin Li if (!last)
1643*a58d3d2aSXin Li OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1644*a58d3d2aSXin Li OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1645*a58d3d2aSXin Li }
1646*a58d3d2aSXin Li } else {
1647*a58d3d2aSXin Li ctx.theta_round = 0;
1648*a58d3d2aSXin Li x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1649*a58d3d2aSXin Li effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1650*a58d3d2aSXin Li last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1651*a58d3d2aSXin Li }
1652*a58d3d2aSXin Li } else {
1653*a58d3d2aSXin Li x_cm = quant_band(&ctx, X, N, b, B,
1654*a58d3d2aSXin Li effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1655*a58d3d2aSXin Li last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1656*a58d3d2aSXin Li }
1657*a58d3d2aSXin Li y_cm = x_cm;
1658*a58d3d2aSXin Li }
1659*a58d3d2aSXin Li collapse_masks[i*C+0] = (unsigned char)x_cm;
1660*a58d3d2aSXin Li collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1661*a58d3d2aSXin Li balance += pulses[i] + tell;
1662*a58d3d2aSXin Li
1663*a58d3d2aSXin Li /* Update the folding position only as long as we have 1 bit/sample depth. */
1664*a58d3d2aSXin Li update_lowband = b>(N<<BITRES);
1665*a58d3d2aSXin Li /* We only need to avoid noise on a split for the first band. After that, we
1666*a58d3d2aSXin Li have folding. */
1667*a58d3d2aSXin Li ctx.avoid_split_noise = 0;
1668*a58d3d2aSXin Li }
1669*a58d3d2aSXin Li *seed = ctx.seed;
1670*a58d3d2aSXin Li
1671*a58d3d2aSXin Li RESTORE_STACK;
1672*a58d3d2aSXin Li }
1673*a58d3d2aSXin Li
1674