/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "main.h"
#include "stack_alloc.h"

35*a58d3d2aSXin Li /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
silk_stereo_LR_to_MS(stereo_enc_state * state,opus_int16 x1[],opus_int16 x2[],opus_int8 ix[2][3],opus_int8 * mid_only_flag,opus_int32 mid_side_rates_bps[],opus_int32 total_rate_bps,opus_int prev_speech_act_Q8,opus_int toMono,opus_int fs_kHz,opus_int frame_length)36*a58d3d2aSXin Li void silk_stereo_LR_to_MS(
37*a58d3d2aSXin Li stereo_enc_state *state, /* I/O State */
38*a58d3d2aSXin Li opus_int16 x1[], /* I/O Left input signal, becomes mid signal */
39*a58d3d2aSXin Li opus_int16 x2[], /* I/O Right input signal, becomes side signal */
40*a58d3d2aSXin Li opus_int8 ix[ 2 ][ 3 ], /* O Quantization indices */
41*a58d3d2aSXin Li opus_int8 *mid_only_flag, /* O Flag: only mid signal coded */
42*a58d3d2aSXin Li opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
43*a58d3d2aSXin Li opus_int32 total_rate_bps, /* I Total bitrate */
44*a58d3d2aSXin Li opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
45*a58d3d2aSXin Li opus_int toMono, /* I Last frame before a stereo->mono transition */
46*a58d3d2aSXin Li opus_int fs_kHz, /* I Sample rate (kHz) */
47*a58d3d2aSXin Li opus_int frame_length /* I Number of samples */
48*a58d3d2aSXin Li )
49*a58d3d2aSXin Li {
50*a58d3d2aSXin Li opus_int n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
51*a58d3d2aSXin Li opus_int32 sum, diff, smooth_coef_Q16, pred_Q13[ 2 ], pred0_Q13, pred1_Q13;
52*a58d3d2aSXin Li opus_int32 LP_ratio_Q14, HP_ratio_Q14, frac_Q16, frac_3_Q16, min_mid_rate_bps, width_Q14, w_Q24, deltaw_Q24;
53*a58d3d2aSXin Li VARDECL( opus_int16, side );
54*a58d3d2aSXin Li VARDECL( opus_int16, LP_mid );
55*a58d3d2aSXin Li VARDECL( opus_int16, HP_mid );
56*a58d3d2aSXin Li VARDECL( opus_int16, LP_side );
57*a58d3d2aSXin Li VARDECL( opus_int16, HP_side );
58*a58d3d2aSXin Li opus_int16 *mid = &x1[ -2 ];
59*a58d3d2aSXin Li SAVE_STACK;
60*a58d3d2aSXin Li
61*a58d3d2aSXin Li ALLOC( side, frame_length + 2, opus_int16 );
62*a58d3d2aSXin Li /* Convert to basic mid/side signals */
63*a58d3d2aSXin Li for( n = 0; n < frame_length + 2; n++ ) {
64*a58d3d2aSXin Li sum = x1[ n - 2 ] + (opus_int32)x2[ n - 2 ];
65*a58d3d2aSXin Li diff = x1[ n - 2 ] - (opus_int32)x2[ n - 2 ];
66*a58d3d2aSXin Li mid[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
67*a58d3d2aSXin Li side[ n ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( diff, 1 ) );
68*a58d3d2aSXin Li }
69*a58d3d2aSXin Li
70*a58d3d2aSXin Li /* Buffering */
71*a58d3d2aSXin Li silk_memcpy( mid, state->sMid, 2 * sizeof( opus_int16 ) );
72*a58d3d2aSXin Li silk_memcpy( side, state->sSide, 2 * sizeof( opus_int16 ) );
73*a58d3d2aSXin Li silk_memcpy( state->sMid, &mid[ frame_length ], 2 * sizeof( opus_int16 ) );
74*a58d3d2aSXin Li silk_memcpy( state->sSide, &side[ frame_length ], 2 * sizeof( opus_int16 ) );
75*a58d3d2aSXin Li
76*a58d3d2aSXin Li /* LP and HP filter mid signal */
77*a58d3d2aSXin Li ALLOC( LP_mid, frame_length, opus_int16 );
78*a58d3d2aSXin Li ALLOC( HP_mid, frame_length, opus_int16 );
79*a58d3d2aSXin Li for( n = 0; n < frame_length; n++ ) {
80*a58d3d2aSXin Li sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
81*a58d3d2aSXin Li LP_mid[ n ] = sum;
82*a58d3d2aSXin Li HP_mid[ n ] = mid[ n + 1 ] - sum;
83*a58d3d2aSXin Li }
84*a58d3d2aSXin Li
85*a58d3d2aSXin Li /* LP and HP filter side signal */
86*a58d3d2aSXin Li ALLOC( LP_side, frame_length, opus_int16 );
87*a58d3d2aSXin Li ALLOC( HP_side, frame_length, opus_int16 );
88*a58d3d2aSXin Li for( n = 0; n < frame_length; n++ ) {
89*a58d3d2aSXin Li sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT32( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
90*a58d3d2aSXin Li LP_side[ n ] = sum;
91*a58d3d2aSXin Li HP_side[ n ] = side[ n + 1 ] - sum;
92*a58d3d2aSXin Li }
93*a58d3d2aSXin Li
94*a58d3d2aSXin Li /* Find energies and predictors */
95*a58d3d2aSXin Li is10msFrame = frame_length == 10 * fs_kHz;
96*a58d3d2aSXin Li smooth_coef_Q16 = is10msFrame ?
97*a58d3d2aSXin Li SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF / 2, 16 ) :
98*a58d3d2aSXin Li SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF, 16 );
99*a58d3d2aSXin Li smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
100*a58d3d2aSXin Li
101*a58d3d2aSXin Li pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
102*a58d3d2aSXin Li pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
103*a58d3d2aSXin Li /* Ratio of the norms of residual and mid signals */
104*a58d3d2aSXin Li frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
105*a58d3d2aSXin Li frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
106*a58d3d2aSXin Li
107*a58d3d2aSXin Li /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
108*a58d3d2aSXin Li total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
109*a58d3d2aSXin Li if( total_rate_bps < 1 ) {
110*a58d3d2aSXin Li total_rate_bps = 1;
111*a58d3d2aSXin Li }
112*a58d3d2aSXin Li min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 600 );
113*a58d3d2aSXin Li silk_assert( min_mid_rate_bps < 32767 );
114*a58d3d2aSXin Li /* Default bitrate distribution: 8 parts for Mid and (5+3*frac) parts for Side. so: mid_rate = ( 8 / ( 13 + 3 * frac ) ) * total_ rate */
115*a58d3d2aSXin Li frac_3_Q16 = silk_MUL( 3, frac_Q16 );
116*a58d3d2aSXin Li mid_side_rates_bps[ 0 ] = silk_DIV32_varQ( total_rate_bps, SILK_FIX_CONST( 8 + 5, 16 ) + frac_3_Q16, 16+3 );
117*a58d3d2aSXin Li /* If Mid bitrate below minimum, reduce stereo width */
118*a58d3d2aSXin Li if( mid_side_rates_bps[ 0 ] < min_mid_rate_bps ) {
119*a58d3d2aSXin Li mid_side_rates_bps[ 0 ] = min_mid_rate_bps;
120*a58d3d2aSXin Li mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
121*a58d3d2aSXin Li /* width = 4 * ( 2 * side_rate - min_rate ) / ( ( 1 + 3 * frac ) * min_rate ) */
122*a58d3d2aSXin Li width_Q14 = silk_DIV32_varQ( silk_LSHIFT( mid_side_rates_bps[ 1 ], 1 ) - min_mid_rate_bps,
123*a58d3d2aSXin Li silk_SMULWB( SILK_FIX_CONST( 1, 16 ) + frac_3_Q16, min_mid_rate_bps ), 14+2 );
124*a58d3d2aSXin Li width_Q14 = silk_LIMIT( width_Q14, 0, SILK_FIX_CONST( 1, 14 ) );
125*a58d3d2aSXin Li } else {
126*a58d3d2aSXin Li mid_side_rates_bps[ 1 ] = total_rate_bps - mid_side_rates_bps[ 0 ];
127*a58d3d2aSXin Li width_Q14 = SILK_FIX_CONST( 1, 14 );
128*a58d3d2aSXin Li }
129*a58d3d2aSXin Li
130*a58d3d2aSXin Li /* Smoother */
131*a58d3d2aSXin Li state->smth_width_Q14 = (opus_int16)silk_SMLAWB( state->smth_width_Q14, width_Q14 - state->smth_width_Q14, smooth_coef_Q16 );
132*a58d3d2aSXin Li
133*a58d3d2aSXin Li /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
134*a58d3d2aSXin Li *mid_only_flag = 0;
135*a58d3d2aSXin Li if( toMono ) {
136*a58d3d2aSXin Li /* Last frame before stereo->mono transition; collapse stereo width */
137*a58d3d2aSXin Li width_Q14 = 0;
138*a58d3d2aSXin Li pred_Q13[ 0 ] = 0;
139*a58d3d2aSXin Li pred_Q13[ 1 ] = 0;
140*a58d3d2aSXin Li silk_stereo_quant_pred( pred_Q13, ix );
141*a58d3d2aSXin Li } else if( state->width_prev_Q14 == 0 &&
142*a58d3d2aSXin Li ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
143*a58d3d2aSXin Li {
144*a58d3d2aSXin Li /* Code as panned-mono; previous frame already had zero width */
145*a58d3d2aSXin Li /* Scale down and quantize predictors */
146*a58d3d2aSXin Li pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
147*a58d3d2aSXin Li pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
148*a58d3d2aSXin Li silk_stereo_quant_pred( pred_Q13, ix );
149*a58d3d2aSXin Li /* Collapse stereo width */
150*a58d3d2aSXin Li width_Q14 = 0;
151*a58d3d2aSXin Li pred_Q13[ 0 ] = 0;
152*a58d3d2aSXin Li pred_Q13[ 1 ] = 0;
153*a58d3d2aSXin Li mid_side_rates_bps[ 0 ] = total_rate_bps;
154*a58d3d2aSXin Li mid_side_rates_bps[ 1 ] = 0;
155*a58d3d2aSXin Li *mid_only_flag = 1;
156*a58d3d2aSXin Li } else if( state->width_prev_Q14 != 0 &&
157*a58d3d2aSXin Li ( 8 * total_rate_bps < 11 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.02, 14 ) ) )
158*a58d3d2aSXin Li {
159*a58d3d2aSXin Li /* Transition to zero-width stereo */
160*a58d3d2aSXin Li /* Scale down and quantize predictors */
161*a58d3d2aSXin Li pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
162*a58d3d2aSXin Li pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
163*a58d3d2aSXin Li silk_stereo_quant_pred( pred_Q13, ix );
164*a58d3d2aSXin Li /* Collapse stereo width */
165*a58d3d2aSXin Li width_Q14 = 0;
166*a58d3d2aSXin Li pred_Q13[ 0 ] = 0;
167*a58d3d2aSXin Li pred_Q13[ 1 ] = 0;
168*a58d3d2aSXin Li } else if( state->smth_width_Q14 > SILK_FIX_CONST( 0.95, 14 ) ) {
169*a58d3d2aSXin Li /* Full-width stereo coding */
170*a58d3d2aSXin Li silk_stereo_quant_pred( pred_Q13, ix );
171*a58d3d2aSXin Li width_Q14 = SILK_FIX_CONST( 1, 14 );
172*a58d3d2aSXin Li } else {
173*a58d3d2aSXin Li /* Reduced-width stereo coding; scale down and quantize predictors */
174*a58d3d2aSXin Li pred_Q13[ 0 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 0 ] ), 14 );
175*a58d3d2aSXin Li pred_Q13[ 1 ] = silk_RSHIFT( silk_SMULBB( state->smth_width_Q14, pred_Q13[ 1 ] ), 14 );
176*a58d3d2aSXin Li silk_stereo_quant_pred( pred_Q13, ix );
177*a58d3d2aSXin Li width_Q14 = state->smth_width_Q14;
178*a58d3d2aSXin Li }
179*a58d3d2aSXin Li
180*a58d3d2aSXin Li /* Make sure to keep on encoding until the tapered output has been transmitted */
181*a58d3d2aSXin Li if( *mid_only_flag == 1 ) {
182*a58d3d2aSXin Li state->silent_side_len += frame_length - STEREO_INTERP_LEN_MS * fs_kHz;
183*a58d3d2aSXin Li if( state->silent_side_len < LA_SHAPE_MS * fs_kHz ) {
184*a58d3d2aSXin Li *mid_only_flag = 0;
185*a58d3d2aSXin Li } else {
186*a58d3d2aSXin Li /* Limit to avoid wrapping around */
187*a58d3d2aSXin Li state->silent_side_len = 10000;
188*a58d3d2aSXin Li }
189*a58d3d2aSXin Li } else {
190*a58d3d2aSXin Li state->silent_side_len = 0;
191*a58d3d2aSXin Li }
192*a58d3d2aSXin Li
193*a58d3d2aSXin Li if( *mid_only_flag == 0 && mid_side_rates_bps[ 1 ] < 1 ) {
194*a58d3d2aSXin Li mid_side_rates_bps[ 1 ] = 1;
195*a58d3d2aSXin Li mid_side_rates_bps[ 0 ] = silk_max_int( 1, total_rate_bps - mid_side_rates_bps[ 1 ]);
196*a58d3d2aSXin Li }
197*a58d3d2aSXin Li
198*a58d3d2aSXin Li /* Interpolate predictors and subtract prediction from side channel */
199*a58d3d2aSXin Li pred0_Q13 = -state->pred_prev_Q13[ 0 ];
200*a58d3d2aSXin Li pred1_Q13 = -state->pred_prev_Q13[ 1 ];
201*a58d3d2aSXin Li w_Q24 = silk_LSHIFT( state->width_prev_Q14, 10 );
202*a58d3d2aSXin Li denom_Q16 = silk_DIV32_16( (opus_int32)1 << 16, STEREO_INTERP_LEN_MS * fs_kHz );
203*a58d3d2aSXin Li delta0_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 0 ] - state->pred_prev_Q13[ 0 ], denom_Q16 ), 16 );
204*a58d3d2aSXin Li delta1_Q13 = -silk_RSHIFT_ROUND( silk_SMULBB( pred_Q13[ 1 ] - state->pred_prev_Q13[ 1 ], denom_Q16 ), 16 );
205*a58d3d2aSXin Li deltaw_Q24 = silk_LSHIFT( silk_SMULWB( width_Q14 - state->width_prev_Q14, denom_Q16 ), 10 );
206*a58d3d2aSXin Li for( n = 0; n < STEREO_INTERP_LEN_MS * fs_kHz; n++ ) {
207*a58d3d2aSXin Li pred0_Q13 += delta0_Q13;
208*a58d3d2aSXin Li pred1_Q13 += delta1_Q13;
209*a58d3d2aSXin Li w_Q24 += deltaw_Q24;
210*a58d3d2aSXin Li sum = silk_LSHIFT( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
211*a58d3d2aSXin Li sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
212*a58d3d2aSXin Li sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
213*a58d3d2aSXin Li x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
214*a58d3d2aSXin Li }
215*a58d3d2aSXin Li
216*a58d3d2aSXin Li pred0_Q13 = -pred_Q13[ 0 ];
217*a58d3d2aSXin Li pred1_Q13 = -pred_Q13[ 1 ];
218*a58d3d2aSXin Li w_Q24 = silk_LSHIFT( width_Q14, 10 );
219*a58d3d2aSXin Li for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
220*a58d3d2aSXin Li sum = silk_LSHIFT( silk_ADD_LSHIFT32( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
221*a58d3d2aSXin Li sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
222*a58d3d2aSXin Li sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
223*a58d3d2aSXin Li x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
224*a58d3d2aSXin Li }
225*a58d3d2aSXin Li state->pred_prev_Q13[ 0 ] = (opus_int16)pred_Q13[ 0 ];
226*a58d3d2aSXin Li state->pred_prev_Q13[ 1 ] = (opus_int16)pred_Q13[ 1 ];
227*a58d3d2aSXin Li state->width_prev_Q14 = (opus_int16)width_Q14;
228*a58d3d2aSXin Li RESTORE_STACK;
229*a58d3d2aSXin Li }
230