1 /* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
2 Written by Jean-Marc Valin and Koen Vos */
3 /*
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 - Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10
11 - Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdarg.h>
33 #include "celt.h"
34 #include "entenc.h"
35 #include "modes.h"
36 #include "API.h"
37 #include "stack_alloc.h"
38 #include "float_cast.h"
39 #include "opus.h"
40 #include "arch.h"
41 #include "pitch.h"
42 #include "opus_private.h"
43 #include "os_support.h"
44 #include "cpu_support.h"
45 #include "analysis.h"
46 #include "mathops.h"
47 #include "tuning_parameters.h"
48
49 #ifdef ENABLE_DRED
50 #include "dred_coding.h"
51 #endif
52
53 #ifdef FIXED_POINT
54 #include "fixed/structs_FIX.h"
55 #else
56 #include "float/structs_FLP.h"
57 #endif
58 #ifdef ENABLE_OSCE_TRAINING_DATA
59 #include <stdio.h>
60 #endif
61
/* Sizing constant for the encoder's delay buffer: OpusEncoder.delay_buffer
   holds MAX_ENCODER_BUFFER*2 samples.
   NOTE(review): 480 equals Fs/100 (the value given to encoder_buffer) at the
   48 kHz maximum rate, and the factor 2 presumably covers stereo -- confirm. */
#define MAX_ENCODER_BUFFER 480

#ifndef DISABLE_FLOAT_API
/* Linear power ratio corresponding to 25 dB. */
#define PSEUDO_SNR_THRESHOLD 316.23f /* 10^(25/10) */
#endif
67
/* Running statistics kept between calls to compute_stereo_width(). */
typedef struct {
    /* Recursively smoothed energy of the first channel (XX), cross term
       between the two channels (XY) and energy of the second channel (YY). */
    opus_val32 XX, XY, YY;
    /* Width estimate smoothed over roughly one second. */
    opus_val16 smoothed_width;
    /* Slowly decaying peak follower of smoothed_width. */
    opus_val16 max_follower;
} StereoWidthState;
73
/* Top-level Opus encoder state. The SILK and CELT encoder states are not
   members: they live in the same allocation after this struct and are reached
   through the byte offsets below (see opus_encoder_init()). */
struct OpusEncoder {
    /* Byte offsets from the start of this struct to the CELT and SILK states. */
    int          celt_enc_offset;
    int          silk_enc_offset;
    /* Control structure handed to the SILK encoder. */
    silk_EncControlStruct silk_mode;
#ifdef ENABLE_DRED
    DREDEnc      dred_encoder;
#endif
    /* Application (OPUS_APPLICATION_*) given at initialization. */
    int          application;
    /* Number of channels given at initialization (1 or 2). */
    int          channels;
    /* Delay compensation in samples; initialized to Fs/250 (4 ms). */
    int          delay_compensation;
    int          force_channels;
    int          signal_type;
    int          user_bandwidth;
    int          max_bandwidth;
    int          user_forced_mode;
    int          voice_ratio;
    /* Sampling rate (at the API level) */
    opus_int32   Fs;
    int          use_vbr;
    int          vbr_constraint;
    int          variable_duration;
    opus_int32   bitrate_bps;
    /* Requested bitrate; may also hold OPUS_AUTO or OPUS_BITRATE_MAX
       (resolved by user_bitrate_to_bitrate()). */
    opus_int32   user_bitrate_bps;
    int          lsb_depth;
    /* Initialized to Fs/100 samples (10 ms). */
    int          encoder_buffer;
    int          lfe;
    /* CPU architecture id returned by opus_select_arch(). */
    int          arch;
    int          use_dtx;                 /* general DTX for both SILK and CELT */
    int          fec_config;
#ifndef DISABLE_FLOAT_API
    TonalityAnalysisState analysis;
#endif

/* Names the first field of the section below.
   NOTE(review): presumably everything from this field onward is what an
   encoder reset clears -- confirm against the reset code. */
#define OPUS_ENCODER_RESET_START stream_channels
    int          stream_channels;
    opus_int16   hybrid_stereo_width_Q14;
    opus_int32   variable_HP_smth2_Q15;
    opus_val16   prev_HB_gain;
    /* High-pass / DC-rejection filter memory; the second channel uses the
       entries starting at index 2. */
    opus_val32   hp_mem[4];
    int          mode;
    int          prev_mode;
    int          prev_channels;
    int          prev_framesize;
    int          bandwidth;
    /* Bandwidth determined automatically from the rate (before any other adjustment) */
    int          auto_bandwidth;
    int          silk_bw_switch;
    /* Set to 1 by opus_encoder_init(). */
    int          first;
    opus_val16 * energy_masking;
    /* State for compute_stereo_width(). */
    StereoWidthState width_mem;
    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef DISABLE_FLOAT_API
    int          detected_bandwidth;
    /* Consecutive time without activity, in milliseconds (Q1). */
    int          nb_no_activity_ms_Q1;
    opus_val32   peak_signal_energy;
#endif
#ifdef ENABLE_DRED
    int          dred_duration;
    /* Quantizer settings and chunk budget stored by compute_dred_bitrate(). */
    int          dred_q0;
    int          dred_dQ;
    int          dred_qmax;
    int          dred_target_chunks;
    unsigned char activity_mem[DRED_MAX_FRAMES*4]; /* 2.5ms resolution*/
#endif
    int          nonfinal_frame; /* current frame is not the final in a packet */
    opus_uint32  rangeFinal;
};
141
/* Transition tables for voice and music. Each table holds four
   (threshold, hysteresis) pairs, one per transition between adjacent
   bandwidths. The first column is the middle (memoryless) threshold.
   The second column is the hysteresis (difference with the middle). */
static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        13500, 1000, /* WB<->SWB */
        14000, 2000, /* SWB<->FB */
};
static const opus_int32 mono_music_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        11000, 1000, /* WB<->SWB */
        12000, 2000, /* SWB<->FB */
};
/* The stereo tables currently carry the same values as the mono ones. */
static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        13500, 1000, /* WB<->SWB */
        14000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        11000, 1000, /* WB<->SWB */
        12000, 2000, /* SWB<->FB */
};
/* Threshold bit-rates for switching between mono and stereo,
   one for voice and one for music. */
static const opus_int32 stereo_voice_threshold = 19000;
static const opus_int32 stereo_music_threshold = 17000;
172
/* Threshold bit-rate for switching between SILK/hybrid and CELT-only.
   Rows select mono (0) or stereo (1); columns select voice (0) or music (1). */
static const opus_int32 mode_thresholds[2][2] = {
      /* voice */ /* music */
      {  64000,      10000}, /* mono */
      {  44000,      10000}, /* stereo */
};
179
/* In-band FEC rate thresholds used by decide_fec(): one
   (threshold, hysteresis) pair per bandwidth, indexed by
   2*(bandwidth - OPUS_BANDWIDTH_NARROWBAND). */
static const opus_int32 fec_thresholds[] = {
        12000, 1000, /* NB */
        14000, 1000, /* MB */
        16000, 1000, /* WB */
        20000, 1000, /* SWB */
        22000, 1000, /* FB */
};
187
opus_encoder_get_size(int channels)188 int opus_encoder_get_size(int channels)
189 {
190 int silkEncSizeBytes, celtEncSizeBytes;
191 int ret;
192 if (channels<1 || channels > 2)
193 return 0;
194 ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
195 if (ret)
196 return 0;
197 silkEncSizeBytes = align(silkEncSizeBytes);
198 celtEncSizeBytes = celt_encoder_get_size(channels);
199 return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes;
200 }
201
/* Initializes an encoder state in caller-provided memory of at least
   opus_encoder_get_size(channels) bytes.

   st          : memory to initialize
   Fs          : sampling rate; must be 8000, 12000, 16000, 24000 or 48000
   channels    : 1 or 2
   application : OPUS_APPLICATION_VOIP, OPUS_APPLICATION_AUDIO or
                 OPUS_APPLICATION_RESTRICTED_LOWDELAY

   Returns OPUS_OK on success, OPUS_BAD_ARG for an invalid argument (or a
   failed SILK size query), and OPUS_INTERNAL_ERROR when the SILK or CELT
   encoder fails to initialize. */
int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
{
    void *silk_state;
    CELTEncoder *celt_state;
    int silk_bytes;
    int rate_ok, channels_ok, application_ok;

    rate_ok = (Fs==48000 || Fs==24000 || Fs==16000 || Fs==12000 || Fs==8000);
    channels_ok = (channels==1 || channels==2);
    application_ok = (application == OPUS_APPLICATION_VOIP
          || application == OPUS_APPLICATION_AUDIO
          || application == OPUS_APPLICATION_RESTRICTED_LOWDELAY);
    if (!(rate_ok && channels_ok && application_ok))
        return OPUS_BAD_ARG;

    /* Wipe the whole allocation: this struct plus the SILK and CELT states. */
    OPUS_CLEAR((char*)st, opus_encoder_get_size(channels));

    /* Lay out the SILK state right after this struct, then the CELT state. */
    if (silk_Get_Encoder_Size( &silk_bytes ) != 0)
        return OPUS_BAD_ARG;
    silk_bytes = align(silk_bytes);
    st->silk_enc_offset = align(sizeof(OpusEncoder));
    st->celt_enc_offset = st->silk_enc_offset+silk_bytes;
    silk_state = (char*)st+st->silk_enc_offset;
    celt_state = (CELTEncoder*)((char*)st+st->celt_enc_offset);

    st->channels = channels;
    st->stream_channels = channels;
    st->Fs = Fs;
    st->arch = opus_select_arch();

    /* SILK encoder; it is given silk_mode, so the defaults are set afterwards. */
    if (silk_InitEncoder( silk_state, st->arch, &st->silk_mode ) != 0)
        return OPUS_INTERNAL_ERROR;

    st->silk_mode.nChannelsAPI              = channels;
    st->silk_mode.nChannelsInternal         = channels;
    st->silk_mode.API_sampleRate            = st->Fs;
    st->silk_mode.maxInternalSampleRate     = 16000;
    st->silk_mode.minInternalSampleRate     = 8000;
    st->silk_mode.desiredInternalSampleRate = 16000;
    st->silk_mode.payloadSize_ms            = 20;
    st->silk_mode.bitRate                   = 25000;
    st->silk_mode.packetLossPercentage      = 0;
    st->silk_mode.complexity                = 9;
    st->silk_mode.useInBandFEC              = 0;
    st->silk_mode.useDRED                   = 0;
    st->silk_mode.useDTX                    = 0;
    st->silk_mode.useCBR                    = 0;
    st->silk_mode.reducedDependency         = 0;

    /* CELT encoder, using the same complexity as SILK and no in-band signalling. */
    if (celt_encoder_init(celt_state, Fs, channels, st->arch) != OPUS_OK)
        return OPUS_INTERNAL_ERROR;
    celt_encoder_ctl(celt_state, CELT_SET_SIGNALLING(0));
    celt_encoder_ctl(celt_state, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));

#ifdef ENABLE_DRED
    /* DRED encoder */
    dred_encoder_init( &st->dred_encoder, Fs, channels );
#endif

    /* Rate control defaults: constrained VBR, which is safer for real-time use. */
    st->use_vbr = 1;
    st->vbr_constraint = 1;
    st->user_bitrate_bps = OPUS_AUTO;
    st->bitrate_bps = 3000+Fs*channels;

    /* Everything the user can override starts out automatic. */
    st->application = application;
    st->signal_type = OPUS_AUTO;
    st->user_bandwidth = OPUS_AUTO;
    st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
    st->force_channels = OPUS_AUTO;
    st->user_forced_mode = OPUS_AUTO;
    st->voice_ratio = -1;
    st->encoder_buffer = st->Fs/100;
    st->lsb_depth = 24;
    st->variable_duration = OPUS_FRAMESIZE_ARG;

    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
       + 1.5 ms for SILK resamplers and stereo prediction) */
    st->delay_compensation = st->Fs/250;

    /* Signal-dependent state */
    st->hybrid_stereo_width_Q14 = 1 << 14;
    st->prev_HB_gain = Q15ONE;
    st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
    st->first = 1;
    st->mode = MODE_HYBRID;
    st->bandwidth = OPUS_BANDWIDTH_FULLBAND;

#ifndef DISABLE_FLOAT_API
    tonality_analysis_init(&st->analysis, st->Fs);
    st->analysis.application = st->application;
#endif

    return OPUS_OK;
}
298
gen_toc(int mode,int framerate,int bandwidth,int channels)299 static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
300 {
301 int period;
302 unsigned char toc;
303 period = 0;
304 while (framerate < 400)
305 {
306 framerate <<= 1;
307 period++;
308 }
309 if (mode == MODE_SILK_ONLY)
310 {
311 toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
312 toc |= (period-2)<<3;
313 } else if (mode == MODE_CELT_ONLY)
314 {
315 int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
316 if (tmp < 0)
317 tmp = 0;
318 toc = 0x80;
319 toc |= tmp << 5;
320 toc |= period<<3;
321 } else /* Hybrid */
322 {
323 toc = 0x60;
324 toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
325 toc |= (period-2)<<3;
326 }
327 toc |= (channels==2)<<2;
328 return toc;
329 }
330
331 #ifndef FIXED_POINT
/* Floating-point second-order IIR section in transposed direct form II.
   The Q28 integer coefficients are converted to floating point, then every
   stride-th sample of `in` is filtered into the matching position of `out`.
   S[0] and S[1] carry the filter state across calls. */
static void silk_biquad_float(
    const opus_val16      *in,            /* I:    Input signal                   */
    const opus_int32      *B_Q28,         /* I:    MA coefficients [3]            */
    const opus_int32      *A_Q28,         /* I:    AR coefficients [2]            */
    opus_val32            *S,             /* I/O:  State vector [2]               */
    opus_val16            *out,           /* O:    Output signal                  */
    const opus_int32      len,            /* I:    Number of samples to filter    */
    int stride                            /* I:    Spacing between samples        */
)
{
    opus_val32 ar[2], ma[3];
    opus_val32 x, y;
    opus_int n;

    /* Q28 -> float */
    ma[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
    ma[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
    ma[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
    ar[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
    ar[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));

    for( n = 0; n < len; n++ ) {
        x = in[ n*stride ];
        y = S[ 0 ] + ma[0]*x;

        S[ 0 ] = S[1] - y*ar[0] + ma[1]*x;

        /* VERY_SMALL is added to the state on every update. */
        S[ 1 ] = - y*ar[1] + ma[2]*x + VERY_SMALL;

        out[ n*stride ] = y;
    }
}
369 #endif
370
/* Second-order high-pass filter with an adjustable cutoff.
   Derives Q28 biquad coefficients from cutoff_Hz and Fs, then filters `len`
   samples per channel from `in` to `out`. hp_mem holds the filter state;
   in the float build the second channel uses hp_mem+2. */
static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs, int arch)
{
    opus_int32 ma_Q28[ 3 ], ar_Q28[ 2 ];
    opus_int32 Fc_Q19, Fc2, r_Q28, r_Q22;
    (void)arch;

    /* Normalized cutoff: 1.5*pi*cutoff_Hz/Fs in Q19. */
    silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
    Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
    silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );

    r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 );
    r_Q22 = silk_RSHIFT( r_Q28, 6 );

    /* b = r * [ 1; -2; 1 ]; */
    ma_Q28[ 0 ] = r_Q28;
    ma_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
    ma_Q28[ 2 ] = r_Q28;

    /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ];
       the middle term is evaluated as -r * ( 2 - Fc * Fc ). */
    Fc2 = silk_SMULWW( Fc_Q19, Fc_Q19 );
    ar_Q28[ 0 ] = silk_SMULWW( r_Q22, Fc2 - SILK_FIX_CONST( 2.0,  22 ) );
    ar_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );

#ifdef FIXED_POINT
    if( channels != 1 ) {
        silk_biquad_alt_stride2( in, ma_Q28, ar_Q28, hp_mem, out, len, arch );
    } else {
        silk_biquad_alt_stride1( in, ma_Q28, ar_Q28, hp_mem, out, len );
    }
#else
    /* One pass per channel over the interleaved buffer. */
    silk_biquad_float( in, ma_Q28, ar_Q28, hp_mem, out, len, channels );
    if( channels == 2 ) {
        silk_biquad_float( in+1, ma_Q28, ar_Q28, hp_mem+2, out+1, len, channels );
    }
#endif
}
407
408 #ifdef FIXED_POINT
/* Fixed-point DC rejection: subtracts a slowly tracking estimate of the mean
   from each channel of the interleaved input. The estimate for channel c is
   kept in hp_mem[2*c], scaled up by 14 bits. */
static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
{
    int ch;
    int shift;

    /* Approximates -round(log2(6.3*cutoff_Hz/Fs)) */
    shift=celt_ilog2(Fs/(cutoff_Hz*4));
    for (ch=0;ch<channels;ch++)
    {
        opus_val32 *mean = &hp_mem[2*ch];
        int n;
        for (n=0;n<len;n++)
        {
            const int pos = channels*n+ch;
            opus_val32 x, y;
            x = SHL32(EXTEND32(in[pos]), 14);
            /* Output is the input minus the mean before this sample's update. */
            y = x-*mean;
            *mean = *mean + PSHR32(y, shift);
            out[pos] = EXTRACT16(SATURATE(PSHR32(y, 14), 32767));
        }
    }
}
428
429 #else
/* Floating-point DC rejection: subtracts a one-pole running mean from the
   input. With two channels the means live in hp_mem[0] and hp_mem[2]; any
   other channel count is handled as a single stream using hp_mem[0]. */
static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
{
    int n;
    float coef, coef2;
    coef = 6.3f*cutoff_Hz/Fs;
    coef2 = 1-coef;
    if (channels!=2)
    {
        float mean;
        mean = hp_mem[0];
        for (n=0;n<len;n++)
        {
            opus_val32 x, y;
            x = in[n];
            y = x-mean;
            mean = coef*x + VERY_SMALL + coef2*mean;
            out[n] = y;
        }
        hp_mem[0] = mean;
    } else {
        float mean_l, mean_r;
        mean_l = hp_mem[0];
        mean_r = hp_mem[2];
        for (n=0;n<len;n++)
        {
            opus_val32 xl, xr, yl, yr;
            xl = in[2*n+0];
            xr = in[2*n+1];
            yl = xl-mean_l;
            yr = xr-mean_r;
            mean_l = coef*xl + VERY_SMALL + coef2*mean_l;
            mean_r = coef*xr + VERY_SMALL + coef2*mean_r;
            out[2*n+0] = yl;
            out[2*n+1] = yr;
        }
        hp_mem[0] = mean_l;
        hp_mem[2] = mean_r;
    }
}
469 #endif
470
/* Narrows the stereo image by moving each output pair towards its mid
   signal. g1 and g2 are the width at the start and end of the transition;
   the amount removed from the half-difference is (Q15ONE - g). Over the
   first `overlap` samples the gain is cross-faded from g1 to g2 using the
   squared window, and the remainder of the frame uses g2.
   Reads the difference from `in` and adjusts `out` in place. */
static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int n;
    int step;
    int overlap;

    /* The window is defined at 48 kHz; step through it at the actual rate. */
    step = 48000/Fs;
    overlap = overlap48/step;
    g1 = Q15ONE-g1;
    g2 = Q15ONE-g2;

    for (n=0;n<overlap;n++)
    {
        const opus_val16 *pair = in + n*channels;
        opus_val32 side;
        opus_val16 gain, w2;
        w2 = MULT16_16_Q15(window[n*step], window[n*step]);
        gain = SHR32(MAC16_16(MULT16_16(w2,g2),
              Q15ONE-w2, g1), 15);
        side = EXTRACT16(HALF32((opus_val32)pair[0] - (opus_val32)pair[1]));
        side = MULT16_16_Q15(gain, side);
        out[n*channels] = out[n*channels] - side;
        out[n*channels+1] = out[n*channels+1] + side;
    }
    for (;n<frame_size;n++)
    {
        const opus_val16 *pair = in + n*channels;
        opus_val32 side;
        side = EXTRACT16(HALF32((opus_val32)pair[0] - (opus_val32)pair[1]));
        side = MULT16_16_Q15(g2, side);
        out[n*channels] = out[n*channels] - side;
        out[n*channels+1] = out[n*channels+1] + side;
    }
}
502
/* Applies a gain to an interleaved frame, cross-fading from g1 to g2 over
   the first `overlap` samples (using the squared window) and applying g2 to
   the rest of the frame. */
static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int n;
    int ch;
    int step;
    int overlap;

    /* The window is defined at 48 kHz; step through it at the actual rate. */
    step = 48000/Fs;
    overlap = overlap48/step;

    /* Cross-fade region: mono when channels==1, otherwise an interleaved pair. */
    for (n=0;n<overlap;n++)
    {
        opus_val16 gain, w2;
        w2 = MULT16_16_Q15(window[n*step], window[n*step]);
        gain = SHR32(MAC16_16(MULT16_16(w2,g2),
              Q15ONE-w2, g1), 15);
        if (channels==1)
        {
            out[n] = MULT16_16_Q15(gain, in[n]);
        } else {
            out[n*2] = MULT16_16_Q15(gain, in[n*2]);
            out[n*2+1] = MULT16_16_Q15(gain, in[n*2+1]);
        }
    }

    /* Steady-state region: constant gain g2 on every channel. */
    for (n=overlap;n<frame_size;n++)
    {
        ch=0;
        do {
            out[n*channels+ch] = MULT16_16_Q15(g2, in[n*channels+ch]);
        } while (++ch<channels);
    }
}
541
opus_encoder_create(opus_int32 Fs,int channels,int application,int * error)542 OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
543 {
544 int ret;
545 OpusEncoder *st;
546 if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
547 (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
548 && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
549 {
550 if (error)
551 *error = OPUS_BAD_ARG;
552 return NULL;
553 }
554 st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
555 if (st == NULL)
556 {
557 if (error)
558 *error = OPUS_ALLOC_FAIL;
559 return NULL;
560 }
561 ret = opus_encoder_init(st, Fs, channels, application);
562 if (error)
563 *error = ret;
564 if (ret != OPUS_OK)
565 {
566 opus_free(st);
567 st = NULL;
568 }
569 return st;
570 }
571
572 #ifdef ENABLE_DRED
573
/* Estimated cost in bits of one DRED chunk, indexed by quantizer (0..15). */
static const float dred_bits_table[16] = {73.2f, 68.1f, 62.5f, 57.0f, 51.5f, 45.7f, 39.9f, 32.4f, 26.4f, 20.4f, 16.3f, 13.f, 9.3f, 8.2f, 7.2f, 6.4f};

/* Estimates the number of bits needed to code `duration` worth of DRED with
   the quantizer schedule (q0, dQ, qmax). Returns the rounded estimate.
   When target_chunks is non-NULL it receives the number of leading chunks
   whose cumulative cost stays below target_bits. */
static int estimate_dred_bitrate(int q0, int dQ, int qmax, int duration, opus_int32 target_bits, int *target_chunks) {
   float bits;
   int nb_chunks;
   int chunk;
   int affordable = 0;

   /* Signaling DRED costs 3 bytes. */
   bits = 8*(3+DRED_EXPERIMENTAL_BYTES);
   /* Approximation for the size of the IS. */
   bits += 50.f+dred_bits_table[q0];

   nb_chunks = IMIN((duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
   for (chunk=0;chunk<nb_chunks;chunk++) {
      bits += dred_bits_table[compute_quantizer(q0, dQ, qmax, chunk)];
      if (bits < target_bits)
         affordable = chunk+1;
   }
   if (target_chunks != NULL)
      *target_chunks = affordable;
   return (int)floor(.5f+bits);
}
592
/* Decides how much of bitrate_bps to spend on DRED for a packet of
   frame_size samples and stores the chosen quantizer settings and chunk
   budget in st (dred_q0, dred_dQ, dred_qmax, dred_target_chunks).
   Returns the DRED bitrate in bits per second, or 0 when fewer than two
   chunks would fit.

   frame_size must be a valid Opus frame size so that 6*Fs/frame_size is an
   exact integer; the rate<->bits conversions go through that factor to avoid
   the 32-bit overflow of bitrate*frame_size at high rates and long frames. */
static opus_int32 compute_dred_bitrate(OpusEncoder *st, opus_int32 bitrate_bps, int frame_size)
{
   float dred_frac;
   int bitrate_offset;
   opus_int32 dred_bitrate;
   opus_int32 target_dred_bitrate;
   opus_int32 packets_per_6s;
   int target_chunks;
   opus_int32 max_dred_bits;
   int q0, dQ, qmax;
   if (st->silk_mode.useInBandFEC) {
      dred_frac = MIN16(.7f, 3.f*st->silk_mode.packetLossPercentage/100.f);
      bitrate_offset = 20000;
   } else {
      if (st->silk_mode.packetLossPercentage > 5) {
         dred_frac = MIN16(.8f, .55f + st->silk_mode.packetLossPercentage/100.f);
      } else {
         dred_frac = 12*st->silk_mode.packetLossPercentage/100.f;
      }
      bitrate_offset = 12000;
   }
   /* Account for the fact that longer packets require less redundancy. */
   dred_frac = dred_frac/(dred_frac + (1-dred_frac)*(frame_size*50.f)/st->Fs);
   /* Approximate fit based on a few experiments. Could probably be improved. */
   q0 = IMIN(15, IMAX(4, 51 - 3*EC_ILOG(IMAX(1, bitrate_bps-bitrate_offset))));
   dQ = bitrate_bps-bitrate_offset > 36000 ? 3 : 5;
   qmax = 15;
   target_dred_bitrate = IMAX(0, (int)(dred_frac*(bitrate_bps-bitrate_offset)));
   /* Six times the packet rate: an integer for every valid frame size
      (2.5 ms gives 2400, 120 ms gives 50). */
   packets_per_6s = 6*st->Fs/frame_size;
   if (st->dred_duration > 0) {
      /* Same value as target_dred_bitrate*frame_size/Fs, without the large
         intermediate product. */
      opus_int32 target_bits = target_dred_bitrate*6/packets_per_6s;
      max_dred_bits = estimate_dred_bitrate(q0, dQ, qmax, st->dred_duration, target_bits, &target_chunks);
   } else {
      max_dred_bits = 0;
      target_chunks=0;
   }
   /* Same value as max_dred_bits*Fs/frame_size. */
   dred_bitrate = IMIN(target_dred_bitrate, max_dred_bits*packets_per_6s/6);
   /* If we can't afford enough bits, don't bother with DRED at all. */
   if (target_chunks < 2)
      dred_bitrate = 0;
   st->dred_q0 = q0;
   st->dred_dQ = dQ;
   st->dred_qmax = qmax;
   st->dred_target_chunks = target_chunks;
   return dred_bitrate;
}
637 #endif
638
user_bitrate_to_bitrate(OpusEncoder * st,int frame_size,int max_data_bytes)639 static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
640 {
641 if(!frame_size)frame_size=st->Fs/400;
642 if (st->user_bitrate_bps==OPUS_AUTO)
643 return 60*st->Fs/frame_size + st->Fs*st->channels;
644 else if (st->user_bitrate_bps==OPUS_BITRATE_MAX)
645 return max_data_bytes*8*st->Fs/frame_size;
646 else
647 return st->user_bitrate_bps;
648 }
649
650 #ifndef DISABLE_FLOAT_API
651 #ifdef FIXED_POINT
652 #define PCM2VAL(x) FLOAT2INT16(x)
653 #else
654 #define PCM2VAL(x) SCALEIN(x)
655 #endif
656
/* Downmixes `subframe` samples of interleaved float PCM with C channels,
   starting `offset` samples in, into y[].
   y[j] starts as channel c1. If c2 >= 0 channel c2 is added; if c2 == -2
   channels 1..C-1 are all added; any other c2 leaves channel c1 alone.
   Samples are converted with PCM2VAL() before being summed. */
void downmix_float(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
{
    const float *pcm = (const float *)_x;
    int j;

    for (j=0;j<subframe;j++)
    {
        const int frame = (j+offset)*C;

        y[j] = PCM2VAL(pcm[frame+c1]);
        if (c2>-1)
        {
            y[j] += PCM2VAL(pcm[frame+c2]);
        } else if (c2==-2)
        {
            int c;
            for (c=1;c<C;c++)
                y[j] += PCM2VAL(pcm[frame+c]);
        }
    }
}
679 #endif
680
/* Downmixes `subframe` samples of interleaved 16-bit PCM with C channels,
   starting `offset` samples in, into y[].
   y[j] starts as channel c1. If c2 >= 0 channel c2 is added; if c2 == -2
   channels 1..C-1 are all added; any other c2 leaves channel c1 alone. */
void downmix_int(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
{
    const opus_int16 *pcm = (const opus_int16 *)_x;
    int j;

    for (j=0;j<subframe;j++)
    {
        const int frame = (j+offset)*C;

        y[j] = pcm[frame+c1];
        if (c2>-1)
        {
            y[j] += pcm[frame+c2];
        } else if (c2==-2)
        {
            int c;
            for (c=1;c<C;c++)
                y[j] += pcm[frame+c];
        }
    }
}
703
/* Picks the frame size (in samples) to encode.
   With OPUS_FRAMESIZE_ARG the caller's frame_size is used as is; with
   OPUS_FRAMESIZE_2_5_MS .. OPUS_FRAMESIZE_120_MS the size follows from the
   requested duration. Returns -1 if frame_size is shorter than 2.5 ms, the
   setting is unknown, the requested duration exceeds frame_size, or the
   result is not one of 2.5, 5, 10, 20, 40, 60, 80, 100 or 120 ms. */
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
    int selected;
    int is_valid;

    if (frame_size<Fs/400)
        return -1;

    if (variable_duration == OPUS_FRAMESIZE_ARG)
    {
        selected = frame_size;
    } else {
        int step;
        if (variable_duration < OPUS_FRAMESIZE_2_5_MS || variable_duration > OPUS_FRAMESIZE_120_MS)
            return -1;
        step = variable_duration-OPUS_FRAMESIZE_2_5_MS;
        if (variable_duration <= OPUS_FRAMESIZE_40_MS)
        {
            /* 2.5, 5, 10, 20, 40 ms: each step doubles the duration. */
            selected = (Fs/400)<<step;
        } else {
            /* 60, 80, 100, 120 ms: multiples of 20 ms. */
            selected = (step-2)*Fs/50;
        }
    }

    if (selected>frame_size)
        return -1;

    is_valid = (400*selected==Fs || 200*selected==Fs || 100*selected==Fs ||
                50*selected==Fs || 25*selected==Fs || 50*selected==3*Fs ||
                50*selected==4*Fs || 50*selected==5*Fs || 50*selected==6*Fs);
    if (!is_valid)
        return -1;
    return selected;
}
728
/* Estimates the stereo width of an interleaved two-channel frame.
   Updates the smoothed channel energies and cross term in `mem`, derives a
   width from the inter-channel correlation and loudness difference, and
   returns min(Q15ONE, 20*max_follower). */
opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
{
    opus_val32 xx, xy, yy;
    opus_val16 sqrt_xx, sqrt_yy;
    opus_val16 qrrt_xx, qrrt_yy;
    opus_val16 short_alpha;
    int frame_rate;
    int base;

    frame_rate = Fs/frame_size;
    short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate);
    xx=xy=yy=0;
    /* Work in groups of 4. The frame size is always a multiple of 4 *except*
       for 2.5 ms frames at 12 kHz. Since this setting is very rare, the last
       two samples are simply discarded. */
    for (base=0;base<frame_size-3;base+=4)
    {
        opus_val32 pxx=0;
        opus_val32 pxy=0;
        opus_val32 pyy=0;
        int k;
        for (k=0;k<4;k++)
        {
            opus_val16 x, y;
            x = pcm[2*(base+k)];
            y = pcm[2*(base+k)+1];
            pxx += SHR32(MULT16_16(x,x),2);
            pxy += SHR32(MULT16_16(x,y),2);
            pyy += SHR32(MULT16_16(y,y),2);
        }
        xx += SHR32(pxx, 10);
        xy += SHR32(pxy, 10);
        yy += SHR32(pyy, 10);
    }
#ifndef FIXED_POINT
    /* Ignore frames whose energy is huge or not a number. */
    if (!(xx < 1e9f) || celt_isnan(xx) || !(yy < 1e9f) || celt_isnan(yy))
    {
        xy = xx = yy = 0;
    }
#endif
    /* Recursive smoothing; the smoothed values are kept non-negative. */
    mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
    mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
    mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
    mem->XX = MAX32(0, mem->XX);
    mem->XY = MAX32(0, mem->XY);
    mem->YY = MAX32(0, mem->YY);
    /* Only update the width when there is enough energy to measure it. */
    if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
    {
        opus_val16 corr;
        opus_val16 ldiff;
        opus_val16 width;
        sqrt_xx = celt_sqrt(mem->XX);
        sqrt_yy = celt_sqrt(mem->YY);
        qrrt_xx = celt_sqrt(sqrt_xx);
        qrrt_yy = celt_sqrt(sqrt_yy);
        /* Inter-channel correlation */
        mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
        corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
        /* Approximate loudness difference */
        ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy);
        width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
        /* Smoothing over one second */
        mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
        /* Peak follower */
        mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
    }
    return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower)));
}
810
/* Decides whether in-band FEC should be used for this frame.
   Returns 1 to enable FEC and 0 otherwise. FEC is never used when it is
   disabled, when the expected loss is 0, or in CELT-only mode. When the loss
   exceeds 5% the bandwidth may be lowered to make room for FEC; *bandwidth is
   only changed when FEC ends up enabled. last_fec adds hysteresis around the
   per-bandwidth thresholds in fec_thresholds. */
static int decide_fec(int useInBandFEC, int PacketLoss_perc, int last_fec, int mode, int *bandwidth, opus_int32 rate)
{
    int bw;

    if (!useInBandFEC || PacketLoss_perc == 0 || mode == MODE_CELT_ONLY)
        return 0;

    bw = *bandwidth;
    for (;;)
    {
        const opus_int32 *entry = &fec_thresholds[2*(bw - OPUS_BANDWIDTH_NARROWBAND)];
        /* Threshold for using FEC at the bandwidth being tried */
        opus_int32 min_rate = entry[0];

        if (last_fec == 1)
            min_rate -= entry[1];
        else if (last_fec == 0)
            min_rate += entry[1];
        min_rate = silk_SMULWB( silk_MUL( min_rate,
              125 - silk_min( PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );

        if (rate > min_rate)
        {
            *bandwidth = bw;
            return 1;
        }
        /* If loss <= 5%, only the current bandwidth is considered.
           If loss > 5%, keep lowering the bandwidth until FEC fits. */
        if (PacketLoss_perc <= 5 || bw <= OPUS_BANDWIDTH_NARROWBAND)
            break;
        bw--;
    }
    /* Couldn't find any bandwidth to enable FEC, keep original bandwidth. */
    return 0;
}
843
compute_silk_rate_for_hybrid(int rate,int bandwidth,int frame20ms,int vbr,int fec,int channels)844 static int compute_silk_rate_for_hybrid(int rate, int bandwidth, int frame20ms, int vbr, int fec, int channels) {
845 int entry;
846 int i;
847 int N;
848 int silk_rate;
849 static int rate_table[][5] = {
850 /* |total| |-------- SILK------------|
851 |-- No FEC -| |--- FEC ---|
852 10ms 20ms 10ms 20ms */
853 { 0, 0, 0, 0, 0},
854 {12000, 10000, 10000, 11000, 11000},
855 {16000, 13500, 13500, 15000, 15000},
856 {20000, 16000, 16000, 18000, 18000},
857 {24000, 18000, 18000, 21000, 21000},
858 {32000, 22000, 22000, 28000, 28000},
859 {64000, 38000, 38000, 50000, 50000}
860 };
861 /* Do the allocation per-channel. */
862 rate /= channels;
863 entry = 1 + frame20ms + 2*fec;
864 N = sizeof(rate_table)/sizeof(rate_table[0]);
865 for (i=1;i<N;i++)
866 {
867 if (rate_table[i][0] > rate) break;
868 }
869 if (i == N)
870 {
871 silk_rate = rate_table[i-1][entry];
872 /* For now, just give 50% of the extra bits to SILK. */
873 silk_rate += (rate-rate_table[i-1][0])/2;
874 } else {
875 opus_int32 lo, hi, x0, x1;
876 lo = rate_table[i-1][entry];
877 hi = rate_table[i][entry];
878 x0 = rate_table[i-1][0];
879 x1 = rate_table[i][0];
880 silk_rate = (lo*(x1-rate) + hi*(rate-x0))/(x1-x0);
881 }
882 if (!vbr)
883 {
884 /* Tiny boost to SILK for CBR. We should probably tune this better. */
885 silk_rate += 100;
886 }
887 if (bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND)
888 silk_rate += 300;
889 silk_rate *= channels;
890 /* Small adjustment for stereo (calibrated for 32 kb/s, haven't tried other bitrates). */
891 if (channels == 2 && rate >= 12000)
892 silk_rate -= 1000;
893 return silk_rate;
894 }
895
896 /* Returns the equivalent bitrate corresponding to 20 ms frames,
897 complexity 10 VBR operation. */
compute_equiv_rate(opus_int32 bitrate,int channels,int frame_rate,int vbr,int mode,int complexity,int loss)898 static opus_int32 compute_equiv_rate(opus_int32 bitrate, int channels,
899 int frame_rate, int vbr, int mode, int complexity, int loss)
900 {
901 opus_int32 equiv;
902 equiv = bitrate;
903 /* Take into account overhead from smaller frames. */
904 if (frame_rate > 50)
905 equiv -= (40*channels+20)*(frame_rate - 50);
906 /* CBR is about a 8% penalty for both SILK and CELT. */
907 if (!vbr)
908 equiv -= equiv/12;
909 /* Complexity makes about 10% difference (from 0 to 10) in general. */
910 equiv = equiv * (90+complexity)/100;
911 if (mode == MODE_SILK_ONLY || mode == MODE_HYBRID)
912 {
913 /* SILK complexity 0-1 uses the non-delayed-decision NSQ, which
914 costs about 20%. */
915 if (complexity<2)
916 equiv = equiv*4/5;
917 equiv -= equiv*loss/(6*loss + 10);
918 } else if (mode == MODE_CELT_ONLY) {
919 /* CELT complexity 0-4 doesn't have the pitch filter, which costs
920 about 10%. */
921 if (complexity<5)
922 equiv = equiv*9/10;
923 } else {
924 /* Mode not known yet */
925 /* Half the SILK loss*/
926 equiv -= equiv*loss/(12*loss + 20);
927 }
928 return equiv;
929 }
930
931 #ifndef DISABLE_FLOAT_API
932
is_digital_silence(const opus_val16 * pcm,int frame_size,int channels,int lsb_depth)933 int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth)
934 {
935 int silence = 0;
936 opus_val32 sample_max = 0;
937 #ifdef MLP_TRAINING
938 return 0;
939 #endif
940 sample_max = celt_maxabs16(pcm, frame_size*channels);
941
942 #ifdef FIXED_POINT
943 silence = (sample_max == 0);
944 (void)lsb_depth;
945 #else
946 silence = (sample_max <= (opus_val16) 1 / (1 << lsb_depth));
947 #endif
948
949 return silence;
950 }
951
952 #ifdef FIXED_POINT
compute_frame_energy(const opus_val16 * pcm,int frame_size,int channels,int arch)953 static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch)
954 {
955 int i;
956 opus_val32 sample_max;
957 int max_shift;
958 int shift;
959 opus_val32 energy = 0;
960 int len = frame_size*channels;
961 (void)arch;
962 /* Max amplitude in the signal */
963 sample_max = celt_maxabs16(pcm, len);
964
965 /* Compute the right shift required in the MAC to avoid an overflow */
966 max_shift = celt_ilog2(len);
967 shift = IMAX(0, (celt_ilog2(1+sample_max) << 1) + max_shift - 28);
968
969 /* Compute the energy */
970 for (i=0; i<len; i++)
971 energy += SHR32(MULT16_16(pcm[i], pcm[i]), shift);
972
973 /* Normalize energy by the frame size and left-shift back to the original position */
974 energy /= len;
975 energy = SHL32(energy, shift);
976
977 return energy;
978 }
979 #else
compute_frame_energy(const opus_val16 * pcm,int frame_size,int channels,int arch)980 static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch)
981 {
982 int len = frame_size*channels;
983 return celt_inner_prod(pcm, pcm, len, arch)/len;
984 }
985 #endif
986
987 /* Decides if DTX should be turned on (=1) or off (=0) */
/* Decides if DTX should be turned on (=1) or off (=0).
   *nb_no_activity_ms_Q1 is the running count of consecutive inactive time and
   is updated on every call. */
static int decide_dtx_mode(opus_int activity,            /* indicates if this frame contains speech/music */
                           int *nb_no_activity_ms_Q1,    /* number of consecutive milliseconds with no activity, in Q1 */
                           int frame_size_ms_Q1          /* number of milliseconds in this update, in Q1 */
                           )
{
   /* The allowed bounds are expressed in 20 ms frames by the SILK headers.
      Since this function can be called with any frame length, they are
      converted to milliseconds in Q1 (hence the *20*2) before comparing. */
   const int dtx_first_ms_Q1 = NB_SPEECH_FRAMES_BEFORE_DTX*20*2;
   const int dtx_last_ms_Q1 = (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX)*20*2;

   /* Any activity restarts the count and rules out DTX. */
   if (activity)
   {
      *nb_no_activity_ms_Q1 = 0;
      return 0;
   }

   *nb_no_activity_ms_Q1 += frame_size_ms_Q1;

   /* Not enough consecutive inactivity yet. */
   if (*nb_no_activity_ms_Q1 <= dtx_first_ms_Q1)
      return 0;

   /* Within the allowed run of DTX frames: valid frame for DTX! */
   if (*nb_no_activity_ms_Q1 <= dtx_last_ms_Q1)
      return 1;

   /* Too many consecutive DTX frames: code this one and rewind the count to
      the point where DTX becomes possible again. */
   *nb_no_activity_ms_Q1 = dtx_first_ms_Q1;
   return 0;
}
1014
1015 #endif
1016
/* Returns the number of bytes to spend on the redundancy frame, or 0 when the
   budget is too small for redundancy to be worthwhile. The result never
   exceeds 257. */
static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels)
{
   const int overhead_bits = 40*channels+20;
   opus_int32 rate;
   opus_int32 budget_bits;
   int wanted_bytes;
   int cap_bytes;

   /* Equivalent rate for 5 ms frames. */
   rate = bitrate_bps + overhead_bits*(200 - frame_rate);
   /* For VBR, further increase the bitrate if we can afford it. It's pretty
      short and we'll avoid artefacts. */
   rate = 3*rate/2;
   wanted_bytes = rate/1600;

   /* Upper bound given what fits in the packet (CBR or VBR with a cap). */
   budget_bits = max_data_bytes*8 - 2*overhead_bits;
   cap_bytes = (budget_bits*240/(240+48000/frame_rate) + overhead_bits)/8;
   wanted_bytes = IMIN(wanted_bytes, cap_bytes);

   /* If we can't get enough bits for redundancy to be worth it, rely on the
      decoder PLC instead. */
   if (wanted_bytes <= 4 + 8*channels)
      return 0;

   return IMIN(257, wanted_bytes);
}
1044
1045 static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
1046 unsigned char *data, opus_int32 max_data_bytes,
1047 int float_api, int first_frame,
1048 #ifdef ENABLE_DRED
1049 opus_int32 dred_bitrate_bps,
1050 #endif
1051 #ifndef DISABLE_FLOAT_API
1052 AnalysisInfo *analysis_info, int is_silence,
1053 #endif
1054 int redundancy, int celt_to_silk, int prefill,
1055 opus_int32 equiv_rate, int to_celt);
1056
/* Encodes frame_size samples per channel of pcm into one Opus packet in data.

   The function works in three stages:
    1. Optional signal analysis and bitrate/CBR budgeting. When the budget is
       too small to code anything useful, a minimal "PLC" packet (TOC byte,
       plus a frame count for code 3) is emitted and the function returns.
    2. Per-packet decisions, stored in the encoder state: number of stream
       channels, mode (SILK/hybrid/CELT), bandwidth, redundancy and FEC.
    3. Coding: a single call to opus_encode_frame_native(), or, when the
       packet is longer than what one coded frame may hold, several sub-frames
       that are merged with the repacketizer.

   st                Encoder state (updated in place).
   pcm               Interleaved input samples.
   frame_size        Samples per channel in this packet; must be > 0.
   data              Output buffer.
   out_data_bytes    Size of data in bytes; must be > 0. The per-frame budget
                     is capped at 1276 bytes.
   lsb_depth         Input bit depth; clamped to st->lsb_depth.
   analysis_pcm, analysis_size, c1, c2, analysis_channels, downmix
                     Forwarded to run_analysis(); unused when the float API is
                     disabled.
   float_api         Forwarded to opus_encode_frame_native().

   Returns the packet length in bytes, or a negative error code
   (OPUS_BAD_ARG, OPUS_BUFFER_TOO_SMALL, OPUS_INTERNAL_ERROR, or whatever
   opus_encode_frame_native() reports in the single-frame case). */
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
      unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
      const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
      int analysis_channels, downmix_func downmix, int float_api)
{
   void *silk_enc;
   CELTEncoder *celt_enc;
   int i;
   int ret=0;
   int prefill=0;
   int redundancy = 0;
   int celt_to_silk = 0;
   int to_celt = 0;
   int voice_est; /* Probability of voice in Q7 */
   opus_int32 equiv_rate;
   int frame_rate;
   opus_int32 max_rate; /* Max bitrate we're allowed to use */
   int curr_bandwidth;
   opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
   opus_int32 cbr_bytes=-1;
   opus_val16 stereo_width;
   const CELTMode *celt_mode;
#ifndef DISABLE_FLOAT_API
   AnalysisInfo analysis_info;
   int analysis_read_pos_bak=-1;
   int analysis_read_subframe_bak=-1;
   int is_silence = 0;
#endif
#ifdef ENABLE_DRED
   opus_int32 dred_bitrate_bps;
#endif
   ALLOC_STACK;

   max_data_bytes = IMIN(1276, out_data_bytes);

   st->rangeFinal = 0;
   if (frame_size <= 0 || max_data_bytes <= 0)
   {
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   /* Cannot encode 100 ms in 1 byte */
   if (max_data_bytes==1 && st->Fs==(frame_size*10))
   {
      RESTORE_STACK;
      return OPUS_BUFFER_TOO_SMALL;
   }

   silk_enc = (char*)st+st->silk_enc_offset;
   celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);

   lsb_depth = IMIN(lsb_depth, st->lsb_depth);

   celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
#ifndef DISABLE_FLOAT_API
   analysis_info.valid = 0;
   /* The analysis only runs at high enough complexity and sampling rate. */
#ifdef FIXED_POINT
   if (st->silk_mode.complexity >= 10 && st->Fs>=16000)
#else
   if (st->silk_mode.complexity >= 7 && st->Fs>=16000)
#endif
   {
      is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth);
      /* Remember where the analysis reader was so that the multi-frame path
         below can rewind and consume the analysis one sub-frame at a time. */
      analysis_read_pos_bak = st->analysis.read_pos;
      analysis_read_subframe_bak = st->analysis.read_subframe;
      run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
            c1, c2, analysis_channels, st->Fs,
            lsb_depth, downmix, &analysis_info);

      /* Track the peak signal energy */
      if (!is_silence && analysis_info.activity_probability > DTX_ACTIVITY_THRESHOLD)
         st->peak_signal_energy = MAX32(MULT16_32_Q15(QCONST16(0.999f, 15), st->peak_signal_energy),
               compute_frame_energy(pcm, frame_size, st->channels, st->arch));
   } else if (st->analysis.initialized) {
      tonality_analysis_reset(&st->analysis);
   }
#else
   (void)analysis_pcm;
   (void)analysis_size;
   (void)c1;
   (void)c2;
   (void)analysis_channels;
   (void)downmix;
#endif

#ifndef DISABLE_FLOAT_API
   /* Reset voice_ratio if this frame is not silent or if analysis is disabled.
    * Otherwise, preserve voice_ratio from the last non-silent frame */
   if (!is_silence)
      st->voice_ratio = -1;

   st->detected_bandwidth = 0;
   if (analysis_info.valid)
   {
      int analysis_bandwidth;
      if (st->signal_type == OPUS_AUTO)
      {
         float prob;
         /* Pick the music probability estimate according to the previous
            mode (none yet / CELT-only / SILK or hybrid). */
         if (st->prev_mode == 0)
            prob = analysis_info.music_prob;
         else if (st->prev_mode == MODE_CELT_ONLY)
            prob = analysis_info.music_prob_max;
         else
            prob = analysis_info.music_prob_min;
         /* Voice ratio as a rounded percentage */
         st->voice_ratio = (int)floor(.5+100*(1-prob));
      }

      /* Map the analysis bandwidth index to an Opus bandwidth */
      analysis_bandwidth = analysis_info.bandwidth;
      if (analysis_bandwidth<=12)
         st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
      else if (analysis_bandwidth<=14)
         st->detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
      else if (analysis_bandwidth<=16)
         st->detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
      else if (analysis_bandwidth<=18)
         st->detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
      else
         st->detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;
   }
#else
   st->voice_ratio = -1;
#endif

   if (st->channels==2 && st->force_channels!=1)
      stereo_width = compute_stereo_width(pcm, frame_size, st->Fs, &st->width_mem);
   else
      stereo_width = 0;
   st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);

   frame_rate = st->Fs/frame_size;
   if (!st->use_vbr)
   {
      /* Multiply by 12 to make sure the division is exact. */
      int frame_rate12 = 12*st->Fs/frame_size;
      /* We need to make sure that "int" values always fit in 16 bits. */
      cbr_bytes = IMIN( (12*st->bitrate_bps/8 + frame_rate12/2)/frame_rate12, max_data_bytes);
      st->bitrate_bps = cbr_bytes*(opus_int32)frame_rate12*8/12;
      /* Make sure we provide at least one byte to avoid failing. */
      max_data_bytes = IMAX(1, cbr_bytes);
   }
#ifdef ENABLE_DRED
   /* Allocate some of the bits to DRED if needed. */
   dred_bitrate_bps = compute_dred_bitrate(st, st->bitrate_bps, frame_size);
   st->bitrate_bps -= dred_bitrate_bps;
#endif
   if (max_data_bytes<3 || st->bitrate_bps < 3*frame_rate*8
      || (frame_rate<50 && (max_data_bytes*frame_rate<300 || st->bitrate_bps < 2400)))
   {
      /*If the space is too low to do something useful, emit 'PLC' frames.*/
      int tocmode = st->mode;
      int bw = st->bandwidth == 0 ? OPUS_BANDWIDTH_NARROWBAND : st->bandwidth;
      int packet_code = 0;
      int num_multiframes = 0;

      if (tocmode==0)
         tocmode = MODE_SILK_ONLY;
      if (frame_rate>100)
         tocmode = MODE_CELT_ONLY;
      /* 40 ms -> 2 x 20 ms if in CELT_ONLY or HYBRID mode */
      if (frame_rate==25 && tocmode!=MODE_SILK_ONLY)
      {
         frame_rate = 50;
         packet_code = 1;
      }

      /* >= 60 ms frames */
      if (frame_rate<=16)
      {
         /* 1 x 60 ms, 2 x 40 ms, 2 x 60 ms */
         if (out_data_bytes==1 || (tocmode==MODE_SILK_ONLY && frame_rate!=10))
         {
            tocmode = MODE_SILK_ONLY;

            packet_code = frame_rate <= 12;
            frame_rate = frame_rate == 12 ? 25 : 16;
         }
         else
         {
            num_multiframes = 50/frame_rate;
            frame_rate = 50;
            packet_code = 3;
         }
      }

      /* Keep the signalled bandwidth within what the TOC mode allows */
      if(tocmode==MODE_SILK_ONLY&&bw>OPUS_BANDWIDTH_WIDEBAND)
         bw=OPUS_BANDWIDTH_WIDEBAND;
      else if (tocmode==MODE_CELT_ONLY&&bw==OPUS_BANDWIDTH_MEDIUMBAND)
         bw=OPUS_BANDWIDTH_NARROWBAND;
      else if (tocmode==MODE_HYBRID&&bw<=OPUS_BANDWIDTH_SUPERWIDEBAND)
         bw=OPUS_BANDWIDTH_SUPERWIDEBAND;

      data[0] = gen_toc(tocmode, frame_rate, bw, st->stream_channels);
      data[0] |= packet_code;

      /* Codes 0 and 1 need only the TOC byte; other codes add a second byte */
      ret = packet_code <= 1 ? 1 : 2;

      max_data_bytes = IMAX(max_data_bytes, ret);

      if (packet_code==3)
         data[1] = num_multiframes;

      if (!st->use_vbr)
      {
         /* CBR: pad the packet up to the full byte budget */
         ret = opus_packet_pad(data, ret, max_data_bytes);
         if (ret == OPUS_OK)
            ret = max_data_bytes;
         else
            ret = OPUS_INTERNAL_ERROR;
      }
      RESTORE_STACK;
      return ret;
   }
   max_rate = frame_rate*max_data_bytes*8;

   /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
   equiv_rate = compute_equiv_rate(st->bitrate_bps, st->channels, st->Fs/frame_size,
         st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage);

   if (st->signal_type == OPUS_SIGNAL_VOICE)
      voice_est = 127;
   else if (st->signal_type == OPUS_SIGNAL_MUSIC)
      voice_est = 0;
   else if (st->voice_ratio >= 0)
   {
      voice_est = st->voice_ratio*327>>8;
      /* For AUDIO, never be more than 90% confident of having speech */
      if (st->application == OPUS_APPLICATION_AUDIO)
         voice_est = IMIN(voice_est, 115);
   } else if (st->application == OPUS_APPLICATION_VOIP)
      voice_est = 115;
   else
      voice_est = 48;

   if (st->force_channels!=OPUS_AUTO && st->channels == 2)
   {
      st->stream_channels = st->force_channels;
   } else {
#ifdef FUZZING
      (void)stereo_music_threshold;
      (void)stereo_voice_threshold;
      /* Random mono/stereo decision */
      if (st->channels == 2 && (rand()&0x1F)==0)
         st->stream_channels = 3-st->stream_channels;
#else
      /* Rate-dependent mono-stereo decision */
      if (st->channels == 2)
      {
         opus_int32 stereo_threshold;
         stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
         /* Hysteresis around the current channel count */
         if (st->stream_channels == 2)
            stereo_threshold -= 1000;
         else
            stereo_threshold += 1000;
         st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
      } else {
         st->stream_channels = st->channels;
      }
#endif
   }
   /* Update equivalent rate for channels decision. */
   equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size,
         st->use_vbr, 0, st->silk_mode.complexity, st->silk_mode.packetLossPercentage);

   /* Allow SILK DTX if DTX is enabled but the generalized DTX cannot be used,
      e.g. because of the complexity setting or sample rate. */
#ifndef DISABLE_FLOAT_API
   st->silk_mode.useDTX = st->use_dtx && !(analysis_info.valid || is_silence);
#else
   st->silk_mode.useDTX = st->use_dtx;
#endif

   /* Mode selection depending on application and signal type */
   if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
   {
      st->mode = MODE_CELT_ONLY;
   } else if (st->user_forced_mode == OPUS_AUTO)
   {
#ifdef FUZZING
      (void)stereo_width;
      (void)mode_thresholds;
      /* Random mode switching */
      if ((rand()&0xF)==0)
      {
         if ((rand()&0x1)==0)
            st->mode = MODE_CELT_ONLY;
         else
            st->mode = MODE_SILK_ONLY;
      } else {
         if (st->prev_mode==MODE_CELT_ONLY)
            st->mode = MODE_CELT_ONLY;
         else
            st->mode = MODE_SILK_ONLY;
      }
#else
      opus_int32 mode_voice, mode_music;
      opus_int32 threshold;

      /* Interpolate based on stereo width: row [0] is weighted by
         (1 - width) and row [1] by width, for both the voice column ([.][0])
         and the music column ([.][1]). */
      mode_voice = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][0])
            + MULT16_32_Q15(stereo_width,mode_thresholds[1][0]));
      mode_music = (opus_int32)(MULT16_32_Q15(Q15ONE-stereo_width,mode_thresholds[0][1])
            + MULT16_32_Q15(stereo_width,mode_thresholds[1][1]));
      /* Interpolate based on speech/music probability */
      threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
      /* Bias towards SILK for VoIP because of some useful features */
      if (st->application == OPUS_APPLICATION_VOIP)
         threshold += 8000;

      /* Hysteresis */
      if (st->prev_mode == MODE_CELT_ONLY)
         threshold -= 4000;
      else if (st->prev_mode>0)
         threshold += 4000;

      st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;

      /* When FEC is enabled and there's enough packet loss, use SILK.
         Unless the FEC is set to 2, in which case we don't switch to SILK if we're confident we have music. */
      if (st->silk_mode.useInBandFEC && st->silk_mode.packetLossPercentage > ((128-voice_est)>>4) && (st->fec_config != 2 || voice_est > 25))
         st->mode = MODE_SILK_ONLY;
      /* When encoding voice and DTX is enabled but the generalized DTX cannot be used,
         use SILK in order to make use of its DTX. */
      if (st->silk_mode.useDTX && voice_est > 100)
         st->mode = MODE_SILK_ONLY;
#endif

      /* If max_data_bytes represents less than 6 kb/s, switch to CELT-only mode */
      if (max_data_bytes < (frame_rate > 50 ? 9000 : 6000)*frame_size / (st->Fs * 8))
         st->mode = MODE_CELT_ONLY;
   } else {
      st->mode = st->user_forced_mode;
   }

   /* Override the chosen mode to make sure we meet the requested frame size */
   if (st->mode != MODE_CELT_ONLY && frame_size < st->Fs/100)
      st->mode = MODE_CELT_ONLY;
   if (st->lfe)
      st->mode = MODE_CELT_ONLY;

   /* A switch between CELT-only and SILK/hybrid asks for a redundancy frame */
   if (st->prev_mode > 0 &&
       ((st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) ||
        (st->mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)))
   {
      redundancy = 1;
      celt_to_silk = (st->mode != MODE_CELT_ONLY);
      if (!celt_to_silk)
      {
         /* Switch to SILK/hybrid if frame size is 10 ms or more*/
         if (frame_size >= st->Fs/100)
         {
            st->mode = st->prev_mode;
            to_celt = 1;
         } else {
            redundancy=0;
         }
      }
   }

   /* When encoding multiframes, we can ask for a switch to CELT only in the last frame. This switch
    * is processed above as the requested mode shouldn't interrupt stereo->mono transition. */
   if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0
         && st->mode != MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY)
   {
      /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
      st->silk_mode.toMono = 1;
      st->stream_channels = 2;
   } else {
      st->silk_mode.toMono = 0;
   }

   /* Update equivalent rate with mode decision. */
   equiv_rate = compute_equiv_rate(st->bitrate_bps, st->stream_channels, st->Fs/frame_size,
         st->use_vbr, st->mode, st->silk_mode.complexity, st->silk_mode.packetLossPercentage);

   /* Coming from CELT-only: reinitialize SILK and ask for a prefill */
   if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
   {
      silk_EncControlStruct dummy;
      silk_InitEncoder( silk_enc, st->arch, &dummy);
      prefill=1;
   }

   /* Automatic (rate-dependent) bandwidth selection */
   if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
   {
      const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
      opus_int32 bandwidth_thresholds[8];
      int bandwidth = OPUS_BANDWIDTH_FULLBAND;

      if (st->channels==2 && st->force_channels!=1)
      {
         voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
         music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
      } else {
         voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
         music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
      }
      /* Interpolate bandwidth thresholds depending on voice estimation */
      for (i=0;i<8;i++)
      {
         bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
                  + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
      }
      /* Walk down from fullband until the rate clears the (threshold,
         hysteresis) pair stored for that bandwidth. */
      do {
         int threshold, hysteresis;
         threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
         hysteresis = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)+1];
         if (!st->first)
         {
            if (st->auto_bandwidth >= bandwidth)
               threshold -= hysteresis;
            else
               threshold += hysteresis;
         }
         if (equiv_rate >= threshold)
            break;
      } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
      /* We don't use mediumband anymore, except when explicitly requested or during
         mode transitions. */
      if (bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
         bandwidth = OPUS_BANDWIDTH_WIDEBAND;
      st->bandwidth = st->auto_bandwidth = bandwidth;
      /* Prevents any transition to SWB/FB until the SILK layer has fully
         switched to WB mode and turned the variable LP filter off */
      if (!st->first && st->mode != MODE_CELT_ONLY && !st->silk_mode.inWBmodeWithoutVariableLP && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
         st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
   }

   if (st->bandwidth>st->max_bandwidth)
      st->bandwidth = st->max_bandwidth;

   if (st->user_bandwidth != OPUS_AUTO)
      st->bandwidth = st->user_bandwidth;

   /* This prevents us from using hybrid at unsafe CBR/max rates */
   if (st->mode != MODE_CELT_ONLY && max_rate < 15000)
   {
      st->bandwidth = IMIN(st->bandwidth, OPUS_BANDWIDTH_WIDEBAND);
   }

   /* Prevents Opus from wasting bits on frequencies that are above
      the Nyquist rate of the input signal */
   if (st->Fs <= 24000 && st->bandwidth > OPUS_BANDWIDTH_SUPERWIDEBAND)
      st->bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
   if (st->Fs <= 16000 && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
      st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
   if (st->Fs <= 12000 && st->bandwidth > OPUS_BANDWIDTH_MEDIUMBAND)
      st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
   if (st->Fs <= 8000 && st->bandwidth > OPUS_BANDWIDTH_NARROWBAND)
      st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
#ifndef DISABLE_FLOAT_API
   /* Use detected bandwidth to reduce the encoded bandwidth. */
   if (st->detected_bandwidth && st->user_bandwidth == OPUS_AUTO)
   {
      int min_detected_bandwidth;
      /* Makes bandwidth detection more conservative just in case the detector
         gets it wrong when we could have coded a high bandwidth transparently.
         When operating in SILK/hybrid mode, we don't go below wideband to avoid
         more complicated switches that require redundancy. */
      if (equiv_rate <= 18000*st->stream_channels && st->mode == MODE_CELT_ONLY)
         min_detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
      else if (equiv_rate <= 24000*st->stream_channels && st->mode == MODE_CELT_ONLY)
         min_detected_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
      else if (equiv_rate <= 30000*st->stream_channels)
         min_detected_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
      else if (equiv_rate <= 44000*st->stream_channels)
         min_detected_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
      else
         min_detected_bandwidth = OPUS_BANDWIDTH_FULLBAND;

      st->detected_bandwidth = IMAX(st->detected_bandwidth, min_detected_bandwidth);
      st->bandwidth = IMIN(st->bandwidth, st->detected_bandwidth);
   }
#endif
   st->silk_mode.LBRR_coded = decide_fec(st->silk_mode.useInBandFEC, st->silk_mode.packetLossPercentage,
         st->silk_mode.LBRR_coded, st->mode, &st->bandwidth, equiv_rate);
   celt_encoder_ctl(celt_enc, OPUS_SET_LSB_DEPTH(lsb_depth));

   /* CELT mode doesn't support mediumband, use wideband instead */
   if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
      st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
   if (st->lfe)
      st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;

   curr_bandwidth = st->bandwidth;

   /* Chooses the appropriate mode for speech
      *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
   if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
      st->mode = MODE_HYBRID;
   if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
      st->mode = MODE_SILK_ONLY;

   /* Can't support higher than >60 ms frames, and >20 ms when in Hybrid or CELT-only modes */
   if ((frame_size > st->Fs/50 && (st->mode != MODE_SILK_ONLY)) || frame_size > 3*st->Fs/50)
   {
      int enc_frame_size;
      int nb_frames;
      VARDECL(unsigned char, tmp_data);
      VARDECL(OpusRepacketizer, rp);
      int max_header_bytes;
      opus_int32 repacketize_len;
      opus_int32 max_len_sum;
      opus_int32 tot_size=0;
      unsigned char *curr_data;
      int tmp_len;
      int dtx_count = 0;
      int bak_to_mono;
      int bak_force_channels;

      if (st->mode == MODE_SILK_ONLY)
      {
         if (frame_size == 2*st->Fs/25)  /* 80 ms -> 2x 40 ms */
            enc_frame_size = st->Fs/25;
         else if (frame_size == 3*st->Fs/25)  /* 120 ms -> 2x 60 ms */
            enc_frame_size = 3*st->Fs/50;
         else                            /* 100 ms -> 5x 20 ms */
            enc_frame_size = st->Fs/50;
      }
      else
         enc_frame_size = st->Fs/50;

      nb_frames = frame_size/enc_frame_size;

#ifndef DISABLE_FLOAT_API
      if (analysis_read_pos_bak!= -1)
      {
         /* Reset analysis position to the beginning of the first frame so we
            can use it one frame at a time. */
         st->analysis.read_pos = analysis_read_pos_bak;
         st->analysis.read_subframe = analysis_read_subframe_bak;
      }
#endif

      /* Worst cases:
       * 2 frames: Code 2 with different compressed sizes
       * >2 frames: Code 3 VBR */
      max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);

      if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
         repacketize_len = out_data_bytes;
      else {
         celt_assert(cbr_bytes>=0);
         repacketize_len = IMIN(cbr_bytes, out_data_bytes);
      }
      /* Each sub-frame carries its own TOC byte in tmp_data (hence
         +nb_frames), while the merged packet needs max_header_bytes. */
      max_len_sum = nb_frames + repacketize_len - max_header_bytes;

      ALLOC(tmp_data, max_len_sum, unsigned char);
      curr_data = tmp_data;
      ALLOC(rp, 1, OpusRepacketizer);
      opus_repacketizer_init(rp);

      /* Both settings below are modified only for the duration of the
         sub-frame loop and are put back on every exit path, so that the
         caller-visible force_channels setting is not overwritten. */
      bak_to_mono = st->silk_mode.toMono;
      bak_force_channels = st->force_channels;
      if (bak_to_mono)
         st->force_channels = 1;
      else
         st->prev_channels = st->stream_channels;

      for (i=0;i<nb_frames;i++)
      {
         int first_frame;
         int frame_to_celt;
         int frame_redundancy;
         opus_int32 curr_max;
         /* Attempt DRED encoding until we have a non-DTX frame. In case of DTX refresh,
            that allows for DRED not to be in the first frame. */
         first_frame = (i == 0) || (i == dtx_count);
         st->silk_mode.toMono = 0;
         st->nonfinal_frame = i<(nb_frames-1);

         /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
         frame_to_celt = to_celt && i==nb_frames-1;
         frame_redundancy = redundancy && (frame_to_celt || (!to_celt && i==0));

         /* Byte budget for this sub-frame: its share of the bitrate, capped
            by an even split of the buffer and by what is left in it. */
         curr_max = IMIN(3*st->bitrate_bps/(3*8*st->Fs/enc_frame_size), max_len_sum/nb_frames);
#ifdef ENABLE_DRED
         curr_max = IMIN(curr_max, (max_len_sum-3*dred_bitrate_bps/(3*8*st->Fs/frame_size))/nb_frames);
         if (first_frame) curr_max += 3*dred_bitrate_bps/(3*8*st->Fs/frame_size);
#endif
         curr_max = IMIN(max_len_sum-tot_size, curr_max);
#ifndef DISABLE_FLOAT_API
         if (analysis_read_pos_bak != -1) {
            /* NOTE(review): silence is evaluated over the whole packet
               (pcm, frame_size), not over the current sub-frame; kept
               unchanged here -- confirm that this is intended. */
            is_silence = is_digital_silence(pcm, frame_size, st->channels, lsb_depth);
            /* Get analysis for current frame. */
            tonality_get_info(&st->analysis, &analysis_info, enc_frame_size);
         }
#endif

         tmp_len = opus_encode_frame_native(st, pcm+i*(st->channels*enc_frame_size), enc_frame_size, curr_data, curr_max, float_api, first_frame,
#ifdef ENABLE_DRED
               dred_bitrate_bps,
#endif
#ifndef DISABLE_FLOAT_API
               &analysis_info,
               is_silence,
#endif
               frame_redundancy, celt_to_silk, prefill,
               equiv_rate, frame_to_celt
         );
         if (tmp_len<0)
         {
            st->silk_mode.toMono = bak_to_mono;
            st->force_channels = bak_force_channels;
            RESTORE_STACK;
            return OPUS_INTERNAL_ERROR;
         } else if (tmp_len==1) {
            /* A one-byte result is counted as a DTX frame */
            dtx_count++;
         }
         ret = opus_repacketizer_cat(rp, curr_data, tmp_len);

         if (ret<0)
         {
            st->silk_mode.toMono = bak_to_mono;
            st->force_channels = bak_force_channels;
            RESTORE_STACK;
            return OPUS_INTERNAL_ERROR;
         }
         tot_size += tmp_len;
         curr_data += tmp_len;
      }
      /* Merge the sub-frames; pad for CBR unless every sub-frame was DTX */
      ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr && (dtx_count != nb_frames), NULL, 0);
      if (ret<0)
      {
         ret = OPUS_INTERNAL_ERROR;
      }
      st->silk_mode.toMono = bak_to_mono;
      st->force_channels = bak_force_channels;
      RESTORE_STACK;
      return ret;
   } else {
      ret = opus_encode_frame_native(st, pcm, frame_size, data, max_data_bytes, float_api, 1,
#ifdef ENABLE_DRED
            dred_bitrate_bps,
#endif
#ifndef DISABLE_FLOAT_API
            &analysis_info,
            is_silence,
#endif
            redundancy, celt_to_silk, prefill,
            equiv_rate, to_celt
      );
      RESTORE_STACK;
      return ret;
   }
}
1697
opus_encode_frame_native(OpusEncoder * st,const opus_val16 * pcm,int frame_size,unsigned char * data,opus_int32 max_data_bytes,int float_api,int first_frame,opus_int32 dred_bitrate_bps,AnalysisInfo * analysis_info,int is_silence,int redundancy,int celt_to_silk,int prefill,opus_int32 equiv_rate,int to_celt)1698 static opus_int32 opus_encode_frame_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
1699 unsigned char *data, opus_int32 max_data_bytes,
1700 int float_api, int first_frame,
1701 #ifdef ENABLE_DRED
1702 opus_int32 dred_bitrate_bps,
1703 #endif
1704 #ifndef DISABLE_FLOAT_API
1705 AnalysisInfo *analysis_info, int is_silence,
1706 #endif
1707 int redundancy, int celt_to_silk, int prefill,
1708 opus_int32 equiv_rate, int to_celt)
1709 {
1710 void *silk_enc;
1711 CELTEncoder *celt_enc;
1712 const CELTMode *celt_mode;
1713 int i;
1714 int ret=0;
1715 opus_int32 nBytes;
1716 ec_enc enc;
1717 int bytes_target;
1718 int start_band = 0;
1719 int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
1720 int nb_compr_bytes;
1721 opus_uint32 redundant_rng = 0;
1722 int cutoff_Hz;
1723 int hp_freq_smth1;
1724 opus_val16 HB_gain;
1725 int apply_padding;
1726 int frame_rate;
1727 int curr_bandwidth;
1728 int delay_compensation;
1729 int total_buffer;
1730 opus_int activity = VAD_NO_DECISION;
1731 VARDECL(opus_val16, pcm_buf);
1732 VARDECL(opus_val16, tmp_prefill);
1733 SAVE_STACK;
1734
1735 st->rangeFinal = 0;
1736 silk_enc = (char*)st+st->silk_enc_offset;
1737 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
1738 celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode));
1739 curr_bandwidth = st->bandwidth;
1740 if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
1741 delay_compensation = 0;
1742 else
1743 delay_compensation = st->delay_compensation;
1744 total_buffer = delay_compensation;
1745
1746 frame_rate = st->Fs/frame_size;
1747
1748 #ifndef DISABLE_FLOAT_API
1749 if (is_silence)
1750 {
1751 activity = !is_silence;
1752 } else if (analysis_info->valid)
1753 {
1754 activity = analysis_info->activity_probability >= DTX_ACTIVITY_THRESHOLD;
1755 if (!activity)
1756 {
1757 /* Mark as active if this noise frame is sufficiently loud */
1758 opus_val32 noise_energy = compute_frame_energy(pcm, frame_size, st->channels, st->arch);
1759 activity = st->peak_signal_energy < (PSEUDO_SNR_THRESHOLD * noise_energy);
1760 }
1761 }
1762 #endif
1763
1764 /* For the first frame at a new SILK bandwidth */
1765 if (st->silk_bw_switch)
1766 {
1767 redundancy = 1;
1768 celt_to_silk = 1;
1769 st->silk_bw_switch = 0;
1770 /* Do a prefill without resetting the sampling rate control. */
1771 prefill=2;
1772 }
1773
1774 /* If we decided to go with CELT, make sure redundancy is off, no matter what
1775 we decided earlier. */
1776 if (st->mode == MODE_CELT_ONLY)
1777 redundancy = 0;
1778
1779 if (redundancy)
1780 {
1781 redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels);
1782 if (redundancy_bytes == 0)
1783 redundancy = 0;
1784 }
1785
1786 /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
1787 bytes_target = IMIN(max_data_bytes-redundancy_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
1788
1789 data += 1;
1790
1791 ec_enc_init(&enc, data, max_data_bytes-1);
1792
1793 ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
1794 OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
1795
1796 if (st->mode == MODE_CELT_ONLY)
1797 hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
1798 else
1799 hp_freq_smth1 = ((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.variable_HP_smth1_Q15;
1800
1801 st->variable_HP_smth2_Q15 = silk_SMLAWB( st->variable_HP_smth2_Q15,
1802 hp_freq_smth1 - st->variable_HP_smth2_Q15, SILK_FIX_CONST( VARIABLE_HP_SMTH_COEF2, 16 ) );
1803
1804 /* convert from log scale to Hertz */
1805 cutoff_Hz = silk_log2lin( silk_RSHIFT( st->variable_HP_smth2_Q15, 8 ) );
1806
1807 if (st->application == OPUS_APPLICATION_VOIP)
1808 {
1809 hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs, st->arch);
1810
1811 #ifdef ENABLE_OSCE_TRAINING_DATA
1812 /* write out high pass filtered clean signal*/
1813 static FILE *fout =NULL;
1814 if (fout == NULL)
1815 {
1816 fout = fopen("clean_hp.s16", "wb");
1817 }
1818
1819 {
1820 int idx;
1821 opus_int16 tmp;
1822 for (idx = 0; idx < frame_size; idx++)
1823 {
1824 tmp = (opus_int16) (32768 * pcm_buf[total_buffer + idx] + 0.5f);
1825 fwrite(&tmp, sizeof(tmp), 1, fout);
1826 }
1827 }
1828 #endif
1829 } else {
1830 dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
1831 }
1832 #ifndef FIXED_POINT
1833 if (float_api)
1834 {
1835 opus_val32 sum;
1836 sum = celt_inner_prod(&pcm_buf[total_buffer*st->channels], &pcm_buf[total_buffer*st->channels], frame_size*st->channels, st->arch);
1837 /* This should filter out both NaNs and ridiculous signals that could
1838 cause NaNs further down. */
1839 if (!(sum < 1e9f) || celt_isnan(sum))
1840 {
1841 OPUS_CLEAR(&pcm_buf[total_buffer*st->channels], frame_size*st->channels);
1842 st->hp_mem[0] = st->hp_mem[1] = st->hp_mem[2] = st->hp_mem[3] = 0;
1843 }
1844 }
1845 #else
1846 (void)float_api;
1847 #endif
1848
1849 #ifdef ENABLE_DRED
1850 if ( st->dred_duration > 0 && st->dred_encoder.loaded ) {
1851 int frame_size_400Hz;
1852 /* DRED Encoder */
1853 dred_compute_latents( &st->dred_encoder, &pcm_buf[total_buffer*st->channels], frame_size, total_buffer, st->arch );
1854 frame_size_400Hz = frame_size*400/st->Fs;
1855 OPUS_MOVE(&st->activity_mem[frame_size_400Hz], st->activity_mem, 4*DRED_MAX_FRAMES-frame_size_400Hz);
1856 for (i=0;i<frame_size_400Hz;i++)
1857 st->activity_mem[i] = activity;
1858 } else {
1859 st->dred_encoder.latents_buffer_fill = 0;
1860 OPUS_CLEAR(st->activity_mem, DRED_MAX_FRAMES);
1861 }
1862 #endif
1863
1864 /* SILK processing */
1865 HB_gain = Q15ONE;
1866 if (st->mode != MODE_CELT_ONLY)
1867 {
1868 opus_int32 total_bitRate, celt_rate;
1869 #ifdef FIXED_POINT
1870 const opus_int16 *pcm_silk;
1871 #else
1872 VARDECL(opus_int16, pcm_silk);
1873 ALLOC(pcm_silk, st->channels*frame_size, opus_int16);
1874 #endif
1875
1876 /* Distribute bits between SILK and CELT */
1877 total_bitRate = 8 * bytes_target * frame_rate;
1878 if( st->mode == MODE_HYBRID ) {
1879 /* Base rate for SILK */
1880 st->silk_mode.bitRate = compute_silk_rate_for_hybrid(total_bitRate,
1881 curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded,
1882 st->stream_channels);
1883 if (!st->energy_masking)
1884 {
1885 /* Increasingly attenuate high band when it gets allocated fewer bits */
1886 celt_rate = total_bitRate - st->silk_mode.bitRate;
1887 HB_gain = Q15ONE - SHR32(celt_exp2(-celt_rate * QCONST16(1.f/1024, 10)), 1);
1888 }
1889 } else {
1890 /* SILK gets all bits */
1891 st->silk_mode.bitRate = total_bitRate;
1892 }
1893
1894 /* Surround masking for SILK */
1895 if (st->energy_masking && st->use_vbr && !st->lfe)
1896 {
1897 opus_val32 mask_sum=0;
1898 opus_val16 masking_depth;
1899 opus_int32 rate_offset;
1900 int c;
1901 int end = 17;
1902 opus_int16 srate = 16000;
1903 if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
1904 {
1905 end = 13;
1906 srate = 8000;
1907 } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
1908 {
1909 end = 15;
1910 srate = 12000;
1911 }
1912 for (c=0;c<st->channels;c++)
1913 {
1914 for(i=0;i<end;i++)
1915 {
1916 opus_val16 mask;
1917 mask = MAX16(MIN16(st->energy_masking[21*c+i],
1918 QCONST16(.5f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT));
1919 if (mask > 0)
1920 mask = HALF16(mask);
1921 mask_sum += mask;
1922 }
1923 }
1924 /* Conservative rate reduction, we cut the masking in half */
1925 masking_depth = mask_sum / end*st->channels;
1926 masking_depth += QCONST16(.2f, DB_SHIFT);
1927 rate_offset = (opus_int32)PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT);
1928 rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3);
1929 /* Split the rate change between the SILK and CELT part for hybrid. */
1930 if (st->bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND || st->bandwidth==OPUS_BANDWIDTH_FULLBAND)
1931 st->silk_mode.bitRate += 3*rate_offset/5;
1932 else
1933 st->silk_mode.bitRate += rate_offset;
1934 }
1935
1936 st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
1937 st->silk_mode.nChannelsAPI = st->channels;
1938 st->silk_mode.nChannelsInternal = st->stream_channels;
1939 if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
1940 st->silk_mode.desiredInternalSampleRate = 8000;
1941 } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
1942 st->silk_mode.desiredInternalSampleRate = 12000;
1943 } else {
1944 celt_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
1945 st->silk_mode.desiredInternalSampleRate = 16000;
1946 }
1947 if( st->mode == MODE_HYBRID ) {
1948 /* Don't allow bandwidth reduction at lowest bitrates in hybrid mode */
1949 st->silk_mode.minInternalSampleRate = 16000;
1950 } else {
1951 st->silk_mode.minInternalSampleRate = 8000;
1952 }
1953
1954 st->silk_mode.maxInternalSampleRate = 16000;
1955 if (st->mode == MODE_SILK_ONLY)
1956 {
1957 opus_int32 effective_max_rate = frame_rate*max_data_bytes*8;
1958 if (frame_rate > 50)
1959 effective_max_rate = effective_max_rate*2/3;
1960 if (effective_max_rate < 8000)
1961 {
1962 st->silk_mode.maxInternalSampleRate = 12000;
1963 st->silk_mode.desiredInternalSampleRate = IMIN(12000, st->silk_mode.desiredInternalSampleRate);
1964 }
1965 if (effective_max_rate < 7000)
1966 {
1967 st->silk_mode.maxInternalSampleRate = 8000;
1968 st->silk_mode.desiredInternalSampleRate = IMIN(8000, st->silk_mode.desiredInternalSampleRate);
1969 }
1970 }
1971
1972 st->silk_mode.useCBR = !st->use_vbr;
1973
1974 /* Call SILK encoder for the low band */
1975
1976 /* Max bits for SILK, counting ToC, redundancy bytes, and optionally redundancy. */
1977 st->silk_mode.maxBits = (max_data_bytes-1)*8;
1978 if (redundancy && redundancy_bytes >= 2)
1979 {
1980 /* Counting 1 bit for redundancy position and 20 bits for flag+size (only for hybrid). */
1981 st->silk_mode.maxBits -= redundancy_bytes*8 + 1;
1982 if (st->mode == MODE_HYBRID)
1983 st->silk_mode.maxBits -= 20;
1984 }
1985 if (st->silk_mode.useCBR)
1986 {
1987 /* When we're in CBR mode, but we have non-SILK data to encode, switch SILK to VBR with cap to
1988 save on complexity. Any variations will be absorbed by CELT and/or DRED and we can still
1989 produce a constant bitrate without wasting bits. */
1990 #ifdef ENABLE_DRED
1991 if (st->mode == MODE_HYBRID || dred_bitrate_bps > 0)
1992 #else
1993 if (st->mode == MODE_HYBRID)
1994 #endif
1995 {
1996 /* Allow SILK to steal up to 25% of the remaining bits */
1997 opus_int16 other_bits = IMAX(0, st->silk_mode.maxBits - st->silk_mode.bitRate * frame_size / st->Fs);
1998 st->silk_mode.maxBits = IMAX(0, st->silk_mode.maxBits - other_bits*3/4);
1999 st->silk_mode.useCBR = 0;
2000 }
2001 } else {
2002 /* Constrained VBR. */
2003 if (st->mode == MODE_HYBRID)
2004 {
2005 /* Compute SILK bitrate corresponding to the max total bits available */
2006 opus_int32 maxBitRate = compute_silk_rate_for_hybrid(st->silk_mode.maxBits*st->Fs / frame_size,
2007 curr_bandwidth, st->Fs == 50 * frame_size, st->use_vbr, st->silk_mode.LBRR_coded,
2008 st->stream_channels);
2009 st->silk_mode.maxBits = maxBitRate * frame_size / st->Fs;
2010 }
2011 }
2012
2013 if (prefill)
2014 {
2015 opus_int32 zero=0;
2016 int prefill_offset;
2017 /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode
2018 a discontinuity. The exact location is what we need to avoid leaving any "gap"
2019 in the audio when mixing with the redundant CELT frame. Here we can afford to
2020 overwrite st->delay_buffer because the only thing that uses it before it gets
2021 rewritten is tmp_prefill[] and even then only the part after the ramp really
2022 gets used (rather than sent to the encoder and discarded) */
2023 prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
2024 gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
2025 0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
2026 OPUS_CLEAR(st->delay_buffer, prefill_offset);
2027 #ifdef FIXED_POINT
2028 pcm_silk = st->delay_buffer;
2029 #else
2030 for (i=0;i<st->encoder_buffer*st->channels;i++)
2031 pcm_silk[i] = FLOAT2INT16(st->delay_buffer[i]);
2032 #endif
2033 silk_Encode( silk_enc, &st->silk_mode, pcm_silk, st->encoder_buffer, NULL, &zero, prefill, activity );
2034 /* Prevent a second switch in the real encode call. */
2035 st->silk_mode.opusCanSwitch = 0;
2036 }
2037
2038 #ifdef FIXED_POINT
2039 pcm_silk = pcm_buf+total_buffer*st->channels;
2040 #else
2041 for (i=0;i<frame_size*st->channels;i++)
2042 pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
2043 #endif
2044 ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0, activity );
2045 if( ret ) {
2046 /*fprintf (stderr, "SILK encode error: %d\n", ret);*/
2047 /* Handle error */
2048 RESTORE_STACK;
2049 return OPUS_INTERNAL_ERROR;
2050 }
2051
2052 /* Extract SILK internal bandwidth for signaling in first byte */
2053 if( st->mode == MODE_SILK_ONLY ) {
2054 if( st->silk_mode.internalSampleRate == 8000 ) {
2055 curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
2056 } else if( st->silk_mode.internalSampleRate == 12000 ) {
2057 curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
2058 } else if( st->silk_mode.internalSampleRate == 16000 ) {
2059 curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
2060 }
2061 } else {
2062 celt_assert( st->silk_mode.internalSampleRate == 16000 );
2063 }
2064
2065 st->silk_mode.opusCanSwitch = st->silk_mode.switchReady && !st->nonfinal_frame;
2066
2067 if (nBytes==0)
2068 {
2069 st->rangeFinal = 0;
2070 data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
2071 RESTORE_STACK;
2072 return 1;
2073 }
2074
2075 /* FIXME: How do we allocate the redundancy for CBR? */
2076 if (st->silk_mode.opusCanSwitch)
2077 {
2078 redundancy_bytes = compute_redundancy_bytes(max_data_bytes, st->bitrate_bps, frame_rate, st->stream_channels);
2079 redundancy = (redundancy_bytes != 0);
2080 celt_to_silk = 0;
2081 st->silk_bw_switch = 1;
2082 }
2083 }
2084
2085 /* CELT processing */
2086 {
2087 int endband=21;
2088
2089 switch(curr_bandwidth)
2090 {
2091 case OPUS_BANDWIDTH_NARROWBAND:
2092 endband = 13;
2093 break;
2094 case OPUS_BANDWIDTH_MEDIUMBAND:
2095 case OPUS_BANDWIDTH_WIDEBAND:
2096 endband = 17;
2097 break;
2098 case OPUS_BANDWIDTH_SUPERWIDEBAND:
2099 endband = 19;
2100 break;
2101 case OPUS_BANDWIDTH_FULLBAND:
2102 endband = 21;
2103 break;
2104 }
2105 celt_encoder_ctl(celt_enc, CELT_SET_END_BAND(endband));
2106 celt_encoder_ctl(celt_enc, CELT_SET_CHANNELS(st->stream_channels));
2107 }
2108 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
2109 if (st->mode != MODE_SILK_ONLY)
2110 {
2111 opus_val32 celt_pred=2;
2112 /* We may still decide to disable prediction later */
2113 if (st->silk_mode.reducedDependency)
2114 celt_pred = 0;
2115 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(celt_pred));
2116 }
2117
2118 ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
2119 if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
2120 {
2121 OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
2122 }
2123
2124 if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
2125 {
2126 OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
2127 OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
2128 &pcm_buf[0],
2129 (frame_size+total_buffer)*st->channels);
2130 } else {
2131 OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
2132 }
2133 /* gain_fade() and stereo_fade() need to be after the buffer copying
2134 because we don't want any of this to affect the SILK part */
2135 if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {
2136 gain_fade(pcm_buf, pcm_buf,
2137 st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs);
2138 }
2139 st->prev_HB_gain = HB_gain;
2140 if (st->mode != MODE_HYBRID || st->stream_channels==1)
2141 {
2142 if (equiv_rate > 32000)
2143 st->silk_mode.stereoWidth_Q14 = 16384;
2144 else if (equiv_rate < 16000)
2145 st->silk_mode.stereoWidth_Q14 = 0;
2146 else
2147 st->silk_mode.stereoWidth_Q14 = 16384 - 2048*(opus_int32)(32000-equiv_rate)/(equiv_rate-14000);
2148 }
2149 if( !st->energy_masking && st->channels == 2 ) {
2150 /* Apply stereo width reduction (at low bitrates) */
2151 if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
2152 opus_val16 g1, g2;
2153 g1 = st->hybrid_stereo_width_Q14;
2154 g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14);
2155 #ifdef FIXED_POINT
2156 g1 = g1==16384 ? Q15ONE : SHL16(g1,1);
2157 g2 = g2==16384 ? Q15ONE : SHL16(g2,1);
2158 #else
2159 g1 *= (1.f/16384);
2160 g2 *= (1.f/16384);
2161 #endif
2162 stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
2163 frame_size, st->channels, celt_mode->window, st->Fs);
2164 st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
2165 }
2166 }
2167
2168 if ( st->mode != MODE_CELT_ONLY && ec_tell(&enc)+17+20*(st->mode == MODE_HYBRID) <= 8*(max_data_bytes-1))
2169 {
2170 /* For SILK mode, the redundancy is inferred from the length */
2171 if (st->mode == MODE_HYBRID)
2172 ec_enc_bit_logp(&enc, redundancy, 12);
2173 if (redundancy)
2174 {
2175 int max_redundancy;
2176 ec_enc_bit_logp(&enc, celt_to_silk, 1);
2177 if (st->mode == MODE_HYBRID)
2178 {
2179 /* Reserve the 8 bits needed for the redundancy length,
2180 and at least a few bits for CELT if possible */
2181 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+8+3+7)>>3);
2182 }
2183 else
2184 max_redundancy = (max_data_bytes-1)-((ec_tell(&enc)+7)>>3);
2185 /* Target the same bit-rate for redundancy as for the rest,
2186 up to a max of 257 bytes */
2187 redundancy_bytes = IMIN(max_redundancy, redundancy_bytes);
2188 redundancy_bytes = IMIN(257, IMAX(2, redundancy_bytes));
2189 if (st->mode == MODE_HYBRID)
2190 ec_enc_uint(&enc, redundancy_bytes-2, 256);
2191 }
2192 } else {
2193 redundancy = 0;
2194 }
2195
2196 if (!redundancy)
2197 {
2198 st->silk_bw_switch = 0;
2199 redundancy_bytes = 0;
2200 }
2201 if (st->mode != MODE_CELT_ONLY)start_band=17;
2202
2203 if (st->mode == MODE_SILK_ONLY)
2204 {
2205 ret = (ec_tell(&enc)+7)>>3;
2206 ec_enc_done(&enc);
2207 nb_compr_bytes = ret;
2208 } else {
2209 nb_compr_bytes = (max_data_bytes-1)-redundancy_bytes;
2210 #ifdef ENABLE_DRED
2211 if (st->dred_duration > 0)
2212 {
2213 int max_celt_bytes;
2214 opus_int32 dred_bytes = dred_bitrate_bps/(frame_rate*8);
2215 /* Allow CELT to steal up to 25% of the remaining bits. */
2216 max_celt_bytes = nb_compr_bytes - dred_bytes*3/4;
2217 /* But try to give CELT at least 5 bytes to prevent a mismatch with
2218 the redundancy signaling. */
2219 max_celt_bytes = IMAX((ec_tell(&enc)+7)/8 + 5, max_celt_bytes);
2220 /* Subject to the original max. */
2221 nb_compr_bytes = IMIN(nb_compr_bytes, max_celt_bytes);
2222 }
2223 #endif
2224 ec_enc_shrink(&enc, nb_compr_bytes);
2225 }
2226
2227 #ifndef DISABLE_FLOAT_API
2228 if (redundancy || st->mode != MODE_SILK_ONLY)
2229 celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info));
2230 #endif
2231 if (st->mode == MODE_HYBRID) {
2232 SILKInfo info;
2233 info.signalType = st->silk_mode.signalType;
2234 info.offset = st->silk_mode.offset;
2235 celt_encoder_ctl(celt_enc, CELT_SET_SILK_INFO(&info));
2236 }
2237
2238 /* 5 ms redundant frame for CELT->SILK */
2239 if (redundancy && celt_to_silk)
2240 {
2241 int err;
2242 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
2243 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
2244 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
2245 err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
2246 if (err < 0)
2247 {
2248 RESTORE_STACK;
2249 return OPUS_INTERNAL_ERROR;
2250 }
2251 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
2252 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
2253 }
2254
2255 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(start_band));
2256
2257 if (st->mode != MODE_SILK_ONLY)
2258 {
2259 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(st->use_vbr));
2260 if (st->mode == MODE_HYBRID)
2261 {
2262 if( st->use_vbr ) {
2263 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps-st->silk_mode.bitRate));
2264 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
2265 }
2266 } else {
2267 if (st->use_vbr)
2268 {
2269 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
2270 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(st->vbr_constraint));
2271 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(st->bitrate_bps));
2272 }
2273 }
2274 #ifdef ENABLE_DRED
2275 /* When Using DRED CBR, we can actually make the CELT part VBR and have DRED pick up the slack. */
2276 if (!st->use_vbr && st->dred_duration > 0)
2277 {
2278 opus_int32 celt_bitrate = st->bitrate_bps;
2279 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1));
2280 celt_encoder_ctl(celt_enc, OPUS_SET_VBR_CONSTRAINT(0));
2281 if (st->mode == MODE_HYBRID) {
2282 celt_bitrate -= st->silk_mode.bitRate;
2283 }
2284 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(celt_bitrate));
2285 }
2286 #endif
2287 if (st->mode != st->prev_mode && st->prev_mode > 0)
2288 {
2289 unsigned char dummy[2];
2290 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
2291
2292 /* Prefilling */
2293 celt_encode_with_ec(celt_enc, tmp_prefill, st->Fs/400, dummy, 2, NULL);
2294 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
2295 }
2296 /* If false, we already busted the budget and we'll end up with a "PLC frame" */
2297 if (ec_tell(&enc) <= 8*nb_compr_bytes)
2298 {
2299 ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
2300 if (ret < 0)
2301 {
2302 RESTORE_STACK;
2303 return OPUS_INTERNAL_ERROR;
2304 }
2305 /* Put CELT->SILK redundancy data in the right place. */
2306 if (redundancy && celt_to_silk && st->mode==MODE_HYBRID && nb_compr_bytes != ret)
2307 {
2308 OPUS_MOVE(data+ret, data+nb_compr_bytes, redundancy_bytes);
2309 nb_compr_bytes = ret+redundancy_bytes;
2310 }
2311 }
2312 }
2313
2314 /* 5 ms redundant frame for SILK->CELT */
2315 if (redundancy && !celt_to_silk)
2316 {
2317 int err;
2318 unsigned char dummy[2];
2319 int N2, N4;
2320 N2 = st->Fs/200;
2321 N4 = st->Fs/400;
2322
2323 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
2324 celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
2325 celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
2326 celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
2327 celt_encoder_ctl(celt_enc, OPUS_SET_BITRATE(OPUS_BITRATE_MAX));
2328
2329 if (st->mode == MODE_HYBRID)
2330 {
2331 /* Shrink packet to what the encoder actually used. */
2332 nb_compr_bytes = ret;
2333 ec_enc_shrink(&enc, nb_compr_bytes);
2334 }
2335 /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
2336 celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
2337
2338 err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
2339 if (err < 0)
2340 {
2341 RESTORE_STACK;
2342 return OPUS_INTERNAL_ERROR;
2343 }
2344 celt_encoder_ctl(celt_enc, OPUS_GET_FINAL_RANGE(&redundant_rng));
2345 }
2346
2347
2348
2349 /* Signalling the mode in the first byte */
2350 data--;
2351 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
2352
2353 st->rangeFinal = enc.rng ^ redundant_rng;
2354
2355 if (to_celt)
2356 st->prev_mode = MODE_CELT_ONLY;
2357 else
2358 st->prev_mode = st->mode;
2359 st->prev_channels = st->stream_channels;
2360 st->prev_framesize = frame_size;
2361
2362 st->first = 0;
2363
2364 /* DTX decision */
2365 #ifndef DISABLE_FLOAT_API
2366 if (st->use_dtx && (analysis_info->valid || is_silence))
2367 {
2368 if (decide_dtx_mode(activity, &st->nb_no_activity_ms_Q1, 2*1000*frame_size/st->Fs))
2369 {
2370 st->rangeFinal = 0;
2371 data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
2372 RESTORE_STACK;
2373 return 1;
2374 }
2375 } else {
2376 st->nb_no_activity_ms_Q1 = 0;
2377 }
2378 #endif
2379
2380 /* In the unlikely case that the SILK encoder busted its target, tell
2381 the decoder to call the PLC */
2382 if (ec_tell(&enc) > (max_data_bytes-1)*8)
2383 {
2384 if (max_data_bytes < 2)
2385 {
2386 RESTORE_STACK;
2387 return OPUS_BUFFER_TOO_SMALL;
2388 }
2389 data[1] = 0;
2390 ret = 1;
2391 st->rangeFinal = 0;
2392 } else if (st->mode==MODE_SILK_ONLY&&!redundancy)
2393 {
2394 /*When in LPC only mode it's perfectly
2395 reasonable to strip off trailing zero bytes as
2396 the required range decoder behavior is to
2397 fill these in. This can't be done when the MDCT
2398 modes are used because the decoder needs to know
2399 the actual length for allocation purposes.*/
2400 while(ret>2&&data[ret]==0)ret--;
2401 }
2402 /* Count ToC and redundancy */
2403 ret += 1+redundancy_bytes;
2404 apply_padding = !st->use_vbr;
2405 #ifdef ENABLE_DRED
2406 if (st->dred_duration > 0 && st->dred_encoder.loaded && first_frame) {
2407 opus_extension_data extension;
2408 unsigned char buf[DRED_MAX_DATA_SIZE];
2409 int dred_chunks;
2410 int dred_bytes_left;
2411 dred_chunks = IMIN((st->dred_duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2);
2412 if (st->use_vbr) dred_chunks = IMIN(dred_chunks, st->dred_target_chunks);
2413 /* Remaining space for DRED, accounting for cost the 3 extra bytes for code 3, padding length, and extension number. */
2414 dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-3);
2415 /* Account for the extra bytes required to signal large padding length. */
2416 dred_bytes_left -= (dred_bytes_left+1+DRED_EXPERIMENTAL_BYTES)/255;
2417 /* Check whether we actually have something to encode. */
2418 if (dred_chunks >= 1 && dred_bytes_left >= DRED_MIN_BYTES+DRED_EXPERIMENTAL_BYTES) {
2419 int dred_bytes;
2420 #ifdef DRED_EXPERIMENTAL_VERSION
2421 /* Add temporary extension type and version.
2422 These bytes will be removed once extension is finalized. */
2423 buf[0] = 'D';
2424 buf[1] = DRED_EXPERIMENTAL_VERSION;
2425 #endif
2426 dred_bytes = dred_encode_silk_frame(&st->dred_encoder, buf+DRED_EXPERIMENTAL_BYTES, dred_chunks, dred_bytes_left-DRED_EXPERIMENTAL_BYTES,
2427 st->dred_q0, st->dred_dQ, st->dred_qmax, st->activity_mem, st->arch);
2428 if (dred_bytes > 0) {
2429 dred_bytes += DRED_EXPERIMENTAL_BYTES;
2430 celt_assert(dred_bytes <= dred_bytes_left);
2431 extension.id = DRED_EXTENSION_ID;
2432 extension.frame = 0;
2433 extension.data = buf;
2434 extension.len = dred_bytes;
2435 ret = opus_packet_pad_impl(data, ret, max_data_bytes, !st->use_vbr, &extension, 1);
2436 if (ret < 0)
2437 {
2438 RESTORE_STACK;
2439 return OPUS_INTERNAL_ERROR;
2440 }
2441 apply_padding = 0;
2442 }
2443 }
2444 }
2445 #else
2446 (void)first_frame; /* Avoids a warning about first_frame being unused. */
2447 #endif
2448 if (apply_padding)
2449 {
2450 if (opus_packet_pad(data, ret, max_data_bytes) != OPUS_OK)
2451 {
2452 RESTORE_STACK;
2453 return OPUS_INTERNAL_ERROR;
2454 }
2455 ret = max_data_bytes;
2456 }
2457 RESTORE_STACK;
2458 return ret;
2459 }
2460
2461 #ifdef FIXED_POINT
2462
2463 #ifndef DISABLE_FLOAT_API
/* Float-input entry point used when the library is built with FIXED_POINT.
   Each input sample is converted with FLOAT2INT16() into a temporary 16-bit
   buffer, which is what opus_encode_native() encodes; the untouched float
   buffer is forwarded alongside it together with downmix_float.
   Returns OPUS_BAD_ARG when frame_size_select() does not produce a positive
   frame size, otherwise the value returned by opus_encode_native(). */
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int n;
   int nb_samples;
   int frame_size;
   int status;
   VARDECL(opus_int16, pcm16);
   ALLOC_STACK;

   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
   if (frame_size <= 0)
   {
      /* Nothing has been allocated yet, but the stack marker still has to
         be released before leaving. */
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   /* Total number of interleaved samples across all channels. */
   nb_samples = frame_size*st->channels;
   ALLOC(pcm16, nb_samples, opus_int16);

   for (n=0;n<nb_samples;n++)
   {
      pcm16[n] = FLOAT2INT16(pcm[n]);
   }

   status = opus_encode_native(st, pcm16, frame_size, data, max_data_bytes, 16,
                               pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
   RESTORE_STACK;
   return status;
}
2487 #endif
2488
/* 16-bit PCM entry point used when the library is built with FIXED_POINT.
   No sample conversion is needed: the caller's buffer is handed to
   opus_encode_native() both as the signal to encode and as the buffer that
   accompanies downmix_int. The result of opus_encode_native() is returned
   unchanged. */
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 out_data_bytes)
{
   const int selected_size =
         frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);

   return opus_encode_native(st, pcm, selected_size, data, out_data_bytes, 16,
                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
}
2497
2498 #else
/* 16-bit PCM entry point used when the library is NOT built with FIXED_POINT.
   Each input sample is scaled by 1/32768 into a temporary float buffer,
   which is what opus_encode_native() encodes; the original 16-bit buffer is
   forwarded alongside it together with downmix_int.
   Returns OPUS_BAD_ARG when frame_size_select() does not produce a positive
   frame size, otherwise the value returned by opus_encode_native(). */
opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 max_data_bytes)
{
   int n;
   int nb_samples;
   int frame_size;
   int status;
   VARDECL(float, pcm_float);
   ALLOC_STACK;

   frame_size = frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);
   if (frame_size <= 0)
   {
      /* Nothing has been allocated yet, but the stack marker still has to
         be released before leaving. */
      RESTORE_STACK;
      return OPUS_BAD_ARG;
   }

   /* Total number of interleaved samples across all channels. */
   nb_samples = frame_size*st->channels;
   ALLOC(pcm_float, nb_samples, float);

   for (n=0;n<nb_samples;n++)
   {
      pcm_float[n] = pcm[n]*(1.0f/32768);
   }

   status = opus_encode_native(st, pcm_float, frame_size, data, max_data_bytes, 16,
                               pcm, analysis_frame_size, 0, -2, st->channels, downmix_int, 0);
   RESTORE_STACK;
   return status;
}
/* Float-input entry point used when the library is NOT built with
   FIXED_POINT. No sample conversion is needed: the caller's buffer is handed
   to opus_encode_native() both as the signal to encode and as the buffer
   that accompanies downmix_float. The result of opus_encode_native() is
   returned unchanged. */
opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size,
      unsigned char *data, opus_int32 out_data_bytes)
{
   const int selected_size =
         frame_size_select(analysis_frame_size, st->variable_duration, st->Fs);

   return opus_encode_native(st, pcm, selected_size, data, out_data_bytes, 24,
                             pcm, analysis_frame_size, 0, -2, st->channels, downmix_float, 1);
}
2530 #endif
2531
2532
opus_encoder_ctl(OpusEncoder * st,int request,...)2533 int opus_encoder_ctl(OpusEncoder *st, int request, ...)
2534 {
2535 int ret;
2536 CELTEncoder *celt_enc;
2537 va_list ap;
2538
2539 ret = OPUS_OK;
2540 va_start(ap, request);
2541
2542 celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
2543
2544 switch (request)
2545 {
2546 case OPUS_SET_APPLICATION_REQUEST:
2547 {
2548 opus_int32 value = va_arg(ap, opus_int32);
2549 if ( (value != OPUS_APPLICATION_VOIP && value != OPUS_APPLICATION_AUDIO
2550 && value != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
2551 || (!st->first && st->application != value))
2552 {
2553 ret = OPUS_BAD_ARG;
2554 break;
2555 }
2556 st->application = value;
2557 #ifndef DISABLE_FLOAT_API
2558 st->analysis.application = value;
2559 #endif
2560 }
2561 break;
2562 case OPUS_GET_APPLICATION_REQUEST:
2563 {
2564 opus_int32 *value = va_arg(ap, opus_int32*);
2565 if (!value)
2566 {
2567 goto bad_arg;
2568 }
2569 *value = st->application;
2570 }
2571 break;
2572 case OPUS_SET_BITRATE_REQUEST:
2573 {
2574 opus_int32 value = va_arg(ap, opus_int32);
2575 if (value != OPUS_AUTO && value != OPUS_BITRATE_MAX)
2576 {
2577 if (value <= 0)
2578 goto bad_arg;
2579 else if (value <= 500)
2580 value = 500;
2581 else if (value > (opus_int32)300000*st->channels)
2582 value = (opus_int32)300000*st->channels;
2583 }
2584 st->user_bitrate_bps = value;
2585 }
2586 break;
2587 case OPUS_GET_BITRATE_REQUEST:
2588 {
2589 opus_int32 *value = va_arg(ap, opus_int32*);
2590 if (!value)
2591 {
2592 goto bad_arg;
2593 }
2594 *value = user_bitrate_to_bitrate(st, st->prev_framesize, 1276);
2595 }
2596 break;
2597 case OPUS_SET_FORCE_CHANNELS_REQUEST:
2598 {
2599 opus_int32 value = va_arg(ap, opus_int32);
2600 if((value<1 || value>st->channels) && value != OPUS_AUTO)
2601 {
2602 goto bad_arg;
2603 }
2604 st->force_channels = value;
2605 }
2606 break;
2607 case OPUS_GET_FORCE_CHANNELS_REQUEST:
2608 {
2609 opus_int32 *value = va_arg(ap, opus_int32*);
2610 if (!value)
2611 {
2612 goto bad_arg;
2613 }
2614 *value = st->force_channels;
2615 }
2616 break;
2617 case OPUS_SET_MAX_BANDWIDTH_REQUEST:
2618 {
2619 opus_int32 value = va_arg(ap, opus_int32);
2620 if (value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND)
2621 {
2622 goto bad_arg;
2623 }
2624 st->max_bandwidth = value;
2625 if (st->max_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
2626 st->silk_mode.maxInternalSampleRate = 8000;
2627 } else if (st->max_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
2628 st->silk_mode.maxInternalSampleRate = 12000;
2629 } else {
2630 st->silk_mode.maxInternalSampleRate = 16000;
2631 }
2632 }
2633 break;
2634 case OPUS_GET_MAX_BANDWIDTH_REQUEST:
2635 {
2636 opus_int32 *value = va_arg(ap, opus_int32*);
2637 if (!value)
2638 {
2639 goto bad_arg;
2640 }
2641 *value = st->max_bandwidth;
2642 }
2643 break;
2644 case OPUS_SET_BANDWIDTH_REQUEST:
2645 {
2646 opus_int32 value = va_arg(ap, opus_int32);
2647 if ((value < OPUS_BANDWIDTH_NARROWBAND || value > OPUS_BANDWIDTH_FULLBAND) && value != OPUS_AUTO)
2648 {
2649 goto bad_arg;
2650 }
2651 st->user_bandwidth = value;
2652 if (st->user_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
2653 st->silk_mode.maxInternalSampleRate = 8000;
2654 } else if (st->user_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
2655 st->silk_mode.maxInternalSampleRate = 12000;
2656 } else {
2657 st->silk_mode.maxInternalSampleRate = 16000;
2658 }
2659 }
2660 break;
2661 case OPUS_GET_BANDWIDTH_REQUEST:
2662 {
2663 opus_int32 *value = va_arg(ap, opus_int32*);
2664 if (!value)
2665 {
2666 goto bad_arg;
2667 }
2668 *value = st->bandwidth;
2669 }
2670 break;
2671 case OPUS_SET_DTX_REQUEST:
2672 {
2673 opus_int32 value = va_arg(ap, opus_int32);
2674 if(value<0 || value>1)
2675 {
2676 goto bad_arg;
2677 }
2678 st->use_dtx = value;
2679 }
2680 break;
2681 case OPUS_GET_DTX_REQUEST:
2682 {
2683 opus_int32 *value = va_arg(ap, opus_int32*);
2684 if (!value)
2685 {
2686 goto bad_arg;
2687 }
2688 *value = st->use_dtx;
2689 }
2690 break;
2691 case OPUS_SET_COMPLEXITY_REQUEST:
2692 {
2693 opus_int32 value = va_arg(ap, opus_int32);
2694 if(value<0 || value>10)
2695 {
2696 goto bad_arg;
2697 }
2698 st->silk_mode.complexity = value;
2699 celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(value));
2700 }
2701 break;
2702 case OPUS_GET_COMPLEXITY_REQUEST:
2703 {
2704 opus_int32 *value = va_arg(ap, opus_int32*);
2705 if (!value)
2706 {
2707 goto bad_arg;
2708 }
2709 *value = st->silk_mode.complexity;
2710 }
2711 break;
2712 case OPUS_SET_INBAND_FEC_REQUEST:
2713 {
2714 opus_int32 value = va_arg(ap, opus_int32);
2715 if(value<0 || value>2)
2716 {
2717 goto bad_arg;
2718 }
2719 st->fec_config = value;
2720 st->silk_mode.useInBandFEC = (value != 0);
2721 }
2722 break;
2723 case OPUS_GET_INBAND_FEC_REQUEST:
2724 {
2725 opus_int32 *value = va_arg(ap, opus_int32*);
2726 if (!value)
2727 {
2728 goto bad_arg;
2729 }
2730 *value = st->fec_config;
2731 }
2732 break;
2733 case OPUS_SET_PACKET_LOSS_PERC_REQUEST:
2734 {
2735 opus_int32 value = va_arg(ap, opus_int32);
2736 if (value < 0 || value > 100)
2737 {
2738 goto bad_arg;
2739 }
2740 st->silk_mode.packetLossPercentage = value;
2741 celt_encoder_ctl(celt_enc, OPUS_SET_PACKET_LOSS_PERC(value));
2742 }
2743 break;
2744 case OPUS_GET_PACKET_LOSS_PERC_REQUEST:
2745 {
2746 opus_int32 *value = va_arg(ap, opus_int32*);
2747 if (!value)
2748 {
2749 goto bad_arg;
2750 }
2751 *value = st->silk_mode.packetLossPercentage;
2752 }
2753 break;
2754 case OPUS_SET_VBR_REQUEST:
2755 {
2756 opus_int32 value = va_arg(ap, opus_int32);
2757 if(value<0 || value>1)
2758 {
2759 goto bad_arg;
2760 }
2761 st->use_vbr = value;
2762 st->silk_mode.useCBR = 1-value;
2763 }
2764 break;
2765 case OPUS_GET_VBR_REQUEST:
2766 {
2767 opus_int32 *value = va_arg(ap, opus_int32*);
2768 if (!value)
2769 {
2770 goto bad_arg;
2771 }
2772 *value = st->use_vbr;
2773 }
2774 break;
2775 case OPUS_SET_VOICE_RATIO_REQUEST:
2776 {
2777 opus_int32 value = va_arg(ap, opus_int32);
2778 if (value<-1 || value>100)
2779 {
2780 goto bad_arg;
2781 }
2782 st->voice_ratio = value;
2783 }
2784 break;
2785 case OPUS_GET_VOICE_RATIO_REQUEST:
2786 {
2787 opus_int32 *value = va_arg(ap, opus_int32*);
2788 if (!value)
2789 {
2790 goto bad_arg;
2791 }
2792 *value = st->voice_ratio;
2793 }
2794 break;
2795 case OPUS_SET_VBR_CONSTRAINT_REQUEST:
2796 {
2797 opus_int32 value = va_arg(ap, opus_int32);
2798 if(value<0 || value>1)
2799 {
2800 goto bad_arg;
2801 }
2802 st->vbr_constraint = value;
2803 }
2804 break;
2805 case OPUS_GET_VBR_CONSTRAINT_REQUEST:
2806 {
2807 opus_int32 *value = va_arg(ap, opus_int32*);
2808 if (!value)
2809 {
2810 goto bad_arg;
2811 }
2812 *value = st->vbr_constraint;
2813 }
2814 break;
2815 case OPUS_SET_SIGNAL_REQUEST:
2816 {
2817 opus_int32 value = va_arg(ap, opus_int32);
2818 if(value!=OPUS_AUTO && value!=OPUS_SIGNAL_VOICE && value!=OPUS_SIGNAL_MUSIC)
2819 {
2820 goto bad_arg;
2821 }
2822 st->signal_type = value;
2823 }
2824 break;
2825 case OPUS_GET_SIGNAL_REQUEST:
2826 {
2827 opus_int32 *value = va_arg(ap, opus_int32*);
2828 if (!value)
2829 {
2830 goto bad_arg;
2831 }
2832 *value = st->signal_type;
2833 }
2834 break;
2835 case OPUS_GET_LOOKAHEAD_REQUEST:
2836 {
2837 opus_int32 *value = va_arg(ap, opus_int32*);
2838 if (!value)
2839 {
2840 goto bad_arg;
2841 }
2842 *value = st->Fs/400;
2843 if (st->application != OPUS_APPLICATION_RESTRICTED_LOWDELAY)
2844 *value += st->delay_compensation;
2845 }
2846 break;
2847 case OPUS_GET_SAMPLE_RATE_REQUEST:
2848 {
2849 opus_int32 *value = va_arg(ap, opus_int32*);
2850 if (!value)
2851 {
2852 goto bad_arg;
2853 }
2854 *value = st->Fs;
2855 }
2856 break;
2857 case OPUS_GET_FINAL_RANGE_REQUEST:
2858 {
2859 opus_uint32 *value = va_arg(ap, opus_uint32*);
2860 if (!value)
2861 {
2862 goto bad_arg;
2863 }
2864 *value = st->rangeFinal;
2865 }
2866 break;
2867 case OPUS_SET_LSB_DEPTH_REQUEST:
2868 {
2869 opus_int32 value = va_arg(ap, opus_int32);
2870 if (value<8 || value>24)
2871 {
2872 goto bad_arg;
2873 }
2874 st->lsb_depth=value;
2875 }
2876 break;
2877 case OPUS_GET_LSB_DEPTH_REQUEST:
2878 {
2879 opus_int32 *value = va_arg(ap, opus_int32*);
2880 if (!value)
2881 {
2882 goto bad_arg;
2883 }
2884 *value = st->lsb_depth;
2885 }
2886 break;
2887 case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST:
2888 {
2889 opus_int32 value = va_arg(ap, opus_int32);
2890 if (value != OPUS_FRAMESIZE_ARG && value != OPUS_FRAMESIZE_2_5_MS &&
2891 value != OPUS_FRAMESIZE_5_MS && value != OPUS_FRAMESIZE_10_MS &&
2892 value != OPUS_FRAMESIZE_20_MS && value != OPUS_FRAMESIZE_40_MS &&
2893 value != OPUS_FRAMESIZE_60_MS && value != OPUS_FRAMESIZE_80_MS &&
2894 value != OPUS_FRAMESIZE_100_MS && value != OPUS_FRAMESIZE_120_MS)
2895 {
2896 goto bad_arg;
2897 }
2898 st->variable_duration = value;
2899 }
2900 break;
2901 case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST:
2902 {
2903 opus_int32 *value = va_arg(ap, opus_int32*);
2904 if (!value)
2905 {
2906 goto bad_arg;
2907 }
2908 *value = st->variable_duration;
2909 }
2910 break;
2911 case OPUS_SET_PREDICTION_DISABLED_REQUEST:
2912 {
2913 opus_int32 value = va_arg(ap, opus_int32);
2914 if (value > 1 || value < 0)
2915 goto bad_arg;
2916 st->silk_mode.reducedDependency = value;
2917 }
2918 break;
2919 case OPUS_GET_PREDICTION_DISABLED_REQUEST:
2920 {
2921 opus_int32 *value = va_arg(ap, opus_int32*);
2922 if (!value)
2923 goto bad_arg;
2924 *value = st->silk_mode.reducedDependency;
2925 }
2926 break;
2927 case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST:
2928 {
2929 opus_int32 value = va_arg(ap, opus_int32);
2930 if(value<0 || value>1)
2931 {
2932 goto bad_arg;
2933 }
2934 celt_encoder_ctl(celt_enc, OPUS_SET_PHASE_INVERSION_DISABLED(value));
2935 }
2936 break;
2937 case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST:
2938 {
2939 opus_int32 *value = va_arg(ap, opus_int32*);
2940 if (!value)
2941 {
2942 goto bad_arg;
2943 }
2944 celt_encoder_ctl(celt_enc, OPUS_GET_PHASE_INVERSION_DISABLED(value));
2945 }
2946 break;
2947 #ifdef ENABLE_DRED
2948 case OPUS_SET_DRED_DURATION_REQUEST:
2949 {
2950 opus_int32 value = va_arg(ap, opus_int32);
2951 if(value<0 || value>DRED_MAX_FRAMES)
2952 {
2953 goto bad_arg;
2954 }
2955 st->dred_duration = value;
2956 st->silk_mode.useDRED = !!value;
2957 }
2958 break;
2959 case OPUS_GET_DRED_DURATION_REQUEST:
2960 {
2961 opus_int32 *value = va_arg(ap, opus_int32*);
2962 if (!value)
2963 {
2964 goto bad_arg;
2965 }
2966 *value = st->dred_duration;
2967 }
2968 break;
2969 #endif
2970 case OPUS_RESET_STATE:
2971 {
2972 void *silk_enc;
2973 silk_EncControlStruct dummy;
2974 char *start;
2975 silk_enc = (char*)st+st->silk_enc_offset;
2976 #ifndef DISABLE_FLOAT_API
2977 tonality_analysis_reset(&st->analysis);
2978 #endif
2979
2980 start = (char*)&st->OPUS_ENCODER_RESET_START;
2981 OPUS_CLEAR(start, sizeof(OpusEncoder) - (start - (char*)st));
2982
2983 celt_encoder_ctl(celt_enc, OPUS_RESET_STATE);
2984 silk_InitEncoder( silk_enc, st->arch, &dummy );
2985 #ifdef ENABLE_DRED
2986 /* Initialize DRED Encoder */
2987 dred_encoder_reset( &st->dred_encoder );
2988 #endif
2989 st->stream_channels = st->channels;
2990 st->hybrid_stereo_width_Q14 = 1 << 14;
2991 st->prev_HB_gain = Q15ONE;
2992 st->first = 1;
2993 st->mode = MODE_HYBRID;
2994 st->bandwidth = OPUS_BANDWIDTH_FULLBAND;
2995 st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
2996 }
2997 break;
2998 case OPUS_SET_FORCE_MODE_REQUEST:
2999 {
3000 opus_int32 value = va_arg(ap, opus_int32);
3001 if ((value < MODE_SILK_ONLY || value > MODE_CELT_ONLY) && value != OPUS_AUTO)
3002 {
3003 goto bad_arg;
3004 }
3005 st->user_forced_mode = value;
3006 }
3007 break;
3008 case OPUS_SET_LFE_REQUEST:
3009 {
3010 opus_int32 value = va_arg(ap, opus_int32);
3011 st->lfe = value;
3012 ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
3013 }
3014 break;
3015 case OPUS_SET_ENERGY_MASK_REQUEST:
3016 {
3017 opus_val16 *value = va_arg(ap, opus_val16*);
3018 st->energy_masking = value;
3019 ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
3020 }
3021 break;
3022 case OPUS_GET_IN_DTX_REQUEST:
3023 {
3024 opus_int32 *value = va_arg(ap, opus_int32*);
3025 if (!value)
3026 {
3027 goto bad_arg;
3028 }
3029 if (st->silk_mode.useDTX && (st->prev_mode == MODE_SILK_ONLY || st->prev_mode == MODE_HYBRID)) {
3030 /* DTX determined by Silk. */
3031 silk_encoder *silk_enc = (silk_encoder*)(void *)((char*)st+st->silk_enc_offset);
3032 *value = silk_enc->state_Fxx[0].sCmn.noSpeechCounter >= NB_SPEECH_FRAMES_BEFORE_DTX;
3033 /* Stereo: check second channel unless only the middle channel was encoded. */
3034 if(*value == 1 && st->silk_mode.nChannelsInternal == 2 && silk_enc->prev_decode_only_middle == 0) {
3035 *value = silk_enc->state_Fxx[1].sCmn.noSpeechCounter >= NB_SPEECH_FRAMES_BEFORE_DTX;
3036 }
3037 }
3038 #ifndef DISABLE_FLOAT_API
3039 else if (st->use_dtx) {
3040 /* DTX determined by Opus. */
3041 *value = st->nb_no_activity_ms_Q1 >= NB_SPEECH_FRAMES_BEFORE_DTX*20*2;
3042 }
3043 #endif
3044 else {
3045 *value = 0;
3046 }
3047 }
3048 break;
3049 #ifdef USE_WEIGHTS_FILE
3050 case OPUS_SET_DNN_BLOB_REQUEST:
3051 {
3052 const unsigned char *data = va_arg(ap, const unsigned char *);
3053 opus_int32 len = va_arg(ap, opus_int32);
3054 if(len<0 || data == NULL)
3055 {
3056 goto bad_arg;
3057 }
3058 #ifdef ENABLE_DRED
3059 ret = dred_encoder_load_model(&st->dred_encoder, data, len);
3060 #endif
3061 }
3062 break;
3063 #endif
3064 case CELT_GET_MODE_REQUEST:
3065 {
3066 const CELTMode ** value = va_arg(ap, const CELTMode**);
3067 if (!value)
3068 {
3069 goto bad_arg;
3070 }
3071 ret = celt_encoder_ctl(celt_enc, CELT_GET_MODE(value));
3072 }
3073 break;
3074 default:
3075 /* fprintf(stderr, "unknown opus_encoder_ctl() request: %d", request);*/
3076 ret = OPUS_UNIMPLEMENTED;
3077 break;
3078 }
3079 va_end(ap);
3080 return ret;
3081 bad_arg:
3082 va_end(ap);
3083 return OPUS_BAD_ARG;
3084 }
3085
/* Releases the encoder state pointed to by st.
 *
 * The pointer is handed directly to opus_free(); nothing else is torn down
 * here. Elsewhere in this file the SILK sub-encoder is addressed as a byte
 * offset from st (st->silk_enc_offset), so it is not freed separately.
 * The caller must not use st after this call returns.
 */
void opus_encoder_destroy(OpusEncoder *st)
{
   opus_free(st);
}
3090