1 /***********************************************************************
2 Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 - Redistributions of source code must retain the above copyright notice,
7 this list of conditions and the following disclaimer.
8 - Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 - Neither the name of Internet Society, IETF or IETF Trust, nor the
12 names of specific contributors, may be used to endorse or promote
13 products derived from this software without specific prior written
14 permission.
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 POSSIBILITY OF SUCH DAMAGE.
26 ***********************************************************************/
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31 #include "define.h"
32 #include "API.h"
33 #include "control.h"
34 #include "typedef.h"
35 #include "stack_alloc.h"
36 #include "structs.h"
37 #include "tuning_parameters.h"
38 #ifdef FIXED_POINT
39 #include "main_FIX.h"
40 #else
41 #include "main_FLP.h"
42 #endif
43
44 #ifdef ENABLE_DRED
45 #include "dred_encoder.h"
46 #endif
47
/***************************************/
/* Read control structure from encoder */
/***************************************/
51 static opus_int silk_QueryEncoder( /* O Returns error code */
52 const void *encState, /* I State */
53 silk_EncControlStruct *encStatus /* O Encoder Status */
54 );
55
/****************************************/
/* Encoder functions                    */
/****************************************/
59
silk_Get_Encoder_Size(opus_int * encSizeBytes)60 opus_int silk_Get_Encoder_Size( /* O Returns error code */
61 opus_int *encSizeBytes /* O Number of bytes in SILK encoder state */
62 )
63 {
64 opus_int ret = SILK_NO_ERROR;
65
66 *encSizeBytes = sizeof( silk_encoder );
67
68 return ret;
69 }
70
/*************************/
/* Init or Reset encoder */
/*************************/
silk_InitEncoder(void * encState,int arch,silk_EncControlStruct * encStatus)74 opus_int silk_InitEncoder( /* O Returns error code */
75 void *encState, /* I/O State */
76 int arch, /* I Run-time architecture */
77 silk_EncControlStruct *encStatus /* O Encoder Status */
78 )
79 {
80 silk_encoder *psEnc;
81 opus_int n, ret = SILK_NO_ERROR;
82
83 psEnc = (silk_encoder *)encState;
84
85 /* Reset encoder */
86 silk_memset( psEnc, 0, sizeof( silk_encoder ) );
87 for( n = 0; n < ENCODER_NUM_CHANNELS; n++ ) {
88 if( ret += silk_init_encoder( &psEnc->state_Fxx[ n ], arch ) ) {
89 celt_assert( 0 );
90 }
91 }
92
93 psEnc->nChannelsAPI = 1;
94 psEnc->nChannelsInternal = 1;
95
96 /* Read control structure */
97 if( ret += silk_QueryEncoder( encState, encStatus ) ) {
98 celt_assert( 0 );
99 }
100
101 return ret;
102 }
103
/***************************************/
/* Read control structure from encoder */
/***************************************/
silk_QueryEncoder(const void * encState,silk_EncControlStruct * encStatus)107 static opus_int silk_QueryEncoder( /* O Returns error code */
108 const void *encState, /* I State */
109 silk_EncControlStruct *encStatus /* O Encoder Status */
110 )
111 {
112 opus_int ret = SILK_NO_ERROR;
113 silk_encoder_state_Fxx *state_Fxx;
114 silk_encoder *psEnc = (silk_encoder *)encState;
115
116 state_Fxx = psEnc->state_Fxx;
117
118 encStatus->nChannelsAPI = psEnc->nChannelsAPI;
119 encStatus->nChannelsInternal = psEnc->nChannelsInternal;
120 encStatus->API_sampleRate = state_Fxx[ 0 ].sCmn.API_fs_Hz;
121 encStatus->maxInternalSampleRate = state_Fxx[ 0 ].sCmn.maxInternal_fs_Hz;
122 encStatus->minInternalSampleRate = state_Fxx[ 0 ].sCmn.minInternal_fs_Hz;
123 encStatus->desiredInternalSampleRate = state_Fxx[ 0 ].sCmn.desiredInternal_fs_Hz;
124 encStatus->payloadSize_ms = state_Fxx[ 0 ].sCmn.PacketSize_ms;
125 encStatus->bitRate = state_Fxx[ 0 ].sCmn.TargetRate_bps;
126 encStatus->packetLossPercentage = state_Fxx[ 0 ].sCmn.PacketLoss_perc;
127 encStatus->complexity = state_Fxx[ 0 ].sCmn.Complexity;
128 encStatus->useInBandFEC = state_Fxx[ 0 ].sCmn.useInBandFEC;
129 encStatus->useDTX = state_Fxx[ 0 ].sCmn.useDTX;
130 encStatus->useCBR = state_Fxx[ 0 ].sCmn.useCBR;
131 encStatus->internalSampleRate = silk_SMULBB( state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
132 encStatus->allowBandwidthSwitch = state_Fxx[ 0 ].sCmn.allow_bandwidth_switch;
133 encStatus->inWBmodeWithoutVariableLP = state_Fxx[ 0 ].sCmn.fs_kHz == 16 && state_Fxx[ 0 ].sCmn.sLP.mode == 0;
134
135 return ret;
136 }
137
138
/**************************/
/* Encode frame with Silk */
/**************************/
/* Note: if prefillFlag is set, the input must contain 10 ms of audio, irrespective of what                     */
/* encControl->payloadSize_ms is set to                                                                         */
silk_Encode(void * encState,silk_EncControlStruct * encControl,const opus_int16 * samplesIn,opus_int nSamplesIn,ec_enc * psRangeEnc,opus_int32 * nBytesOut,const opus_int prefillFlag,opus_int activity)144 opus_int silk_Encode( /* O Returns error code */
145 void *encState, /* I/O State */
146 silk_EncControlStruct *encControl, /* I Control status */
147 const opus_int16 *samplesIn, /* I Speech sample input vector */
148 opus_int nSamplesIn, /* I Number of samples in input vector */
149 ec_enc *psRangeEnc, /* I/O Compressor data structure */
150 opus_int32 *nBytesOut, /* I/O Number of bytes in payload (input: Max bytes) */
151 const opus_int prefillFlag, /* I Flag to indicate prefilling buffers no coding */
152 opus_int activity /* I Decision of Opus voice activity detector */
153 )
154 {
155 opus_int n, i, nBits, flags, tmp_payloadSize_ms = 0, tmp_complexity = 0, ret = 0;
156 opus_int nSamplesToBuffer, nSamplesToBufferMax, nBlocksOf10ms;
157 opus_int nSamplesFromInput = 0, nSamplesFromInputMax;
158 opus_int speech_act_thr_for_switch_Q8;
159 opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol, sum;
160 silk_encoder *psEnc = ( silk_encoder * )encState;
161 VARDECL( opus_int16, buf );
162 opus_int transition, curr_block, tot_blocks;
163 SAVE_STACK;
164
165 if (encControl->reducedDependency)
166 {
167 psEnc->state_Fxx[0].sCmn.first_frame_after_reset = 1;
168 psEnc->state_Fxx[1].sCmn.first_frame_after_reset = 1;
169 }
170 psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;
171
172 /* Check values in encoder control structure */
173 if( ( ret = check_control_input( encControl ) ) != 0 ) {
174 celt_assert( 0 );
175 RESTORE_STACK;
176 return ret;
177 }
178
179 encControl->switchReady = 0;
180
181 if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
182 /* Mono -> Stereo transition: init state of second channel and stereo state */
183 ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ], psEnc->state_Fxx[ 0 ].sCmn.arch );
184 silk_memset( psEnc->sStereo.pred_prev_Q13, 0, sizeof( psEnc->sStereo.pred_prev_Q13 ) );
185 silk_memset( psEnc->sStereo.sSide, 0, sizeof( psEnc->sStereo.sSide ) );
186 psEnc->sStereo.mid_side_amp_Q0[ 0 ] = 0;
187 psEnc->sStereo.mid_side_amp_Q0[ 1 ] = 1;
188 psEnc->sStereo.mid_side_amp_Q0[ 2 ] = 0;
189 psEnc->sStereo.mid_side_amp_Q0[ 3 ] = 1;
190 psEnc->sStereo.width_prev_Q14 = 0;
191 psEnc->sStereo.smth_width_Q14 = SILK_FIX_CONST( 1, 14 );
192 if( psEnc->nChannelsAPI == 2 ) {
193 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof( silk_resampler_state_struct ) );
194 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.In_HP_State, &psEnc->state_Fxx[ 0 ].sCmn.In_HP_State, sizeof( psEnc->state_Fxx[ 1 ].sCmn.In_HP_State ) );
195 }
196 }
197
198 transition = (encControl->payloadSize_ms != psEnc->state_Fxx[ 0 ].sCmn.PacketSize_ms) || (psEnc->nChannelsInternal != encControl->nChannelsInternal);
199
200 psEnc->nChannelsAPI = encControl->nChannelsAPI;
201 psEnc->nChannelsInternal = encControl->nChannelsInternal;
202
203 nBlocksOf10ms = silk_DIV32( 100 * nSamplesIn, encControl->API_sampleRate );
204 tot_blocks = ( nBlocksOf10ms > 1 ) ? nBlocksOf10ms >> 1 : 1;
205 curr_block = 0;
206 if( prefillFlag ) {
207 silk_LP_state save_LP;
208 /* Only accept input length of 10 ms */
209 if( nBlocksOf10ms != 1 ) {
210 celt_assert( 0 );
211 RESTORE_STACK;
212 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
213 }
214 if ( prefillFlag == 2 ) {
215 save_LP = psEnc->state_Fxx[ 0 ].sCmn.sLP;
216 /* Save the sampling rate so the bandwidth switching code can keep handling transitions. */
217 save_LP.saved_fs_kHz = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
218 }
219 /* Reset Encoder */
220 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
221 ret = silk_init_encoder( &psEnc->state_Fxx[ n ], psEnc->state_Fxx[ n ].sCmn.arch );
222 /* Restore the variable LP state. */
223 if ( prefillFlag == 2 ) {
224 psEnc->state_Fxx[ n ].sCmn.sLP = save_LP;
225 }
226 celt_assert( !ret );
227 }
228 tmp_payloadSize_ms = encControl->payloadSize_ms;
229 encControl->payloadSize_ms = 10;
230 tmp_complexity = encControl->complexity;
231 encControl->complexity = 0;
232 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
233 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
234 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 1;
235 }
236 } else {
237 /* Only accept input lengths that are a multiple of 10 ms */
238 if( nBlocksOf10ms * encControl->API_sampleRate != 100 * nSamplesIn || nSamplesIn < 0 ) {
239 celt_assert( 0 );
240 RESTORE_STACK;
241 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
242 }
243 /* Make sure no more than one packet can be produced */
244 if( 1000 * (opus_int32)nSamplesIn > encControl->payloadSize_ms * encControl->API_sampleRate ) {
245 celt_assert( 0 );
246 RESTORE_STACK;
247 return SILK_ENC_INPUT_INVALID_NO_OF_SAMPLES;
248 }
249 }
250
251 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
252 /* Force the side channel to the same rate as the mid */
253 opus_int force_fs_kHz = (n==1) ? psEnc->state_Fxx[0].sCmn.fs_kHz : 0;
254 if( ( ret = silk_control_encoder( &psEnc->state_Fxx[ n ], encControl, psEnc->allowBandwidthSwitch, n, force_fs_kHz ) ) != 0 ) {
255 silk_assert( 0 );
256 RESTORE_STACK;
257 return ret;
258 }
259 if( psEnc->state_Fxx[n].sCmn.first_frame_after_reset || transition ) {
260 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
261 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
262 }
263 }
264 psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
265 }
266 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
267
268 /* Input buffering/resampling and encoding */
269 nSamplesToBufferMax =
270 10 * nBlocksOf10ms * psEnc->state_Fxx[ 0 ].sCmn.fs_kHz;
271 nSamplesFromInputMax =
272 silk_DIV32_16( nSamplesToBufferMax *
273 psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz,
274 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
275 ALLOC( buf, nSamplesFromInputMax, opus_int16 );
276 while( 1 ) {
277 int curr_nBitsUsedLBRR = 0;
278 nSamplesToBuffer = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;
279 nSamplesToBuffer = silk_min( nSamplesToBuffer, nSamplesToBufferMax );
280 nSamplesFromInput = silk_DIV32_16( nSamplesToBuffer * psEnc->state_Fxx[ 0 ].sCmn.API_fs_Hz, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz * 1000 );
281 /* Resample and write to buffer */
282 if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
283 opus_int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
284 for( n = 0; n < nSamplesFromInput; n++ ) {
285 buf[ n ] = samplesIn[ 2 * n ];
286 }
287 /* Making sure to start both resamplers from the same state when switching from mono to stereo */
288 if( psEnc->nPrevChannelsInternal == 1 && id==0 ) {
289 silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
290 }
291
292 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
293 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
294 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
295
296 nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
297 nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
298 for( n = 0; n < nSamplesFromInput; n++ ) {
299 buf[ n ] = samplesIn[ 2 * n + 1 ];
300 }
301 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
302 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
303
304 psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;
305 } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {
306 /* Combine left and right channels before resampling */
307 for( n = 0; n < nSamplesFromInput; n++ ) {
308 sum = samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ];
309 buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( sum, 1 );
310 }
311 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
312 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
313 /* On the first mono frame, average the results for the two resampler states */
314 if( psEnc->nPrevChannelsInternal == 2 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 ) {
315 ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
316 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
317 for( n = 0; n < psEnc->state_Fxx[ 0 ].sCmn.frame_length; n++ ) {
318 psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ] =
319 silk_RSHIFT(psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx+n+2 ]
320 + psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx+n+2 ], 1);
321 }
322 }
323 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
324 } else {
325 celt_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );
326 silk_memcpy(buf, samplesIn, nSamplesFromInput*sizeof(opus_int16));
327 ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
328 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
329 psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;
330 }
331
332 samplesIn += nSamplesFromInput * encControl->nChannelsAPI;
333 nSamplesIn -= nSamplesFromInput;
334
335 /* Default */
336 psEnc->allowBandwidthSwitch = 0;
337
338 /* Silk encoder */
339 if( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx >= psEnc->state_Fxx[ 0 ].sCmn.frame_length ) {
340 /* Enough data in input buffer, so encode */
341 celt_assert( psEnc->state_Fxx[ 0 ].sCmn.inputBufIx == psEnc->state_Fxx[ 0 ].sCmn.frame_length );
342 celt_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inputBufIx == psEnc->state_Fxx[ 1 ].sCmn.frame_length );
343
344 /* Deal with LBRR data */
345 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == 0 && !prefillFlag ) {
346 /* Create space at start of payload for VAD and FEC flags */
347 opus_uint8 iCDF[ 2 ] = { 0, 0 };
348 iCDF[ 0 ] = 256 - silk_RSHIFT( 256, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
349 ec_enc_icdf( psRangeEnc, 0, iCDF, 8 );
350 curr_nBitsUsedLBRR = ec_tell( psRangeEnc );
351
352 /* Encode any LBRR data from previous packet */
353 /* Encode LBRR flags */
354 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
355 LBRR_symbol = 0;
356 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
357 LBRR_symbol |= silk_LSHIFT( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ], i );
358 }
359 psEnc->state_Fxx[ n ].sCmn.LBRR_flag = LBRR_symbol > 0 ? 1 : 0;
360 if( LBRR_symbol && psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket > 1 ) {
361 ec_enc_icdf( psRangeEnc, LBRR_symbol - 1, silk_LBRR_flags_iCDF_ptr[ psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket - 2 ], 8 );
362 }
363 }
364
365 /* Code LBRR indices and excitation signals */
366 for( i = 0; i < psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket; i++ ) {
367 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
368 if( psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] ) {
369 opus_int condCoding;
370
371 if( encControl->nChannelsInternal == 2 && n == 0 ) {
372 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ i ] );
373 /* For LBRR data there's no need to code the mid-only flag if the side-channel LBRR flag is set */
374 if( psEnc->state_Fxx[ 1 ].sCmn.LBRR_flags[ i ] == 0 ) {
375 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ i ] );
376 }
377 }
378 /* Use conditional coding if previous frame available */
379 if( i > 0 && psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i - 1 ] ) {
380 condCoding = CODE_CONDITIONALLY;
381 } else {
382 condCoding = CODE_INDEPENDENTLY;
383 }
384 silk_encode_indices( &psEnc->state_Fxx[ n ].sCmn, psRangeEnc, i, 1, condCoding );
385 silk_encode_pulses( psRangeEnc, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].signalType, psEnc->state_Fxx[ n ].sCmn.indices_LBRR[i].quantOffsetType,
386 psEnc->state_Fxx[ n ].sCmn.pulses_LBRR[ i ], psEnc->state_Fxx[ n ].sCmn.frame_length );
387 }
388 }
389 }
390
391 /* Reset LBRR flags */
392 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
393 silk_memset( psEnc->state_Fxx[ n ].sCmn.LBRR_flags, 0, sizeof( psEnc->state_Fxx[ n ].sCmn.LBRR_flags ) );
394 }
395 curr_nBitsUsedLBRR = ec_tell( psRangeEnc ) - curr_nBitsUsedLBRR;
396 }
397
398 silk_HP_variable_cutoff( psEnc->state_Fxx );
399
400 /* Total target bits for packet */
401 nBits = silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
402 /* Subtract bits used for LBRR */
403 if( !prefillFlag ) {
404 /* psEnc->nBitsUsedLBRR is an exponential moving average of the LBRR usage,
405 except that for the first LBRR frame it does no averaging and for the first
406 frame after after LBRR, it goes back to zero immediately. */
407 if ( curr_nBitsUsedLBRR < 10 ) {
408 psEnc->nBitsUsedLBRR = 0;
409 } else if ( psEnc->nBitsUsedLBRR < 10) {
410 psEnc->nBitsUsedLBRR = curr_nBitsUsedLBRR;
411 } else {
412 psEnc->nBitsUsedLBRR = ( psEnc->nBitsUsedLBRR + curr_nBitsUsedLBRR ) / 2;
413 }
414 nBits -= psEnc->nBitsUsedLBRR;
415 }
416 /* Divide by number of uncoded frames left in packet */
417 nBits = silk_DIV32_16( nBits, psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket );
418 /* Convert to bits/second */
419 if( encControl->payloadSize_ms == 10 ) {
420 TargetRate_bps = silk_SMULBB( nBits, 100 );
421 } else {
422 TargetRate_bps = silk_SMULBB( nBits, 50 );
423 }
424 /* Subtract fraction of bits in excess of target in previous frames and packets */
425 TargetRate_bps -= silk_DIV32_16( silk_MUL( psEnc->nBitsExceeded, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
426 if( !prefillFlag && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded > 0 ) {
427 /* Compare actual vs target bits so far in this packet */
428 opus_int32 bitsBalance = ec_tell( psRangeEnc ) - psEnc->nBitsUsedLBRR - nBits * psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
429 TargetRate_bps -= silk_DIV32_16( silk_MUL( bitsBalance, 1000 ), BITRESERVOIR_DECAY_TIME_MS );
430 }
431 /* Never exceed input bitrate */
432 TargetRate_bps = silk_LIMIT( TargetRate_bps, encControl->bitRate, 5000 );
433
434 /* Convert Left/Right to Mid/Side */
435 if( encControl->nChannelsInternal == 2 ) {
436 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
437 psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
438 MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
439 psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
440 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
441 /* Reset side channel encoder memory for first frame with side coding */
442 if( psEnc->prev_decode_only_middle == 1 ) {
443 silk_memset( &psEnc->state_Fxx[ 1 ].sShape, 0, sizeof( psEnc->state_Fxx[ 1 ].sShape ) );
444 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sNSQ, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sNSQ ) );
445 silk_memset( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.prev_NLSFq_Q15 ) );
446 silk_memset( &psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State, 0, sizeof( psEnc->state_Fxx[ 1 ].sCmn.sLP.In_LP_State ) );
447 psEnc->state_Fxx[ 1 ].sCmn.prevLag = 100;
448 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.lagPrev = 100;
449 psEnc->state_Fxx[ 1 ].sShape.LastGainIndex = 10;
450 psEnc->state_Fxx[ 1 ].sCmn.prevSignalType = TYPE_NO_VOICE_ACTIVITY;
451 psEnc->state_Fxx[ 1 ].sCmn.sNSQ.prev_gain_Q16 = 65536;
452 psEnc->state_Fxx[ 1 ].sCmn.first_frame_after_reset = 1;
453 }
454 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 1 ], activity );
455 } else {
456 psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] = 0;
457 }
458 if( !prefillFlag ) {
459 silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
460 if( psEnc->state_Fxx[ 1 ].sCmn.VAD_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
461 silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
462 }
463 }
464 } else {
465 /* Buffering */
466 silk_memcpy( psEnc->state_Fxx[ 0 ].sCmn.inputBuf, psEnc->sStereo.sMid, 2 * sizeof( opus_int16 ) );
467 silk_memcpy( psEnc->sStereo.sMid, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.frame_length ], 2 * sizeof( opus_int16 ) );
468 }
469 silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity );
470
471 /* Encode */
472 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
473 opus_int maxBits, useCBR;
474
475 /* Handling rate constraints */
476 maxBits = encControl->maxBits;
477 if( tot_blocks == 2 && curr_block == 0 ) {
478 maxBits = maxBits * 3 / 5;
479 } else if( tot_blocks == 3 ) {
480 if( curr_block == 0 ) {
481 maxBits = maxBits * 2 / 5;
482 } else if( curr_block == 1 ) {
483 maxBits = maxBits * 3 / 4;
484 }
485 }
486 useCBR = encControl->useCBR && curr_block == tot_blocks - 1;
487
488 if( encControl->nChannelsInternal == 1 ) {
489 channelRate_bps = TargetRate_bps;
490 } else {
491 channelRate_bps = MStargetRates_bps[ n ];
492 if( n == 0 && MStargetRates_bps[ 1 ] > 0 ) {
493 useCBR = 0;
494 /* Give mid up to 1/2 of the max bits for that frame */
495 maxBits -= encControl->maxBits / ( tot_blocks * 2 );
496 }
497 }
498
499 if( channelRate_bps > 0 ) {
500 opus_int condCoding;
501
502 silk_control_SNR( &psEnc->state_Fxx[ n ].sCmn, channelRate_bps );
503
504 /* Use independent coding if no previous frame available */
505 if( psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - n <= 0 ) {
506 condCoding = CODE_INDEPENDENTLY;
507 } else if( n > 0 && psEnc->prev_decode_only_middle ) {
508 /* If we skipped a side frame in this packet, we don't
509 need LTP scaling; the LTP state is well-defined. */
510 condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
511 } else {
512 condCoding = CODE_CONDITIONALLY;
513 }
514 if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding, maxBits, useCBR ) ) != 0 ) {
515 silk_assert( 0 );
516 }
517 }
518 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
519 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
520 psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
521 }
522 psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
523
524 /* Insert VAD and FEC flags at beginning of bitstream */
525 if( *nBytesOut > 0 && psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded == psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket) {
526 flags = 0;
527 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
528 for( i = 0; i < psEnc->state_Fxx[ n ].sCmn.nFramesPerPacket; i++ ) {
529 flags = silk_LSHIFT( flags, 1 );
530 flags |= psEnc->state_Fxx[ n ].sCmn.VAD_flags[ i ];
531 }
532 flags = silk_LSHIFT( flags, 1 );
533 flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
534 }
535 if( !prefillFlag ) {
536 ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
537 }
538
539 /* Return zero bytes if all channels DTXed */
540 if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
541 *nBytesOut = 0;
542 }
543
544 psEnc->nBitsExceeded += *nBytesOut * 8;
545 psEnc->nBitsExceeded -= silk_DIV32_16( silk_MUL( encControl->bitRate, encControl->payloadSize_ms ), 1000 );
546 psEnc->nBitsExceeded = silk_LIMIT( psEnc->nBitsExceeded, 0, 10000 );
547
548 /* Update flag indicating if bandwidth switching is allowed */
549 speech_act_thr_for_switch_Q8 = silk_SMLAWB( SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ),
550 SILK_FIX_CONST( ( 1 - SPEECH_ACTIVITY_DTX_THRES ) / MAX_BANDWIDTH_SWITCH_DELAY_MS, 16 + 8 ), psEnc->timeSinceSwitchAllowed_ms );
551 if( psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8 < speech_act_thr_for_switch_Q8 ) {
552 psEnc->allowBandwidthSwitch = 1;
553 psEnc->timeSinceSwitchAllowed_ms = 0;
554 } else {
555 psEnc->allowBandwidthSwitch = 0;
556 psEnc->timeSinceSwitchAllowed_ms += encControl->payloadSize_ms;
557 }
558 }
559
560 if( nSamplesIn == 0 ) {
561 break;
562 }
563 } else {
564 break;
565 }
566 curr_block++;
567 }
568
569 psEnc->nPrevChannelsInternal = encControl->nChannelsInternal;
570
571 encControl->allowBandwidthSwitch = psEnc->allowBandwidthSwitch;
572 encControl->inWBmodeWithoutVariableLP = psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == 16 && psEnc->state_Fxx[ 0 ].sCmn.sLP.mode == 0;
573 encControl->internalSampleRate = silk_SMULBB( psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, 1000 );
574 encControl->stereoWidth_Q14 = encControl->toMono ? 0 : psEnc->sStereo.smth_width_Q14;
575 if( prefillFlag ) {
576 encControl->payloadSize_ms = tmp_payloadSize_ms;
577 encControl->complexity = tmp_complexity;
578 for( n = 0; n < encControl->nChannelsInternal; n++ ) {
579 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
580 psEnc->state_Fxx[ n ].sCmn.prefillFlag = 0;
581 }
582 }
583
584 encControl->signalType = psEnc->state_Fxx[0].sCmn.indices.signalType;
585 encControl->offset = silk_Quantization_Offsets_Q10
586 [ psEnc->state_Fxx[0].sCmn.indices.signalType >> 1 ]
587 [ psEnc->state_Fxx[0].sCmn.indices.quantOffsetType ];
588 RESTORE_STACK;
589 return ret;
590 }
591
592