xref: /aosp_15_r20/external/webrtc/modules/audio_coding/neteq/neteq_impl.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/neteq/neteq_impl.h"
12 
13 #include <algorithm>
14 #include <cstdint>
15 #include <cstring>
16 #include <list>
17 #include <map>
18 #include <memory>
19 #include <utility>
20 #include <vector>
21 
22 #include "api/audio_codecs/audio_decoder.h"
23 #include "api/neteq/tick_timer.h"
24 #include "common_audio/signal_processing/include/signal_processing_library.h"
25 #include "modules/audio_coding/codecs/cng/webrtc_cng.h"
26 #include "modules/audio_coding/neteq/accelerate.h"
27 #include "modules/audio_coding/neteq/background_noise.h"
28 #include "modules/audio_coding/neteq/comfort_noise.h"
29 #include "modules/audio_coding/neteq/decision_logic.h"
30 #include "modules/audio_coding/neteq/decoder_database.h"
31 #include "modules/audio_coding/neteq/dtmf_buffer.h"
32 #include "modules/audio_coding/neteq/dtmf_tone_generator.h"
33 #include "modules/audio_coding/neteq/expand.h"
34 #include "modules/audio_coding/neteq/merge.h"
35 #include "modules/audio_coding/neteq/nack_tracker.h"
36 #include "modules/audio_coding/neteq/normal.h"
37 #include "modules/audio_coding/neteq/packet.h"
38 #include "modules/audio_coding/neteq/packet_buffer.h"
39 #include "modules/audio_coding/neteq/post_decode_vad.h"
40 #include "modules/audio_coding/neteq/preemptive_expand.h"
41 #include "modules/audio_coding/neteq/red_payload_splitter.h"
42 #include "modules/audio_coding/neteq/statistics_calculator.h"
43 #include "modules/audio_coding/neteq/sync_buffer.h"
44 #include "modules/audio_coding/neteq/time_stretch.h"
45 #include "modules/audio_coding/neteq/timestamp_scaler.h"
46 #include "rtc_base/checks.h"
47 #include "rtc_base/logging.h"
48 #include "rtc_base/numerics/safe_conversions.h"
49 #include "rtc_base/sanitizer.h"
50 #include "rtc_base/strings/audio_format_to_string.h"
51 #include "rtc_base/trace_event.h"
52 #include "system_wrappers/include/clock.h"
53 
54 namespace webrtc {
55 namespace {
56 
CreateNetEqController(const NetEqControllerFactory & controller_factory,int base_min_delay,int max_packets_in_buffer,bool allow_time_stretching,TickTimer * tick_timer,webrtc::Clock * clock)57 std::unique_ptr<NetEqController> CreateNetEqController(
58     const NetEqControllerFactory& controller_factory,
59     int base_min_delay,
60     int max_packets_in_buffer,
61     bool allow_time_stretching,
62     TickTimer* tick_timer,
63     webrtc::Clock* clock) {
64   NetEqController::Config config;
65   config.base_min_delay_ms = base_min_delay;
66   config.max_packets_in_buffer = max_packets_in_buffer;
67   config.allow_time_stretching = allow_time_stretching;
68   config.tick_timer = tick_timer;
69   config.clock = clock;
70   return controller_factory.CreateNetEqController(config);
71 }
72 
73 }  // namespace
74 
Dependencies(const NetEq::Config & config,Clock * clock,const rtc::scoped_refptr<AudioDecoderFactory> & decoder_factory,const NetEqControllerFactory & controller_factory)75 NetEqImpl::Dependencies::Dependencies(
76     const NetEq::Config& config,
77     Clock* clock,
78     const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
79     const NetEqControllerFactory& controller_factory)
80     : clock(clock),
81       tick_timer(new TickTimer),
82       stats(new StatisticsCalculator),
83       decoder_database(
84           new DecoderDatabase(decoder_factory, config.codec_pair_id)),
85       dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)),
86       dtmf_tone_generator(new DtmfToneGenerator),
87       packet_buffer(
88           new PacketBuffer(config.max_packets_in_buffer, tick_timer.get())),
89       neteq_controller(
90           CreateNetEqController(controller_factory,
91                                 config.min_delay_ms,
92                                 config.max_packets_in_buffer,
93                                 !config.for_test_no_time_stretching,
94                                 tick_timer.get(),
95                                 clock)),
96       red_payload_splitter(new RedPayloadSplitter),
97       timestamp_scaler(new TimestampScaler(*decoder_database)),
98       accelerate_factory(new AccelerateFactory),
99       expand_factory(new ExpandFactory),
100       preemptive_expand_factory(new PreemptiveExpandFactory) {}
101 
102 NetEqImpl::Dependencies::~Dependencies() = default;
103 
NetEqImpl(const NetEq::Config & config,Dependencies && deps,bool create_components)104 NetEqImpl::NetEqImpl(const NetEq::Config& config,
105                      Dependencies&& deps,
106                      bool create_components)
107     : clock_(deps.clock),
108       tick_timer_(std::move(deps.tick_timer)),
109       decoder_database_(std::move(deps.decoder_database)),
110       dtmf_buffer_(std::move(deps.dtmf_buffer)),
111       dtmf_tone_generator_(std::move(deps.dtmf_tone_generator)),
112       packet_buffer_(std::move(deps.packet_buffer)),
113       red_payload_splitter_(std::move(deps.red_payload_splitter)),
114       timestamp_scaler_(std::move(deps.timestamp_scaler)),
115       vad_(new PostDecodeVad()),
116       expand_factory_(std::move(deps.expand_factory)),
117       accelerate_factory_(std::move(deps.accelerate_factory)),
118       preemptive_expand_factory_(std::move(deps.preemptive_expand_factory)),
119       stats_(std::move(deps.stats)),
120       controller_(std::move(deps.neteq_controller)),
121       last_mode_(Mode::kNormal),
122       decoded_buffer_length_(kMaxFrameSize),
123       decoded_buffer_(new int16_t[decoded_buffer_length_]),
124       playout_timestamp_(0),
125       new_codec_(false),
126       timestamp_(0),
127       reset_decoder_(false),
128       first_packet_(true),
129       enable_fast_accelerate_(config.enable_fast_accelerate),
130       nack_enabled_(false),
131       enable_muted_state_(config.enable_muted_state),
132       expand_uma_logger_("WebRTC.Audio.ExpandRatePercent",
133                          10,  // Report once every 10 s.
134                          tick_timer_.get()),
135       speech_expand_uma_logger_("WebRTC.Audio.SpeechExpandRatePercent",
136                                 10,  // Report once every 10 s.
137                                 tick_timer_.get()),
138       no_time_stretching_(config.for_test_no_time_stretching) {
139   RTC_LOG(LS_INFO) << "NetEq config: " << config.ToString();
140   int fs = config.sample_rate_hz;
141   if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) {
142     RTC_LOG(LS_ERROR) << "Sample rate " << fs
143                       << " Hz not supported. "
144                          "Changing to 8000 Hz.";
145     fs = 8000;
146   }
147   controller_->SetMaximumDelay(config.max_delay_ms);
148   fs_hz_ = fs;
149   fs_mult_ = fs / 8000;
150   last_output_sample_rate_hz_ = fs;
151   output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
152   controller_->SetSampleRate(fs_hz_, output_size_samples_);
153   decoder_frame_length_ = 2 * output_size_samples_;  // 20 ms.
154   if (create_components) {
155     SetSampleRateAndChannels(fs, 1);  // Default is 1 channel.
156   }
157   RTC_DCHECK(!vad_->enabled());
158   if (config.enable_post_decode_vad) {
159     vad_->Enable();
160   }
161 }
162 
163 NetEqImpl::~NetEqImpl() = default;
164 
InsertPacket(const RTPHeader & rtp_header,rtc::ArrayView<const uint8_t> payload)165 int NetEqImpl::InsertPacket(const RTPHeader& rtp_header,
166                             rtc::ArrayView<const uint8_t> payload) {
167   rtc::MsanCheckInitialized(payload);
168   TRACE_EVENT0("webrtc", "NetEqImpl::InsertPacket");
169   MutexLock lock(&mutex_);
170   if (InsertPacketInternal(rtp_header, payload) != 0) {
171     return kFail;
172   }
173   return kOK;
174 }
175 
InsertEmptyPacket(const RTPHeader & rtp_header)176 void NetEqImpl::InsertEmptyPacket(const RTPHeader& rtp_header) {
177   MutexLock lock(&mutex_);
178   if (nack_enabled_) {
179     nack_->UpdateLastReceivedPacket(rtp_header.sequenceNumber,
180                                     rtp_header.timestamp);
181   }
182   controller_->RegisterEmptyPacket();
183 }
184 
185 namespace {
SetAudioFrameActivityAndType(bool vad_enabled,NetEqImpl::OutputType type,AudioFrame::VADActivity last_vad_activity,AudioFrame * audio_frame)186 void SetAudioFrameActivityAndType(bool vad_enabled,
187                                   NetEqImpl::OutputType type,
188                                   AudioFrame::VADActivity last_vad_activity,
189                                   AudioFrame* audio_frame) {
190   switch (type) {
191     case NetEqImpl::OutputType::kNormalSpeech: {
192       audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
193       audio_frame->vad_activity_ = AudioFrame::kVadActive;
194       break;
195     }
196     case NetEqImpl::OutputType::kVadPassive: {
197       // This should only be reached if the VAD is enabled.
198       RTC_DCHECK(vad_enabled);
199       audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
200       audio_frame->vad_activity_ = AudioFrame::kVadPassive;
201       break;
202     }
203     case NetEqImpl::OutputType::kCNG: {
204       audio_frame->speech_type_ = AudioFrame::kCNG;
205       audio_frame->vad_activity_ = AudioFrame::kVadPassive;
206       break;
207     }
208     case NetEqImpl::OutputType::kPLC: {
209       audio_frame->speech_type_ = AudioFrame::kPLC;
210       audio_frame->vad_activity_ = last_vad_activity;
211       break;
212     }
213     case NetEqImpl::OutputType::kPLCCNG: {
214       audio_frame->speech_type_ = AudioFrame::kPLCCNG;
215       audio_frame->vad_activity_ = AudioFrame::kVadPassive;
216       break;
217     }
218     case NetEqImpl::OutputType::kCodecPLC: {
219       audio_frame->speech_type_ = AudioFrame::kCodecPLC;
220       audio_frame->vad_activity_ = last_vad_activity;
221       break;
222     }
223     default:
224       RTC_DCHECK_NOTREACHED();
225   }
226   if (!vad_enabled) {
227     // Always set kVadUnknown when receive VAD is inactive.
228     audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
229   }
230 }
231 }  // namespace
232 
GetAudio(AudioFrame * audio_frame,bool * muted,int * current_sample_rate_hz,absl::optional<Operation> action_override)233 int NetEqImpl::GetAudio(AudioFrame* audio_frame,
234                         bool* muted,
235                         int* current_sample_rate_hz,
236                         absl::optional<Operation> action_override) {
237   TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio");
238   MutexLock lock(&mutex_);
239   if (GetAudioInternal(audio_frame, muted, action_override) != 0) {
240     return kFail;
241   }
242   RTC_DCHECK_EQ(
243       audio_frame->sample_rate_hz_,
244       rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
245   RTC_DCHECK_EQ(*muted, audio_frame->muted());
246   SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
247                                last_vad_activity_, audio_frame);
248   last_vad_activity_ = audio_frame->vad_activity_;
249   last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
250   RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
251              last_output_sample_rate_hz_ == 16000 ||
252              last_output_sample_rate_hz_ == 32000 ||
253              last_output_sample_rate_hz_ == 48000)
254       << "Unexpected sample rate " << last_output_sample_rate_hz_;
255 
256   if (current_sample_rate_hz) {
257     *current_sample_rate_hz = last_output_sample_rate_hz_;
258   }
259 
260   return kOK;
261 }
262 
SetCodecs(const std::map<int,SdpAudioFormat> & codecs)263 void NetEqImpl::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) {
264   MutexLock lock(&mutex_);
265   const std::vector<int> changed_payload_types =
266       decoder_database_->SetCodecs(codecs);
267   for (const int pt : changed_payload_types) {
268     packet_buffer_->DiscardPacketsWithPayloadType(pt, stats_.get());
269   }
270 }
271 
RegisterPayloadType(int rtp_payload_type,const SdpAudioFormat & audio_format)272 bool NetEqImpl::RegisterPayloadType(int rtp_payload_type,
273                                     const SdpAudioFormat& audio_format) {
274   RTC_LOG(LS_VERBOSE) << "NetEqImpl::RegisterPayloadType: payload type "
275                       << rtp_payload_type << ", codec "
276                       << rtc::ToString(audio_format);
277   MutexLock lock(&mutex_);
278   return decoder_database_->RegisterPayload(rtp_payload_type, audio_format) ==
279          DecoderDatabase::kOK;
280 }
281 
RemovePayloadType(uint8_t rtp_payload_type)282 int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) {
283   MutexLock lock(&mutex_);
284   int ret = decoder_database_->Remove(rtp_payload_type);
285   if (ret == DecoderDatabase::kOK || ret == DecoderDatabase::kDecoderNotFound) {
286     packet_buffer_->DiscardPacketsWithPayloadType(rtp_payload_type,
287                                                   stats_.get());
288     return kOK;
289   }
290   return kFail;
291 }
292 
RemoveAllPayloadTypes()293 void NetEqImpl::RemoveAllPayloadTypes() {
294   MutexLock lock(&mutex_);
295   decoder_database_->RemoveAll();
296 }
297 
SetMinimumDelay(int delay_ms)298 bool NetEqImpl::SetMinimumDelay(int delay_ms) {
299   MutexLock lock(&mutex_);
300   if (delay_ms >= 0 && delay_ms <= 10000) {
301     RTC_DCHECK(controller_.get());
302     return controller_->SetMinimumDelay(delay_ms);
303   }
304   return false;
305 }
306 
SetMaximumDelay(int delay_ms)307 bool NetEqImpl::SetMaximumDelay(int delay_ms) {
308   MutexLock lock(&mutex_);
309   if (delay_ms >= 0 && delay_ms <= 10000) {
310     RTC_DCHECK(controller_.get());
311     return controller_->SetMaximumDelay(delay_ms);
312   }
313   return false;
314 }
315 
SetBaseMinimumDelayMs(int delay_ms)316 bool NetEqImpl::SetBaseMinimumDelayMs(int delay_ms) {
317   MutexLock lock(&mutex_);
318   if (delay_ms >= 0 && delay_ms <= 10000) {
319     return controller_->SetBaseMinimumDelay(delay_ms);
320   }
321   return false;
322 }
323 
GetBaseMinimumDelayMs() const324 int NetEqImpl::GetBaseMinimumDelayMs() const {
325   MutexLock lock(&mutex_);
326   return controller_->GetBaseMinimumDelay();
327 }
328 
TargetDelayMs() const329 int NetEqImpl::TargetDelayMs() const {
330   MutexLock lock(&mutex_);
331   RTC_DCHECK(controller_.get());
332   return controller_->TargetLevelMs();
333 }
334 
FilteredCurrentDelayMs() const335 int NetEqImpl::FilteredCurrentDelayMs() const {
336   MutexLock lock(&mutex_);
337   // Sum up the filtered packet buffer level with the future length of the sync
338   // buffer.
339   const int delay_samples =
340       controller_->GetFilteredBufferLevel() + sync_buffer_->FutureLength();
341   // The division below will truncate. The return value is in ms.
342   return delay_samples / rtc::CheckedDivExact(fs_hz_, 1000);
343 }
344 
NetworkStatistics(NetEqNetworkStatistics * stats)345 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
346   MutexLock lock(&mutex_);
347   RTC_DCHECK(decoder_database_.get());
348   *stats = CurrentNetworkStatisticsInternal();
349   stats_->GetNetworkStatistics(decoder_frame_length_, stats);
350   return 0;
351 }
352 
CurrentNetworkStatistics() const353 NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatistics() const {
354   MutexLock lock(&mutex_);
355   return CurrentNetworkStatisticsInternal();
356 }
357 
CurrentNetworkStatisticsInternal() const358 NetEqNetworkStatistics NetEqImpl::CurrentNetworkStatisticsInternal() const {
359   RTC_DCHECK(decoder_database_.get());
360   NetEqNetworkStatistics stats;
361   const size_t total_samples_in_buffers =
362       packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
363       sync_buffer_->FutureLength();
364 
365   RTC_DCHECK(controller_.get());
366   stats.preferred_buffer_size_ms = controller_->TargetLevelMs();
367   stats.jitter_peaks_found = controller_->PeakFound();
368   RTC_DCHECK_GT(fs_hz_, 0);
369   stats.current_buffer_size_ms =
370       static_cast<uint16_t>(total_samples_in_buffers * 1000 / fs_hz_);
371   return stats;
372 }
373 
GetLifetimeStatistics() const374 NetEqLifetimeStatistics NetEqImpl::GetLifetimeStatistics() const {
375   MutexLock lock(&mutex_);
376   return stats_->GetLifetimeStatistics();
377 }
378 
GetOperationsAndState() const379 NetEqOperationsAndState NetEqImpl::GetOperationsAndState() const {
380   MutexLock lock(&mutex_);
381   auto result = stats_->GetOperationsAndState();
382   result.current_buffer_size_ms =
383       (packet_buffer_->NumSamplesInBuffer(decoder_frame_length_) +
384        sync_buffer_->FutureLength()) *
385       1000 / fs_hz_;
386   result.current_frame_size_ms = decoder_frame_length_ * 1000 / fs_hz_;
387   result.next_packet_available = packet_buffer_->PeekNextPacket() &&
388                                  packet_buffer_->PeekNextPacket()->timestamp ==
389                                      sync_buffer_->end_timestamp();
390   return result;
391 }
392 
EnableVad()393 void NetEqImpl::EnableVad() {
394   MutexLock lock(&mutex_);
395   RTC_DCHECK(vad_.get());
396   vad_->Enable();
397 }
398 
DisableVad()399 void NetEqImpl::DisableVad() {
400   MutexLock lock(&mutex_);
401   RTC_DCHECK(vad_.get());
402   vad_->Disable();
403 }
404 
GetPlayoutTimestamp() const405 absl::optional<uint32_t> NetEqImpl::GetPlayoutTimestamp() const {
406   MutexLock lock(&mutex_);
407   if (first_packet_ || last_mode_ == Mode::kRfc3389Cng ||
408       last_mode_ == Mode::kCodecInternalCng) {
409     // We don't have a valid RTP timestamp until we have decoded our first
410     // RTP packet. Also, the RTP timestamp is not accurate while playing CNG,
411     // which is indicated by returning an empty value.
412     return absl::nullopt;
413   }
414   return timestamp_scaler_->ToExternal(playout_timestamp_);
415 }
416 
last_output_sample_rate_hz() const417 int NetEqImpl::last_output_sample_rate_hz() const {
418   MutexLock lock(&mutex_);
419   return last_output_sample_rate_hz_;
420 }
421 
GetDecoderFormat(int payload_type) const422 absl::optional<NetEq::DecoderFormat> NetEqImpl::GetDecoderFormat(
423     int payload_type) const {
424   MutexLock lock(&mutex_);
425   const DecoderDatabase::DecoderInfo* const di =
426       decoder_database_->GetDecoderInfo(payload_type);
427   if (di) {
428     const AudioDecoder* const decoder = di->GetDecoder();
429     // TODO(kwiberg): Why the special case for RED?
430     return DecoderFormat{
431         /*sample_rate_hz=*/di->IsRed() ? 8000 : di->SampleRateHz(),
432         /*num_channels=*/
433         decoder ? rtc::dchecked_cast<int>(decoder->Channels()) : 1,
434         /*sdp_format=*/di->GetFormat()};
435   } else {
436     // Payload type not registered.
437     return absl::nullopt;
438   }
439 }
440 
FlushBuffers()441 void NetEqImpl::FlushBuffers() {
442   MutexLock lock(&mutex_);
443   RTC_LOG(LS_VERBOSE) << "FlushBuffers";
444   packet_buffer_->Flush(stats_.get());
445   RTC_DCHECK(sync_buffer_.get());
446   RTC_DCHECK(expand_.get());
447   sync_buffer_->Flush();
448   sync_buffer_->set_next_index(sync_buffer_->next_index() -
449                                expand_->overlap_length());
450   // Set to wait for new codec.
451   first_packet_ = true;
452 }
453 
EnableNack(size_t max_nack_list_size)454 void NetEqImpl::EnableNack(size_t max_nack_list_size) {
455   MutexLock lock(&mutex_);
456   if (!nack_enabled_) {
457     nack_ = std::make_unique<NackTracker>();
458     nack_enabled_ = true;
459     nack_->UpdateSampleRate(fs_hz_);
460   }
461   nack_->SetMaxNackListSize(max_nack_list_size);
462 }
463 
DisableNack()464 void NetEqImpl::DisableNack() {
465   MutexLock lock(&mutex_);
466   nack_.reset();
467   nack_enabled_ = false;
468 }
469 
GetNackList(int64_t round_trip_time_ms) const470 std::vector<uint16_t> NetEqImpl::GetNackList(int64_t round_trip_time_ms) const {
471   MutexLock lock(&mutex_);
472   if (!nack_enabled_) {
473     return std::vector<uint16_t>();
474   }
475   RTC_DCHECK(nack_.get());
476   return nack_->GetNackList(round_trip_time_ms);
477 }
478 
SyncBufferSizeMs() const479 int NetEqImpl::SyncBufferSizeMs() const {
480   MutexLock lock(&mutex_);
481   return rtc::dchecked_cast<int>(sync_buffer_->FutureLength() /
482                                  rtc::CheckedDivExact(fs_hz_, 1000));
483 }
484 
sync_buffer_for_test() const485 const SyncBuffer* NetEqImpl::sync_buffer_for_test() const {
486   MutexLock lock(&mutex_);
487   return sync_buffer_.get();
488 }
489 
last_operation_for_test() const490 NetEq::Operation NetEqImpl::last_operation_for_test() const {
491   MutexLock lock(&mutex_);
492   return last_operation_;
493 }
494 
495 // Methods below this line are private.
496 
InsertPacketInternal(const RTPHeader & rtp_header,rtc::ArrayView<const uint8_t> payload)497 int NetEqImpl::InsertPacketInternal(const RTPHeader& rtp_header,
498                                     rtc::ArrayView<const uint8_t> payload) {
499   if (payload.empty()) {
500     RTC_LOG_F(LS_ERROR) << "payload is empty";
501     return kInvalidPointer;
502   }
503 
504   Timestamp receive_time = clock_->CurrentTime();
505   stats_->ReceivedPacket();
506 
507   PacketList packet_list;
508   // Insert packet in a packet list.
509   packet_list.push_back([&rtp_header, &payload, &receive_time] {
510     // Convert to Packet.
511     Packet packet;
512     packet.payload_type = rtp_header.payloadType;
513     packet.sequence_number = rtp_header.sequenceNumber;
514     packet.timestamp = rtp_header.timestamp;
515     packet.payload.SetData(payload.data(), payload.size());
516     packet.packet_info = RtpPacketInfo(rtp_header, receive_time);
517     // Waiting time will be set upon inserting the packet in the buffer.
518     RTC_DCHECK(!packet.waiting_time);
519     return packet;
520   }());
521 
522   bool update_sample_rate_and_channels = first_packet_;
523 
524   if (update_sample_rate_and_channels) {
525     // Reset timestamp scaling.
526     timestamp_scaler_->Reset();
527   }
528 
529   if (!decoder_database_->IsRed(rtp_header.payloadType)) {
530     // Scale timestamp to internal domain (only for some codecs).
531     timestamp_scaler_->ToInternal(&packet_list);
532   }
533 
534   // Store these for later use, since the first packet may very well disappear
535   // before we need these values.
536   uint32_t main_timestamp = packet_list.front().timestamp;
537   uint8_t main_payload_type = packet_list.front().payload_type;
538   uint16_t main_sequence_number = packet_list.front().sequence_number;
539 
540   // Reinitialize NetEq if it's needed (changed SSRC or first call).
541   if (update_sample_rate_and_channels) {
542     // Note: `first_packet_` will be cleared further down in this method, once
543     // the packet has been successfully inserted into the packet buffer.
544 
545     // Flush the packet buffer and DTMF buffer.
546     packet_buffer_->Flush(stats_.get());
547     dtmf_buffer_->Flush();
548 
549     // Update audio buffer timestamp.
550     sync_buffer_->IncreaseEndTimestamp(main_timestamp - timestamp_);
551 
552     // Update codecs.
553     timestamp_ = main_timestamp;
554   }
555 
556   if (nack_enabled_) {
557     RTC_DCHECK(nack_);
558     if (update_sample_rate_and_channels) {
559       nack_->Reset();
560     }
561     nack_->UpdateLastReceivedPacket(main_sequence_number, main_timestamp);
562   }
563 
564   // Check for RED payload type, and separate payloads into several packets.
565   if (decoder_database_->IsRed(rtp_header.payloadType)) {
566     if (!red_payload_splitter_->SplitRed(&packet_list)) {
567       return kRedundancySplitError;
568     }
569     // Only accept a few RED payloads of the same type as the main data,
570     // DTMF events and CNG.
571     red_payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_);
572     if (packet_list.empty()) {
573       return kRedundancySplitError;
574     }
575   }
576 
577   // Check payload types.
578   if (decoder_database_->CheckPayloadTypes(packet_list) ==
579       DecoderDatabase::kDecoderNotFound) {
580     return kUnknownRtpPayloadType;
581   }
582 
583   RTC_DCHECK(!packet_list.empty());
584 
585   // Update main_timestamp, if new packets appear in the list
586   // after RED splitting.
587   if (decoder_database_->IsRed(rtp_header.payloadType)) {
588     timestamp_scaler_->ToInternal(&packet_list);
589     main_timestamp = packet_list.front().timestamp;
590     main_payload_type = packet_list.front().payload_type;
591     main_sequence_number = packet_list.front().sequence_number;
592   }
593 
594   // Process DTMF payloads. Cycle through the list of packets, and pick out any
595   // DTMF payloads found.
596   PacketList::iterator it = packet_list.begin();
597   while (it != packet_list.end()) {
598     const Packet& current_packet = (*it);
599     RTC_DCHECK(!current_packet.payload.empty());
600     if (decoder_database_->IsDtmf(current_packet.payload_type)) {
601       DtmfEvent event;
602       int ret = DtmfBuffer::ParseEvent(current_packet.timestamp,
603                                        current_packet.payload.data(),
604                                        current_packet.payload.size(), &event);
605       if (ret != DtmfBuffer::kOK) {
606         return kDtmfParsingError;
607       }
608       if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) {
609         return kDtmfInsertError;
610       }
611       it = packet_list.erase(it);
612     } else {
613       ++it;
614     }
615   }
616 
617   PacketList parsed_packet_list;
618   bool is_dtx = false;
619   while (!packet_list.empty()) {
620     Packet& packet = packet_list.front();
621     const DecoderDatabase::DecoderInfo* info =
622         decoder_database_->GetDecoderInfo(packet.payload_type);
623     if (!info) {
624       RTC_LOG(LS_WARNING) << "SplitAudio unknown payload type";
625       return kUnknownRtpPayloadType;
626     }
627 
628     if (info->IsComfortNoise()) {
629       // Carry comfort noise packets along.
630       parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
631                                 packet_list.begin());
632     } else {
633       const auto sequence_number = packet.sequence_number;
634       const auto payload_type = packet.payload_type;
635       const Packet::Priority original_priority = packet.priority;
636       const auto& packet_info = packet.packet_info;
637       auto packet_from_result = [&](AudioDecoder::ParseResult& result) {
638         Packet new_packet;
639         new_packet.sequence_number = sequence_number;
640         new_packet.payload_type = payload_type;
641         new_packet.timestamp = result.timestamp;
642         new_packet.priority.codec_level = result.priority;
643         new_packet.priority.red_level = original_priority.red_level;
644         new_packet.packet_info = packet_info;
645         new_packet.frame = std::move(result.frame);
646         return new_packet;
647       };
648 
649       std::vector<AudioDecoder::ParseResult> results =
650           info->GetDecoder()->ParsePayload(std::move(packet.payload),
651                                            packet.timestamp);
652       if (results.empty()) {
653         packet_list.pop_front();
654       } else {
655         bool first = true;
656         for (auto& result : results) {
657           RTC_DCHECK(result.frame);
658           RTC_DCHECK_GE(result.priority, 0);
659           is_dtx = is_dtx || result.frame->IsDtxPacket();
660           if (first) {
661             // Re-use the node and move it to parsed_packet_list.
662             packet_list.front() = packet_from_result(result);
663             parsed_packet_list.splice(parsed_packet_list.end(), packet_list,
664                                       packet_list.begin());
665             first = false;
666           } else {
667             parsed_packet_list.push_back(packet_from_result(result));
668           }
669         }
670       }
671     }
672   }
673 
674   // Calculate the number of primary (non-FEC/RED) packets.
675   const size_t number_of_primary_packets = std::count_if(
676       parsed_packet_list.begin(), parsed_packet_list.end(),
677       [](const Packet& in) { return in.priority.codec_level == 0; });
678   if (number_of_primary_packets < parsed_packet_list.size()) {
679     stats_->SecondaryPacketsReceived(parsed_packet_list.size() -
680                                      number_of_primary_packets);
681   }
682 
683   // Insert packets in buffer.
684   const int target_level_ms = controller_->TargetLevelMs();
685   const int ret = packet_buffer_->InsertPacketList(
686       &parsed_packet_list, *decoder_database_, &current_rtp_payload_type_,
687       &current_cng_rtp_payload_type_, stats_.get(), decoder_frame_length_,
688       last_output_sample_rate_hz_, target_level_ms);
689   bool buffer_flush_occured = false;
690   if (ret == PacketBuffer::kFlushed) {
691     // Reset DSP timestamp etc. if packet buffer flushed.
692     new_codec_ = true;
693     update_sample_rate_and_channels = true;
694     buffer_flush_occured = true;
695   } else if (ret == PacketBuffer::kPartialFlush) {
696     // Forward sync buffer timestamp
697     timestamp_ = packet_buffer_->PeekNextPacket()->timestamp;
698     sync_buffer_->IncreaseEndTimestamp(timestamp_ -
699                                        sync_buffer_->end_timestamp());
700     buffer_flush_occured = true;
701   } else if (ret != PacketBuffer::kOK) {
702     return kOtherError;
703   }
704 
705   if (first_packet_) {
706     first_packet_ = false;
707     // Update the codec on the next GetAudio call.
708     new_codec_ = true;
709   }
710 
711   if (current_rtp_payload_type_) {
712     RTC_DCHECK(decoder_database_->GetDecoderInfo(*current_rtp_payload_type_))
713         << "Payload type " << static_cast<int>(*current_rtp_payload_type_)
714         << " is unknown where it shouldn't be";
715   }
716 
717   if (update_sample_rate_and_channels && !packet_buffer_->Empty()) {
718     // We do not use `current_rtp_payload_type_` to |set payload_type|, but
719     // get the next RTP header from `packet_buffer_` to obtain the payload type.
720     // The reason for it is the following corner case. If NetEq receives a
721     // CNG packet with a sample rate different than the current CNG then it
722     // flushes its buffer, assuming send codec must have been changed. However,
723     // payload type of the hypothetically new send codec is not known.
724     const Packet* next_packet = packet_buffer_->PeekNextPacket();
725     RTC_DCHECK(next_packet);
726     const int payload_type = next_packet->payload_type;
727     size_t channels = 1;
728     if (!decoder_database_->IsComfortNoise(payload_type)) {
729       AudioDecoder* decoder = decoder_database_->GetDecoder(payload_type);
730       RTC_DCHECK(decoder);  // Payloads are already checked to be valid.
731       channels = decoder->Channels();
732     }
733     const DecoderDatabase::DecoderInfo* decoder_info =
734         decoder_database_->GetDecoderInfo(payload_type);
735     RTC_DCHECK(decoder_info);
736     if (decoder_info->SampleRateHz() != fs_hz_ ||
737         channels != algorithm_buffer_->Channels()) {
738       SetSampleRateAndChannels(decoder_info->SampleRateHz(), channels);
739     }
740     if (nack_enabled_) {
741       RTC_DCHECK(nack_);
742       // Update the sample rate even if the rate is not new, because of Reset().
743       nack_->UpdateSampleRate(fs_hz_);
744     }
745   }
746 
747   const DecoderDatabase::DecoderInfo* dec_info =
748       decoder_database_->GetDecoderInfo(main_payload_type);
749   RTC_DCHECK(dec_info);  // Already checked that the payload type is known.
750 
751   NetEqController::PacketArrivedInfo info;
752   info.is_cng_or_dtmf = dec_info->IsComfortNoise() || dec_info->IsDtmf();
753   info.packet_length_samples =
754       number_of_primary_packets * decoder_frame_length_;
755   info.main_timestamp = main_timestamp;
756   info.main_sequence_number = main_sequence_number;
757   info.is_dtx = is_dtx;
758   info.buffer_flush = buffer_flush_occured;
759 
760   const bool should_update_stats = !new_codec_;
761   auto relative_delay =
762       controller_->PacketArrived(fs_hz_, should_update_stats, info);
763   if (relative_delay) {
764     stats_->RelativePacketArrivalDelay(relative_delay.value());
765   }
766   return 0;
767 }
768 
GetAudioInternal(AudioFrame * audio_frame,bool * muted,absl::optional<Operation> action_override)769 int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame,
770                                 bool* muted,
771                                 absl::optional<Operation> action_override) {
772   PacketList packet_list;
773   DtmfEvent dtmf_event;
774   Operation operation;
775   bool play_dtmf;
776   *muted = false;
777   last_decoded_packet_infos_.clear();
778   tick_timer_->Increment();
779   stats_->IncreaseCounter(output_size_samples_, fs_hz_);
780   const auto lifetime_stats = stats_->GetLifetimeStatistics();
781   expand_uma_logger_.UpdateSampleCounter(lifetime_stats.concealed_samples,
782                                          fs_hz_);
783   speech_expand_uma_logger_.UpdateSampleCounter(
784       lifetime_stats.concealed_samples -
785           lifetime_stats.silent_concealed_samples,
786       fs_hz_);
787 
788   // Check for muted state.
789   if (enable_muted_state_ && expand_->Muted() && packet_buffer_->Empty()) {
790     RTC_DCHECK_EQ(last_mode_, Mode::kExpand);
791     audio_frame->Reset();
792     RTC_DCHECK(audio_frame->muted());  // Reset() should mute the frame.
793     playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
794     audio_frame->sample_rate_hz_ = fs_hz_;
795     // Make sure the total number of samples fits in the AudioFrame.
796     if (output_size_samples_ * sync_buffer_->Channels() >
797         AudioFrame::kMaxDataSizeSamples) {
798       return kSampleUnderrun;
799     }
800     audio_frame->samples_per_channel_ = output_size_samples_;
801     audio_frame->timestamp_ =
802         first_packet_
803             ? 0
804             : timestamp_scaler_->ToExternal(playout_timestamp_) -
805                   static_cast<uint32_t>(audio_frame->samples_per_channel_);
806     audio_frame->num_channels_ = sync_buffer_->Channels();
807     stats_->ExpandedNoiseSamples(output_size_samples_, false);
808     controller_->NotifyMutedState();
809     *muted = true;
810     return 0;
811   }
812   int return_value = GetDecision(&operation, &packet_list, &dtmf_event,
813                                  &play_dtmf, action_override);
814   if (return_value != 0) {
815     last_mode_ = Mode::kError;
816     return return_value;
817   }
818 
819   AudioDecoder::SpeechType speech_type;
820   int length = 0;
821   const size_t start_num_packets = packet_list.size();
822   int decode_return_value =
823       Decode(&packet_list, &operation, &length, &speech_type);
824 
825   RTC_DCHECK(vad_.get());
826   bool sid_frame_available =
827       (operation == Operation::kRfc3389Cng && !packet_list.empty());
828   vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
829                sid_frame_available, fs_hz_);
830 
831   // This is the criterion that we did decode some data through the speech
832   // decoder, and the operation resulted in comfort noise.
833   const bool codec_internal_sid_frame =
834       (speech_type == AudioDecoder::kComfortNoise &&
835        start_num_packets > packet_list.size());
836 
837   if (sid_frame_available || codec_internal_sid_frame) {
838     // Start a new stopwatch since we are decoding a new CNG packet.
839     generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
840   }
841 
842   algorithm_buffer_->Clear();
843   switch (operation) {
844     case Operation::kNormal: {
845       DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf);
846       if (length > 0) {
847         stats_->DecodedOutputPlayed();
848       }
849       break;
850     }
851     case Operation::kMerge: {
852       DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf);
853       break;
854     }
855     case Operation::kExpand: {
856       RTC_DCHECK_EQ(return_value, 0);
857       if (!current_rtp_payload_type_ || !DoCodecPlc()) {
858         return_value = DoExpand(play_dtmf);
859       }
860       RTC_DCHECK_GE(sync_buffer_->FutureLength() - expand_->overlap_length(),
861                     output_size_samples_);
862       break;
863     }
864     case Operation::kAccelerate:
865     case Operation::kFastAccelerate: {
866       const bool fast_accelerate =
867           enable_fast_accelerate_ && (operation == Operation::kFastAccelerate);
868       return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type,
869                                   play_dtmf, fast_accelerate);
870       break;
871     }
872     case Operation::kPreemptiveExpand: {
873       return_value = DoPreemptiveExpand(decoded_buffer_.get(), length,
874                                         speech_type, play_dtmf);
875       break;
876     }
877     case Operation::kRfc3389Cng:
878     case Operation::kRfc3389CngNoPacket: {
879       return_value = DoRfc3389Cng(&packet_list, play_dtmf);
880       break;
881     }
882     case Operation::kCodecInternalCng: {
883       // This handles the case when there is no transmission and the decoder
884       // should produce internal comfort noise.
885       // TODO(hlundin): Write test for codec-internal CNG.
886       DoCodecInternalCng(decoded_buffer_.get(), length);
887       break;
888     }
889     case Operation::kDtmf: {
890       // TODO(hlundin): Write test for this.
891       return_value = DoDtmf(dtmf_event, &play_dtmf);
892       break;
893     }
894     case Operation::kUndefined: {
895       RTC_LOG(LS_ERROR) << "Invalid operation kUndefined.";
896       RTC_DCHECK_NOTREACHED();  // This should not happen.
897       last_mode_ = Mode::kError;
898       return kInvalidOperation;
899     }
900   }  // End of switch.
901   last_operation_ = operation;
902   if (return_value < 0) {
903     return return_value;
904   }
905 
906   if (last_mode_ != Mode::kRfc3389Cng) {
907     comfort_noise_->Reset();
908   }
909 
910   // We treat it as if all packets referenced to by `last_decoded_packet_infos_`
911   // were mashed together when creating the samples in `algorithm_buffer_`.
912   RtpPacketInfos packet_infos(last_decoded_packet_infos_);
913 
914   // Copy samples from `algorithm_buffer_` to `sync_buffer_`.
915   //
916   // TODO(bugs.webrtc.org/10757):
917   //   We would in the future also like to pass `packet_infos` so that we can do
918   //   sample-perfect tracking of that information across `sync_buffer_`.
919   sync_buffer_->PushBack(*algorithm_buffer_);
920 
921   // Extract data from `sync_buffer_` to `output`.
922   size_t num_output_samples_per_channel = output_size_samples_;
923   size_t num_output_samples = output_size_samples_ * sync_buffer_->Channels();
924   if (num_output_samples > AudioFrame::kMaxDataSizeSamples) {
925     RTC_LOG(LS_WARNING) << "Output array is too short. "
926                         << AudioFrame::kMaxDataSizeSamples << " < "
927                         << output_size_samples_ << " * "
928                         << sync_buffer_->Channels();
929     num_output_samples = AudioFrame::kMaxDataSizeSamples;
930     num_output_samples_per_channel =
931         AudioFrame::kMaxDataSizeSamples / sync_buffer_->Channels();
932   }
933   sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
934                                         audio_frame);
935   audio_frame->sample_rate_hz_ = fs_hz_;
936   // TODO(bugs.webrtc.org/10757):
937   //   We don't have the ability to properly track individual packets once their
938   //   audio samples have entered `sync_buffer_`. So for now, treat it as if
939   //   `packet_infos` from packets decoded by the current `GetAudioInternal()`
940   //   call were all consumed assembling the current audio frame and the current
941   //   audio frame only.
942   audio_frame->packet_infos_ = std::move(packet_infos);
943   if (sync_buffer_->FutureLength() < expand_->overlap_length()) {
944     // The sync buffer should always contain `overlap_length` samples, but now
945     // too many samples have been extracted. Reinstall the `overlap_length`
946     // lookahead by moving the index.
947     const size_t missing_lookahead_samples =
948         expand_->overlap_length() - sync_buffer_->FutureLength();
949     RTC_DCHECK_GE(sync_buffer_->next_index(), missing_lookahead_samples);
950     sync_buffer_->set_next_index(sync_buffer_->next_index() -
951                                  missing_lookahead_samples);
952   }
953   if (audio_frame->samples_per_channel_ != output_size_samples_) {
954     RTC_LOG(LS_ERROR) << "audio_frame->samples_per_channel_ ("
955                       << audio_frame->samples_per_channel_
956                       << ") != output_size_samples_ (" << output_size_samples_
957                       << ")";
958     // TODO(minyue): treatment of under-run, filling zeros
959     audio_frame->Mute();
960     return kSampleUnderrun;
961   }
962 
963   // Should always have overlap samples left in the `sync_buffer_`.
964   RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
965 
966   // TODO(yujo): For muted frames, this can be a copy rather than an addition.
967   if (play_dtmf) {
968     return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
969                                audio_frame->mutable_data());
970   }
971 
972   // Update the background noise parameters if last operation wrote data
973   // straight from the decoder to the `sync_buffer_`. That is, none of the
974   // operations that modify the signal can be followed by a parameter update.
975   if ((last_mode_ == Mode::kNormal) || (last_mode_ == Mode::kAccelerateFail) ||
976       (last_mode_ == Mode::kPreemptiveExpandFail) ||
977       (last_mode_ == Mode::kRfc3389Cng) ||
978       (last_mode_ == Mode::kCodecInternalCng)) {
979     background_noise_->Update(*sync_buffer_, *vad_.get());
980   }
981 
982   if (operation == Operation::kDtmf) {
983     // DTMF data was written the end of `sync_buffer_`.
984     // Update index to end of DTMF data in `sync_buffer_`.
985     sync_buffer_->set_dtmf_index(sync_buffer_->Size());
986   }
987 
988   if (last_mode_ != Mode::kExpand && last_mode_ != Mode::kCodecPlc) {
989     // If last operation was not expand, calculate the `playout_timestamp_` from
990     // the `sync_buffer_`. However, do not update the `playout_timestamp_` if it
991     // would be moved "backwards".
992     uint32_t temp_timestamp =
993         sync_buffer_->end_timestamp() -
994         static_cast<uint32_t>(sync_buffer_->FutureLength());
995     if (static_cast<int32_t>(temp_timestamp - playout_timestamp_) > 0) {
996       playout_timestamp_ = temp_timestamp;
997     }
998   } else {
999     // Use dead reckoning to estimate the `playout_timestamp_`.
1000     playout_timestamp_ += static_cast<uint32_t>(output_size_samples_);
1001   }
1002   // Set the timestamp in the audio frame to zero before the first packet has
1003   // been inserted. Otherwise, subtract the frame size in samples to get the
1004   // timestamp of the first sample in the frame (playout_timestamp_ is the
1005   // last + 1).
1006   audio_frame->timestamp_ =
1007       first_packet_
1008           ? 0
1009           : timestamp_scaler_->ToExternal(playout_timestamp_) -
1010                 static_cast<uint32_t>(audio_frame->samples_per_channel_);
1011 
1012   if (!(last_mode_ == Mode::kRfc3389Cng ||
1013         last_mode_ == Mode::kCodecInternalCng || last_mode_ == Mode::kExpand ||
1014         last_mode_ == Mode::kCodecPlc)) {
1015     generated_noise_stopwatch_.reset();
1016   }
1017 
1018   if (decode_return_value)
1019     return decode_return_value;
1020   return return_value;
1021 }
1022 
GetDecision(Operation * operation,PacketList * packet_list,DtmfEvent * dtmf_event,bool * play_dtmf,absl::optional<Operation> action_override)1023 int NetEqImpl::GetDecision(Operation* operation,
1024                            PacketList* packet_list,
1025                            DtmfEvent* dtmf_event,
1026                            bool* play_dtmf,
1027                            absl::optional<Operation> action_override) {
1028   // Initialize output variables.
1029   *play_dtmf = false;
1030   *operation = Operation::kUndefined;
1031 
1032   RTC_DCHECK(sync_buffer_.get());
1033   uint32_t end_timestamp = sync_buffer_->end_timestamp();
1034   if (!new_codec_) {
1035     const uint32_t five_seconds_samples = 5 * fs_hz_;
1036     packet_buffer_->DiscardOldPackets(end_timestamp, five_seconds_samples,
1037                                       stats_.get());
1038   }
1039   const Packet* packet = packet_buffer_->PeekNextPacket();
1040 
1041   RTC_DCHECK(!generated_noise_stopwatch_ ||
1042              generated_noise_stopwatch_->ElapsedTicks() >= 1);
1043   uint64_t generated_noise_samples =
1044       generated_noise_stopwatch_ ? (generated_noise_stopwatch_->ElapsedTicks() -
1045                                     1) * output_size_samples_ +
1046                                        controller_->noise_fast_forward()
1047                                  : 0;
1048 
1049   if (controller_->CngRfc3389On() || last_mode_ == Mode::kRfc3389Cng) {
1050     // Because of timestamp peculiarities, we have to "manually" disallow using
1051     // a CNG packet with the same timestamp as the one that was last played.
1052     // This can happen when using redundancy and will cause the timing to shift.
1053     while (packet && decoder_database_->IsComfortNoise(packet->payload_type) &&
1054            (end_timestamp >= packet->timestamp ||
1055             end_timestamp + generated_noise_samples > packet->timestamp)) {
1056       // Don't use this packet, discard it.
1057       if (packet_buffer_->DiscardNextPacket(stats_.get()) !=
1058           PacketBuffer::kOK) {
1059         RTC_DCHECK_NOTREACHED();  // Must be ok by design.
1060       }
1061       // Check buffer again.
1062       if (!new_codec_) {
1063         packet_buffer_->DiscardOldPackets(end_timestamp, 5 * fs_hz_,
1064                                           stats_.get());
1065       }
1066       packet = packet_buffer_->PeekNextPacket();
1067     }
1068   }
1069 
1070   RTC_DCHECK(expand_.get());
1071   const int samples_left = static_cast<int>(sync_buffer_->FutureLength() -
1072                                             expand_->overlap_length());
1073   if (last_mode_ == Mode::kAccelerateSuccess ||
1074       last_mode_ == Mode::kAccelerateLowEnergy ||
1075       last_mode_ == Mode::kPreemptiveExpandSuccess ||
1076       last_mode_ == Mode::kPreemptiveExpandLowEnergy) {
1077     // Subtract (samples_left + output_size_samples_) from sampleMemory.
1078     controller_->AddSampleMemory(
1079         -(samples_left + rtc::dchecked_cast<int>(output_size_samples_)));
1080   }
1081 
1082   // Check if it is time to play a DTMF event.
1083   if (dtmf_buffer_->GetEvent(
1084           static_cast<uint32_t>(end_timestamp + generated_noise_samples),
1085           dtmf_event)) {
1086     *play_dtmf = true;
1087   }
1088 
1089   // Get instruction.
1090   RTC_DCHECK(sync_buffer_.get());
1091   RTC_DCHECK(expand_.get());
1092   generated_noise_samples =
1093       generated_noise_stopwatch_
1094           ? generated_noise_stopwatch_->ElapsedTicks() * output_size_samples_ +
1095                 controller_->noise_fast_forward()
1096           : 0;
1097   NetEqController::NetEqStatus status;
1098   status.packet_buffer_info.dtx_or_cng =
1099       packet_buffer_->ContainsDtxOrCngPacket(decoder_database_.get());
1100   status.packet_buffer_info.num_samples =
1101       packet_buffer_->NumSamplesInBuffer(decoder_frame_length_);
1102   status.packet_buffer_info.span_samples = packet_buffer_->GetSpanSamples(
1103       decoder_frame_length_, last_output_sample_rate_hz_, true);
1104   status.packet_buffer_info.span_samples_no_dtx =
1105       packet_buffer_->GetSpanSamples(decoder_frame_length_,
1106                                      last_output_sample_rate_hz_, false);
1107   status.packet_buffer_info.num_packets = packet_buffer_->NumPacketsInBuffer();
1108   status.target_timestamp = sync_buffer_->end_timestamp();
1109   status.expand_mutefactor = expand_->MuteFactor(0);
1110   status.last_packet_samples = decoder_frame_length_;
1111   status.last_mode = last_mode_;
1112   status.play_dtmf = *play_dtmf;
1113   status.generated_noise_samples = generated_noise_samples;
1114   status.sync_buffer_samples = sync_buffer_->FutureLength();
1115   if (packet) {
1116     status.next_packet = {
1117         packet->timestamp, packet->frame && packet->frame->IsDtxPacket(),
1118         decoder_database_->IsComfortNoise(packet->payload_type)};
1119   }
1120   *operation = controller_->GetDecision(status, &reset_decoder_);
1121 
1122   // Disallow time stretching if this packet is DTX, because such a decision may
1123   // be based on earlier buffer level estimate, as we do not update buffer level
1124   // during DTX. When we have a better way to update buffer level during DTX,
1125   // this can be discarded.
1126   if (packet && packet->frame && packet->frame->IsDtxPacket() &&
1127       (*operation == Operation::kMerge ||
1128        *operation == Operation::kAccelerate ||
1129        *operation == Operation::kFastAccelerate ||
1130        *operation == Operation::kPreemptiveExpand)) {
1131     *operation = Operation::kNormal;
1132   }
1133 
1134   if (action_override) {
1135     // Use the provided action instead of the decision NetEq decided on.
1136     *operation = *action_override;
1137   }
1138   // Check if we already have enough samples in the `sync_buffer_`. If so,
1139   // change decision to normal, unless the decision was merge, accelerate, or
1140   // preemptive expand.
1141   if (samples_left >= rtc::dchecked_cast<int>(output_size_samples_) &&
1142       *operation != Operation::kMerge && *operation != Operation::kAccelerate &&
1143       *operation != Operation::kFastAccelerate &&
1144       *operation != Operation::kPreemptiveExpand) {
1145     *operation = Operation::kNormal;
1146     return 0;
1147   }
1148 
1149   controller_->ExpandDecision(*operation);
1150   if ((last_mode_ == Mode::kCodecPlc) && (*operation != Operation::kExpand)) {
1151     // Getting out of the PLC expand mode, reporting interruptions.
1152     // NetEq PLC reports this metrics in expand.cc
1153     stats_->EndExpandEvent(fs_hz_);
1154   }
1155 
1156   // Check conditions for reset.
1157   if (new_codec_ || *operation == Operation::kUndefined) {
1158     // The only valid reason to get kUndefined is that new_codec_ is set.
1159     RTC_DCHECK(new_codec_);
1160     if (*play_dtmf && !packet) {
1161       timestamp_ = dtmf_event->timestamp;
1162     } else {
1163       if (!packet) {
1164         RTC_LOG(LS_ERROR) << "Packet missing where it shouldn't.";
1165         return -1;
1166       }
1167       timestamp_ = packet->timestamp;
1168       if (*operation == Operation::kRfc3389CngNoPacket &&
1169           decoder_database_->IsComfortNoise(packet->payload_type)) {
1170         // Change decision to CNG packet, since we do have a CNG packet, but it
1171         // was considered too early to use. Now, use it anyway.
1172         *operation = Operation::kRfc3389Cng;
1173       } else if (*operation != Operation::kRfc3389Cng) {
1174         *operation = Operation::kNormal;
1175       }
1176     }
1177     // Adjust `sync_buffer_` timestamp before setting `end_timestamp` to the
1178     // new value.
1179     sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp);
1180     end_timestamp = timestamp_;
1181     new_codec_ = false;
1182     controller_->SoftReset();
1183     stats_->ResetMcu();
1184   }
1185 
1186   size_t required_samples = output_size_samples_;
1187   const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
1188   const size_t samples_20_ms = 2 * samples_10_ms;
1189   const size_t samples_30_ms = 3 * samples_10_ms;
1190 
1191   switch (*operation) {
1192     case Operation::kExpand: {
1193       timestamp_ = end_timestamp;
1194       return 0;
1195     }
1196     case Operation::kRfc3389CngNoPacket:
1197     case Operation::kCodecInternalCng: {
1198       return 0;
1199     }
1200     case Operation::kDtmf: {
1201       // TODO(hlundin): Write test for this.
1202       // Update timestamp.
1203       timestamp_ = end_timestamp;
1204       const uint64_t generated_noise_samples =
1205           generated_noise_stopwatch_
1206               ? generated_noise_stopwatch_->ElapsedTicks() *
1207                         output_size_samples_ +
1208                     controller_->noise_fast_forward()
1209               : 0;
1210       if (generated_noise_samples > 0 && last_mode_ != Mode::kDtmf) {
1211         // Make a jump in timestamp due to the recently played comfort noise.
1212         uint32_t timestamp_jump =
1213             static_cast<uint32_t>(generated_noise_samples);
1214         sync_buffer_->IncreaseEndTimestamp(timestamp_jump);
1215         timestamp_ += timestamp_jump;
1216       }
1217       return 0;
1218     }
1219     case Operation::kAccelerate:
1220     case Operation::kFastAccelerate: {
1221       // In order to do an accelerate we need at least 30 ms of audio data.
1222       if (samples_left >= static_cast<int>(samples_30_ms)) {
1223         // Already have enough data, so we do not need to extract any more.
1224         controller_->set_sample_memory(samples_left);
1225         controller_->set_prev_time_scale(true);
1226         return 0;
1227       } else if (samples_left >= static_cast<int>(samples_10_ms) &&
1228                  decoder_frame_length_ >= samples_30_ms) {
1229         // Avoid decoding more data as it might overflow the playout buffer.
1230         *operation = Operation::kNormal;
1231         return 0;
1232       } else if (samples_left < static_cast<int>(samples_20_ms) &&
1233                  decoder_frame_length_ < samples_30_ms) {
1234         // Build up decoded data by decoding at least 20 ms of audio data. Do
1235         // not perform accelerate yet, but wait until we only need to do one
1236         // decoding.
1237         required_samples = 2 * output_size_samples_;
1238         *operation = Operation::kNormal;
1239       }
1240       // If none of the above is true, we have one of two possible situations:
1241       // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or
1242       // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms.
1243       // In either case, we move on with the accelerate decision, and decode one
1244       // frame now.
1245       break;
1246     }
1247     case Operation::kPreemptiveExpand: {
1248       // In order to do a preemptive expand we need at least 30 ms of decoded
1249       // audio data.
1250       if ((samples_left >= static_cast<int>(samples_30_ms)) ||
1251           (samples_left >= static_cast<int>(samples_10_ms) &&
1252            decoder_frame_length_ >= samples_30_ms)) {
1253         // Already have enough data, so we do not need to extract any more.
1254         // Or, avoid decoding more data as it might overflow the playout buffer.
1255         // Still try preemptive expand, though.
1256         controller_->set_sample_memory(samples_left);
1257         controller_->set_prev_time_scale(true);
1258         return 0;
1259       }
1260       if (samples_left < static_cast<int>(samples_20_ms) &&
1261           decoder_frame_length_ < samples_30_ms) {
1262         // Build up decoded data by decoding at least 20 ms of audio data.
1263         // Still try to perform preemptive expand.
1264         required_samples = 2 * output_size_samples_;
1265       }
1266       // Move on with the preemptive expand decision.
1267       break;
1268     }
1269     case Operation::kMerge: {
1270       required_samples =
1271           std::max(merge_->RequiredFutureSamples(), required_samples);
1272       break;
1273     }
1274     default: {
1275       // Do nothing.
1276     }
1277   }
1278 
1279   // Get packets from buffer.
1280   int extracted_samples = 0;
1281   if (packet) {
1282     sync_buffer_->IncreaseEndTimestamp(packet->timestamp - end_timestamp);
1283 
1284     if (*operation != Operation::kRfc3389Cng) {
1285       // We are about to decode and use a non-CNG packet.
1286       controller_->SetCngOff();
1287     }
1288 
1289     extracted_samples = ExtractPackets(required_samples, packet_list);
1290     if (extracted_samples < 0) {
1291       return kPacketBufferCorruption;
1292     }
1293   }
1294 
1295   if (*operation == Operation::kAccelerate ||
1296       *operation == Operation::kFastAccelerate ||
1297       *operation == Operation::kPreemptiveExpand) {
1298     controller_->set_sample_memory(samples_left + extracted_samples);
1299     controller_->set_prev_time_scale(true);
1300   }
1301 
1302   if (*operation == Operation::kAccelerate ||
1303       *operation == Operation::kFastAccelerate) {
1304     // Check that we have enough data (30ms) to do accelerate.
1305     if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
1306       // TODO(hlundin): Write test for this.
1307       // Not enough, do normal operation instead.
1308       *operation = Operation::kNormal;
1309     }
1310   }
1311 
1312   timestamp_ = sync_buffer_->end_timestamp();
1313   return 0;
1314 }
1315 
Decode(PacketList * packet_list,Operation * operation,int * decoded_length,AudioDecoder::SpeechType * speech_type)1316 int NetEqImpl::Decode(PacketList* packet_list,
1317                       Operation* operation,
1318                       int* decoded_length,
1319                       AudioDecoder::SpeechType* speech_type) {
1320   *speech_type = AudioDecoder::kSpeech;
1321 
1322   // When packet_list is empty, we may be in kCodecInternalCng mode, and for
1323   // that we use current active decoder.
1324   AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1325 
1326   if (!packet_list->empty()) {
1327     const Packet& packet = packet_list->front();
1328     uint8_t payload_type = packet.payload_type;
1329     if (!decoder_database_->IsComfortNoise(payload_type)) {
1330       decoder = decoder_database_->GetDecoder(payload_type);
1331       RTC_DCHECK(decoder);
1332       if (!decoder) {
1333         RTC_LOG(LS_WARNING)
1334             << "Unknown payload type " << static_cast<int>(payload_type);
1335         packet_list->clear();
1336         return kDecoderNotFound;
1337       }
1338       bool decoder_changed;
1339       decoder_database_->SetActiveDecoder(payload_type, &decoder_changed);
1340       if (decoder_changed) {
1341         // We have a new decoder. Re-init some values.
1342         const DecoderDatabase::DecoderInfo* decoder_info =
1343             decoder_database_->GetDecoderInfo(payload_type);
1344         RTC_DCHECK(decoder_info);
1345         if (!decoder_info) {
1346           RTC_LOG(LS_WARNING)
1347               << "Unknown payload type " << static_cast<int>(payload_type);
1348           packet_list->clear();
1349           return kDecoderNotFound;
1350         }
1351         // If sampling rate or number of channels has changed, we need to make
1352         // a reset.
1353         if (decoder_info->SampleRateHz() != fs_hz_ ||
1354             decoder->Channels() != algorithm_buffer_->Channels()) {
1355           // TODO(tlegrand): Add unittest to cover this event.
1356           SetSampleRateAndChannels(decoder_info->SampleRateHz(),
1357                                    decoder->Channels());
1358         }
1359         sync_buffer_->set_end_timestamp(timestamp_);
1360         playout_timestamp_ = timestamp_;
1361       }
1362     }
1363   }
1364 
1365   if (reset_decoder_) {
1366     // TODO(hlundin): Write test for this.
1367     if (decoder)
1368       decoder->Reset();
1369 
1370     // Reset comfort noise decoder.
1371     ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
1372     if (cng_decoder)
1373       cng_decoder->Reset();
1374 
1375     reset_decoder_ = false;
1376   }
1377 
1378   *decoded_length = 0;
1379   // Update codec-internal PLC state.
1380   if ((*operation == Operation::kMerge) && decoder && decoder->HasDecodePlc()) {
1381     decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]);
1382   }
1383 
1384   int return_value;
1385   if (*operation == Operation::kCodecInternalCng) {
1386     RTC_DCHECK(packet_list->empty());
1387     return_value = DecodeCng(decoder, decoded_length, speech_type);
1388   } else {
1389     return_value = DecodeLoop(packet_list, *operation, decoder, decoded_length,
1390                               speech_type);
1391   }
1392 
1393   if (*decoded_length < 0) {
1394     // Error returned from the decoder.
1395     *decoded_length = 0;
1396     sync_buffer_->IncreaseEndTimestamp(
1397         static_cast<uint32_t>(decoder_frame_length_));
1398     int error_code = 0;
1399     if (decoder)
1400       error_code = decoder->ErrorCode();
1401     if (error_code != 0) {
1402       // Got some error code from the decoder.
1403       return_value = kDecoderErrorCode;
1404       RTC_LOG(LS_WARNING) << "Decoder returned error code: " << error_code;
1405     } else {
1406       // Decoder does not implement error codes. Return generic error.
1407       return_value = kOtherDecoderError;
1408       RTC_LOG(LS_WARNING) << "Decoder error (no error code)";
1409     }
1410     *operation = Operation::kExpand;  // Do expansion to get data instead.
1411   }
1412   if (*speech_type != AudioDecoder::kComfortNoise) {
1413     // Don't increment timestamp if codec returned CNG speech type
1414     // since in this case, the we will increment the CNGplayedTS counter.
1415     // Increase with number of samples per channel.
1416     RTC_DCHECK(*decoded_length == 0 ||
1417                (decoder && decoder->Channels() == sync_buffer_->Channels()));
1418     sync_buffer_->IncreaseEndTimestamp(
1419         *decoded_length / static_cast<int>(sync_buffer_->Channels()));
1420   }
1421   return return_value;
1422 }
1423 
DecodeCng(AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1424 int NetEqImpl::DecodeCng(AudioDecoder* decoder,
1425                          int* decoded_length,
1426                          AudioDecoder::SpeechType* speech_type) {
1427   if (!decoder) {
1428     // This happens when active decoder is not defined.
1429     *decoded_length = -1;
1430     return 0;
1431   }
1432 
1433   while (*decoded_length < rtc::dchecked_cast<int>(output_size_samples_)) {
1434     const int length = decoder->Decode(
1435         nullptr, 0, fs_hz_,
1436         (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t),
1437         &decoded_buffer_[*decoded_length], speech_type);
1438     if (length > 0) {
1439       *decoded_length += length;
1440     } else {
1441       // Error.
1442       RTC_LOG(LS_WARNING) << "Failed to decode CNG";
1443       *decoded_length = -1;
1444       break;
1445     }
1446     if (*decoded_length > static_cast<int>(decoded_buffer_length_)) {
1447       // Guard against overflow.
1448       RTC_LOG(LS_WARNING) << "Decoded too much CNG.";
1449       return kDecodedTooMuch;
1450     }
1451   }
1452   stats_->GeneratedNoiseSamples(*decoded_length);
1453   return 0;
1454 }
1455 
DecodeLoop(PacketList * packet_list,const Operation & operation,AudioDecoder * decoder,int * decoded_length,AudioDecoder::SpeechType * speech_type)1456 int NetEqImpl::DecodeLoop(PacketList* packet_list,
1457                           const Operation& operation,
1458                           AudioDecoder* decoder,
1459                           int* decoded_length,
1460                           AudioDecoder::SpeechType* speech_type) {
1461   RTC_DCHECK(last_decoded_packet_infos_.empty());
1462 
1463   // Do decoding.
1464   while (!packet_list->empty() && !decoder_database_->IsComfortNoise(
1465                                       packet_list->front().payload_type)) {
1466     RTC_DCHECK(decoder);  // At this point, we must have a decoder object.
1467     // The number of channels in the `sync_buffer_` should be the same as the
1468     // number decoder channels.
1469     RTC_DCHECK_EQ(sync_buffer_->Channels(), decoder->Channels());
1470     RTC_DCHECK_GE(decoded_buffer_length_, kMaxFrameSize * decoder->Channels());
1471     RTC_DCHECK(operation == Operation::kNormal ||
1472                operation == Operation::kAccelerate ||
1473                operation == Operation::kFastAccelerate ||
1474                operation == Operation::kMerge ||
1475                operation == Operation::kPreemptiveExpand);
1476 
1477     auto opt_result = packet_list->front().frame->Decode(
1478         rtc::ArrayView<int16_t>(&decoded_buffer_[*decoded_length],
1479                                 decoded_buffer_length_ - *decoded_length));
1480     last_decoded_packet_infos_.push_back(
1481         std::move(packet_list->front().packet_info));
1482     packet_list->pop_front();
1483     if (opt_result) {
1484       const auto& result = *opt_result;
1485       *speech_type = result.speech_type;
1486       if (result.num_decoded_samples > 0) {
1487         *decoded_length += rtc::dchecked_cast<int>(result.num_decoded_samples);
1488         // Update `decoder_frame_length_` with number of samples per channel.
1489         decoder_frame_length_ =
1490             result.num_decoded_samples / decoder->Channels();
1491       }
1492     } else {
1493       // Error.
1494       // TODO(ossu): What to put here?
1495       RTC_LOG(LS_WARNING) << "Decode error";
1496       *decoded_length = -1;
1497       last_decoded_packet_infos_.clear();
1498       packet_list->clear();
1499       break;
1500     }
1501     if (*decoded_length > rtc::dchecked_cast<int>(decoded_buffer_length_)) {
1502       // Guard against overflow.
1503       RTC_LOG(LS_WARNING) << "Decoded too much.";
1504       packet_list->clear();
1505       return kDecodedTooMuch;
1506     }
1507   }  // End of decode loop.
1508 
1509   // If the list is not empty at this point, either a decoding error terminated
1510   // the while-loop, or list must hold exactly one CNG packet.
1511   RTC_DCHECK(
1512       packet_list->empty() || *decoded_length < 0 ||
1513       (packet_list->size() == 1 &&
1514        decoder_database_->IsComfortNoise(packet_list->front().payload_type)));
1515   return 0;
1516 }
1517 
DoNormal(const int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1518 void NetEqImpl::DoNormal(const int16_t* decoded_buffer,
1519                          size_t decoded_length,
1520                          AudioDecoder::SpeechType speech_type,
1521                          bool play_dtmf) {
1522   RTC_DCHECK(normal_.get());
1523   normal_->Process(decoded_buffer, decoded_length, last_mode_,
1524                    algorithm_buffer_.get());
1525   if (decoded_length != 0) {
1526     last_mode_ = Mode::kNormal;
1527   }
1528 
1529   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1530   if ((speech_type == AudioDecoder::kComfortNoise) ||
1531       ((last_mode_ == Mode::kCodecInternalCng) && (decoded_length == 0))) {
1532     // TODO(hlundin): Remove second part of || statement above.
1533     last_mode_ = Mode::kCodecInternalCng;
1534   }
1535 
1536   if (!play_dtmf) {
1537     dtmf_tone_generator_->Reset();
1538   }
1539 }
1540 
DoMerge(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1541 void NetEqImpl::DoMerge(int16_t* decoded_buffer,
1542                         size_t decoded_length,
1543                         AudioDecoder::SpeechType speech_type,
1544                         bool play_dtmf) {
1545   RTC_DCHECK(merge_.get());
1546   size_t new_length =
1547       merge_->Process(decoded_buffer, decoded_length, algorithm_buffer_.get());
1548   // Correction can be negative.
1549   int expand_length_correction =
1550       rtc::dchecked_cast<int>(new_length) -
1551       rtc::dchecked_cast<int>(decoded_length / algorithm_buffer_->Channels());
1552 
1553   // Update in-call and post-call statistics.
1554   if (expand_->MuteFactor(0) == 0) {
1555     // Expand generates only noise.
1556     stats_->ExpandedNoiseSamplesCorrection(expand_length_correction);
1557   } else {
1558     // Expansion generates more than only noise.
1559     stats_->ExpandedVoiceSamplesCorrection(expand_length_correction);
1560   }
1561 
1562   last_mode_ = Mode::kMerge;
1563   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1564   if (speech_type == AudioDecoder::kComfortNoise) {
1565     last_mode_ = Mode::kCodecInternalCng;
1566   }
1567   expand_->Reset();
1568   if (!play_dtmf) {
1569     dtmf_tone_generator_->Reset();
1570   }
1571 }
1572 
DoCodecPlc()1573 bool NetEqImpl::DoCodecPlc() {
1574   AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
1575   if (!decoder) {
1576     return false;
1577   }
1578   const size_t channels = algorithm_buffer_->Channels();
1579   const size_t requested_samples_per_channel =
1580       output_size_samples_ -
1581       (sync_buffer_->FutureLength() - expand_->overlap_length());
1582   concealment_audio_.Clear();
1583   decoder->GeneratePlc(requested_samples_per_channel, &concealment_audio_);
1584   if (concealment_audio_.empty()) {
1585     // Nothing produced. Resort to regular expand.
1586     return false;
1587   }
1588   RTC_CHECK_GE(concealment_audio_.size(),
1589                requested_samples_per_channel * channels);
1590   sync_buffer_->PushBackInterleaved(concealment_audio_);
1591   RTC_DCHECK_NE(algorithm_buffer_->Channels(), 0);
1592   const size_t concealed_samples_per_channel =
1593       concealment_audio_.size() / channels;
1594 
1595   // Update in-call and post-call statistics.
1596   const bool is_new_concealment_event = (last_mode_ != Mode::kCodecPlc);
1597   if (std::all_of(concealment_audio_.cbegin(), concealment_audio_.cend(),
1598                   [](int16_t i) { return i == 0; })) {
1599     // Expand operation generates only noise.
1600     stats_->ExpandedNoiseSamples(concealed_samples_per_channel,
1601                                  is_new_concealment_event);
1602   } else {
1603     // Expand operation generates more than only noise.
1604     stats_->ExpandedVoiceSamples(concealed_samples_per_channel,
1605                                  is_new_concealment_event);
1606   }
1607   last_mode_ = Mode::kCodecPlc;
1608   if (!generated_noise_stopwatch_) {
1609     // Start a new stopwatch since we may be covering for a lost CNG packet.
1610     generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
1611   }
1612   return true;
1613 }
1614 
DoExpand(bool play_dtmf)1615 int NetEqImpl::DoExpand(bool play_dtmf) {
1616   while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
1617          output_size_samples_) {
1618     algorithm_buffer_->Clear();
1619     int return_value = expand_->Process(algorithm_buffer_.get());
1620     size_t length = algorithm_buffer_->Size();
1621     bool is_new_concealment_event = (last_mode_ != Mode::kExpand);
1622 
1623     // Update in-call and post-call statistics.
1624     if (expand_->MuteFactor(0) == 0) {
1625       // Expand operation generates only noise.
1626       stats_->ExpandedNoiseSamples(length, is_new_concealment_event);
1627     } else {
1628       // Expand operation generates more than only noise.
1629       stats_->ExpandedVoiceSamples(length, is_new_concealment_event);
1630     }
1631 
1632     last_mode_ = Mode::kExpand;
1633 
1634     if (return_value < 0) {
1635       return return_value;
1636     }
1637 
1638     sync_buffer_->PushBack(*algorithm_buffer_);
1639     algorithm_buffer_->Clear();
1640   }
1641   if (!play_dtmf) {
1642     dtmf_tone_generator_->Reset();
1643   }
1644 
1645   if (!generated_noise_stopwatch_) {
1646     // Start a new stopwatch since we may be covering for a lost CNG packet.
1647     generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch();
1648   }
1649 
1650   return 0;
1651 }
1652 
DoAccelerate(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf,bool fast_accelerate)1653 int NetEqImpl::DoAccelerate(int16_t* decoded_buffer,
1654                             size_t decoded_length,
1655                             AudioDecoder::SpeechType speech_type,
1656                             bool play_dtmf,
1657                             bool fast_accelerate) {
1658   const size_t required_samples =
1659       static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
1660   size_t borrowed_samples_per_channel = 0;
1661   size_t num_channels = algorithm_buffer_->Channels();
1662   size_t decoded_length_per_channel = decoded_length / num_channels;
1663   if (decoded_length_per_channel < required_samples) {
1664     // Must move data from the `sync_buffer_` in order to get 30 ms.
1665     borrowed_samples_per_channel =
1666         static_cast<int>(required_samples - decoded_length_per_channel);
1667     memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1668             decoded_buffer, sizeof(int16_t) * decoded_length);
1669     sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1670                                          decoded_buffer);
1671     decoded_length = required_samples * num_channels;
1672   }
1673 
1674   size_t samples_removed = 0;
1675   Accelerate::ReturnCodes return_code =
1676       accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
1677                            algorithm_buffer_.get(), &samples_removed);
1678   stats_->AcceleratedSamples(samples_removed);
1679   switch (return_code) {
1680     case Accelerate::kSuccess:
1681       last_mode_ = Mode::kAccelerateSuccess;
1682       break;
1683     case Accelerate::kSuccessLowEnergy:
1684       last_mode_ = Mode::kAccelerateLowEnergy;
1685       break;
1686     case Accelerate::kNoStretch:
1687       last_mode_ = Mode::kAccelerateFail;
1688       break;
1689     case Accelerate::kError:
1690       // TODO(hlundin): Map to Modes::kError instead?
1691       last_mode_ = Mode::kAccelerateFail;
1692       return kAccelerateError;
1693   }
1694 
1695   if (borrowed_samples_per_channel > 0) {
1696     // Copy borrowed samples back to the `sync_buffer_`.
1697     size_t length = algorithm_buffer_->Size();
1698     if (length < borrowed_samples_per_channel) {
1699       // This destroys the beginning of the buffer, but will not cause any
1700       // problems.
1701       sync_buffer_->ReplaceAtIndex(
1702           *algorithm_buffer_,
1703           sync_buffer_->Size() - borrowed_samples_per_channel);
1704       sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length);
1705       algorithm_buffer_->PopFront(length);
1706       RTC_DCHECK(algorithm_buffer_->Empty());
1707     } else {
1708       sync_buffer_->ReplaceAtIndex(
1709           *algorithm_buffer_, borrowed_samples_per_channel,
1710           sync_buffer_->Size() - borrowed_samples_per_channel);
1711       algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1712     }
1713   }
1714 
1715   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1716   if (speech_type == AudioDecoder::kComfortNoise) {
1717     last_mode_ = Mode::kCodecInternalCng;
1718   }
1719   if (!play_dtmf) {
1720     dtmf_tone_generator_->Reset();
1721   }
1722   expand_->Reset();
1723   return 0;
1724 }
1725 
DoPreemptiveExpand(int16_t * decoded_buffer,size_t decoded_length,AudioDecoder::SpeechType speech_type,bool play_dtmf)1726 int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer,
1727                                   size_t decoded_length,
1728                                   AudioDecoder::SpeechType speech_type,
1729                                   bool play_dtmf) {
1730   const size_t required_samples =
1731       static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
1732   size_t num_channels = algorithm_buffer_->Channels();
1733   size_t borrowed_samples_per_channel = 0;
1734   size_t old_borrowed_samples_per_channel = 0;
1735   size_t decoded_length_per_channel = decoded_length / num_channels;
1736   if (decoded_length_per_channel < required_samples) {
1737     // Must move data from the `sync_buffer_` in order to get 30 ms.
1738     borrowed_samples_per_channel =
1739         required_samples - decoded_length_per_channel;
1740     // Calculate how many of these were already played out.
1741     old_borrowed_samples_per_channel =
1742         (borrowed_samples_per_channel > sync_buffer_->FutureLength())
1743             ? (borrowed_samples_per_channel - sync_buffer_->FutureLength())
1744             : 0;
1745     memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
1746             decoded_buffer, sizeof(int16_t) * decoded_length);
1747     sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel,
1748                                          decoded_buffer);
1749     decoded_length = required_samples * num_channels;
1750   }
1751 
1752   size_t samples_added = 0;
1753   PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
1754       decoded_buffer, decoded_length, old_borrowed_samples_per_channel,
1755       algorithm_buffer_.get(), &samples_added);
1756   stats_->PreemptiveExpandedSamples(samples_added);
1757   switch (return_code) {
1758     case PreemptiveExpand::kSuccess:
1759       last_mode_ = Mode::kPreemptiveExpandSuccess;
1760       break;
1761     case PreemptiveExpand::kSuccessLowEnergy:
1762       last_mode_ = Mode::kPreemptiveExpandLowEnergy;
1763       break;
1764     case PreemptiveExpand::kNoStretch:
1765       last_mode_ = Mode::kPreemptiveExpandFail;
1766       break;
1767     case PreemptiveExpand::kError:
1768       // TODO(hlundin): Map to Modes::kError instead?
1769       last_mode_ = Mode::kPreemptiveExpandFail;
1770       return kPreemptiveExpandError;
1771   }
1772 
1773   if (borrowed_samples_per_channel > 0) {
1774     // Copy borrowed samples back to the `sync_buffer_`.
1775     sync_buffer_->ReplaceAtIndex(
1776         *algorithm_buffer_, borrowed_samples_per_channel,
1777         sync_buffer_->Size() - borrowed_samples_per_channel);
1778     algorithm_buffer_->PopFront(borrowed_samples_per_channel);
1779   }
1780 
1781   // If last packet was decoded as an inband CNG, set mode to CNG instead.
1782   if (speech_type == AudioDecoder::kComfortNoise) {
1783     last_mode_ = Mode::kCodecInternalCng;
1784   }
1785   if (!play_dtmf) {
1786     dtmf_tone_generator_->Reset();
1787   }
1788   expand_->Reset();
1789   return 0;
1790 }
1791 
DoRfc3389Cng(PacketList * packet_list,bool play_dtmf)1792 int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) {
1793   if (!packet_list->empty()) {
1794     // Must have exactly one SID frame at this point.
1795     RTC_DCHECK_EQ(packet_list->size(), 1);
1796     const Packet& packet = packet_list->front();
1797     if (!decoder_database_->IsComfortNoise(packet.payload_type)) {
1798       RTC_LOG(LS_ERROR) << "Trying to decode non-CNG payload as CNG.";
1799       return kOtherError;
1800     }
1801     if (comfort_noise_->UpdateParameters(packet) ==
1802         ComfortNoise::kInternalError) {
1803       algorithm_buffer_->Zeros(output_size_samples_);
1804       return -comfort_noise_->internal_error_code();
1805     }
1806   }
1807   int cn_return =
1808       comfort_noise_->Generate(output_size_samples_, algorithm_buffer_.get());
1809   expand_->Reset();
1810   last_mode_ = Mode::kRfc3389Cng;
1811   if (!play_dtmf) {
1812     dtmf_tone_generator_->Reset();
1813   }
1814   if (cn_return == ComfortNoise::kInternalError) {
1815     RTC_LOG(LS_WARNING) << "Comfort noise generator returned error code: "
1816                         << comfort_noise_->internal_error_code();
1817     return kComfortNoiseErrorCode;
1818   } else if (cn_return == ComfortNoise::kUnknownPayloadType) {
1819     return kUnknownRtpPayloadType;
1820   }
1821   return 0;
1822 }
1823 
DoCodecInternalCng(const int16_t * decoded_buffer,size_t decoded_length)1824 void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer,
1825                                    size_t decoded_length) {
1826   RTC_DCHECK(normal_.get());
1827   normal_->Process(decoded_buffer, decoded_length, last_mode_,
1828                    algorithm_buffer_.get());
1829   last_mode_ = Mode::kCodecInternalCng;
1830   expand_->Reset();
1831 }
1832 
DoDtmf(const DtmfEvent & dtmf_event,bool * play_dtmf)1833 int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) {
1834   // This block of the code and the block further down, handling `dtmf_switch`
1835   // are commented out. Otherwise playing out-of-band DTMF would fail in VoE
1836   // test, DtmfTest.ManualSuccessfullySendsOutOfBandTelephoneEvents. This is
1837   // equivalent to `dtmf_switch` always be false.
1838   //
1839   // See http://webrtc-codereview.appspot.com/1195004/ for discussion
1840   // On this issue. This change might cause some glitches at the point of
1841   // switch from audio to DTMF. Issue 1545 is filed to track this.
1842   //
1843   //  bool dtmf_switch = false;
1844   //  if ((last_mode_ != Modes::kDtmf) &&
1845   //      dtmf_tone_generator_->initialized()) {
1846   //    // Special case; see below.
1847   //    // We must catch this before calling Generate, since `initialized` is
1848   //    // modified in that call.
1849   //    dtmf_switch = true;
1850   //  }
1851 
1852   int dtmf_return_value = 0;
1853   if (!dtmf_tone_generator_->initialized()) {
1854     // Initialize if not already done.
1855     dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1856                                                    dtmf_event.volume);
1857   }
1858 
1859   if (dtmf_return_value == 0) {
1860     // Generate DTMF signal.
1861     dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_,
1862                                                        algorithm_buffer_.get());
1863   }
1864 
1865   if (dtmf_return_value < 0) {
1866     algorithm_buffer_->Zeros(output_size_samples_);
1867     return dtmf_return_value;
1868   }
1869 
1870   //  if (dtmf_switch) {
1871   //    // This is the special case where the previous operation was DTMF
1872   //    // overdub, but the current instruction is "regular" DTMF. We must make
1873   //    // sure that the DTMF does not have any discontinuities. The first DTMF
1874   //    // sample that we generate now must be played out immediately, therefore
1875   //    // it must be copied to the speech buffer.
1876   //    // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and
1877   //    // verify correct operation.
1878   //    RTC_DCHECK_NOTREACHED();
1879   //    // Must generate enough data to replace all of the `sync_buffer_`
1880   //    // "future".
1881   //    int required_length = sync_buffer_->FutureLength();
1882   //    RTC_DCHECK(dtmf_tone_generator_->initialized());
1883   //    dtmf_return_value = dtmf_tone_generator_->Generate(required_length,
1884   //                                                       algorithm_buffer_);
1885   //    RTC_DCHECK((size_t) required_length == algorithm_buffer_->Size());
1886   //    if (dtmf_return_value < 0) {
1887   //      algorithm_buffer_->Zeros(output_size_samples_);
1888   //      return dtmf_return_value;
1889   //    }
1890   //
1891   //    // Overwrite the "future" part of the speech buffer with the new DTMF
1892   //    // data.
1893   //    // TODO(hlundin): It seems that this overwriting has gone lost.
1894   //    // Not adapted for multi-channel yet.
1895   //    RTC_DCHECK(algorithm_buffer_->Channels() == 1);
1896   //    if (algorithm_buffer_->Channels() != 1) {
1897   //      RTC_LOG(LS_WARNING) << "DTMF not supported for more than one channel";
1898   //      return kStereoNotSupported;
1899   //    }
1900   //    // Shuffle the remaining data to the beginning of algorithm buffer.
1901   //    algorithm_buffer_->PopFront(sync_buffer_->FutureLength());
1902   //  }
1903 
1904   sync_buffer_->IncreaseEndTimestamp(
1905       static_cast<uint32_t>(output_size_samples_));
1906   expand_->Reset();
1907   last_mode_ = Mode::kDtmf;
1908 
1909   // Set to false because the DTMF is already in the algorithm buffer.
1910   *play_dtmf = false;
1911   return 0;
1912 }
1913 
DtmfOverdub(const DtmfEvent & dtmf_event,size_t num_channels,int16_t * output) const1914 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event,
1915                            size_t num_channels,
1916                            int16_t* output) const {
1917   size_t out_index = 0;
1918   size_t overdub_length = output_size_samples_;  // Default value.
1919 
1920   if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) {
1921     // Special operation for transition from "DTMF only" to "DTMF overdub".
1922     out_index =
1923         std::min(sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
1924                  output_size_samples_);
1925     overdub_length = output_size_samples_ - out_index;
1926   }
1927 
1928   AudioMultiVector dtmf_output(num_channels);
1929   int dtmf_return_value = 0;
1930   if (!dtmf_tone_generator_->initialized()) {
1931     dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no,
1932                                                    dtmf_event.volume);
1933   }
1934   if (dtmf_return_value == 0) {
1935     dtmf_return_value =
1936         dtmf_tone_generator_->Generate(overdub_length, &dtmf_output);
1937     RTC_DCHECK_EQ(overdub_length, dtmf_output.Size());
1938   }
1939   dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
1940   return dtmf_return_value < 0 ? dtmf_return_value : 0;
1941 }
1942 
ExtractPackets(size_t required_samples,PacketList * packet_list)1943 int NetEqImpl::ExtractPackets(size_t required_samples,
1944                               PacketList* packet_list) {
1945   bool first_packet = true;
1946   uint8_t prev_payload_type = 0;
1947   uint32_t prev_timestamp = 0;
1948   uint16_t prev_sequence_number = 0;
1949   bool next_packet_available = false;
1950 
1951   const Packet* next_packet = packet_buffer_->PeekNextPacket();
1952   RTC_DCHECK(next_packet);
1953   if (!next_packet) {
1954     RTC_LOG(LS_ERROR) << "Packet buffer unexpectedly empty.";
1955     return -1;
1956   }
1957   uint32_t first_timestamp = next_packet->timestamp;
1958   size_t extracted_samples = 0;
1959 
1960   // Packet extraction loop.
1961   do {
1962     timestamp_ = next_packet->timestamp;
1963     absl::optional<Packet> packet = packet_buffer_->GetNextPacket();
1964     // `next_packet` may be invalid after the `packet_buffer_` operation.
1965     next_packet = nullptr;
1966     if (!packet) {
1967       RTC_LOG(LS_ERROR) << "Should always be able to extract a packet here";
1968       RTC_DCHECK_NOTREACHED();  // Should always be able to extract a packet
1969                                 // here.
1970       return -1;
1971     }
1972     const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs();
1973     stats_->StoreWaitingTime(waiting_time_ms);
1974     RTC_DCHECK(!packet->empty());
1975 
1976     if (first_packet) {
1977       first_packet = false;
1978       if (nack_enabled_) {
1979         RTC_DCHECK(nack_);
1980         // TODO(henrik.lundin): Should we update this for all decoded packets?
1981         nack_->UpdateLastDecodedPacket(packet->sequence_number,
1982                                        packet->timestamp);
1983       }
1984       prev_sequence_number = packet->sequence_number;
1985       prev_timestamp = packet->timestamp;
1986       prev_payload_type = packet->payload_type;
1987     }
1988 
1989     const bool has_cng_packet =
1990         decoder_database_->IsComfortNoise(packet->payload_type);
1991     // Store number of extracted samples.
1992     size_t packet_duration = 0;
1993     if (packet->frame) {
1994       packet_duration = packet->frame->Duration();
1995       // TODO(ossu): Is this the correct way to track Opus FEC packets?
1996       if (packet->priority.codec_level > 0) {
1997         stats_->SecondaryDecodedSamples(
1998             rtc::dchecked_cast<int>(packet_duration));
1999       }
2000     } else if (!has_cng_packet) {
2001       RTC_LOG(LS_WARNING) << "Unknown payload type "
2002                           << static_cast<int>(packet->payload_type);
2003       RTC_DCHECK_NOTREACHED();
2004     }
2005 
2006     if (packet_duration == 0) {
2007       // Decoder did not return a packet duration. Assume that the packet
2008       // contains the same number of samples as the previous one.
2009       packet_duration = decoder_frame_length_;
2010     }
2011     extracted_samples = packet->timestamp - first_timestamp + packet_duration;
2012 
2013     RTC_DCHECK(controller_);
2014     stats_->JitterBufferDelay(packet_duration, waiting_time_ms,
2015                               controller_->TargetLevelMs(),
2016                               controller_->UnlimitedTargetLevelMs());
2017 
2018     packet_list->push_back(std::move(*packet));  // Store packet in list.
2019     packet = absl::nullopt;  // Ensure it's never used after the move.
2020 
2021     // Check what packet is available next.
2022     next_packet = packet_buffer_->PeekNextPacket();
2023     next_packet_available = false;
2024     if (next_packet && prev_payload_type == next_packet->payload_type &&
2025         !has_cng_packet) {
2026       int16_t seq_no_diff = next_packet->sequence_number - prev_sequence_number;
2027       size_t ts_diff = next_packet->timestamp - prev_timestamp;
2028       if ((seq_no_diff == 1 || seq_no_diff == 0) &&
2029           ts_diff <= packet_duration) {
2030         // The next sequence number is available, or the next part of a packet
2031         // that was split into pieces upon insertion.
2032         next_packet_available = true;
2033       }
2034       prev_sequence_number = next_packet->sequence_number;
2035       prev_timestamp = next_packet->timestamp;
2036     }
2037   } while (extracted_samples < required_samples && next_packet_available);
2038 
2039   if (extracted_samples > 0) {
2040     // Delete old packets only when we are going to decode something. Otherwise,
2041     // we could end up in the situation where we never decode anything, since
2042     // all incoming packets are considered too old but the buffer will also
2043     // never be flooded and flushed.
2044     packet_buffer_->DiscardAllOldPackets(timestamp_, stats_.get());
2045   }
2046 
2047   return rtc::dchecked_cast<int>(extracted_samples);
2048 }
2049 
UpdatePlcComponents(int fs_hz,size_t channels)2050 void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) {
2051   // Delete objects and create new ones.
2052   expand_.reset(expand_factory_->Create(background_noise_.get(),
2053                                         sync_buffer_.get(), &random_vector_,
2054                                         stats_.get(), fs_hz, channels));
2055   merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
2056 }
2057 
SetSampleRateAndChannels(int fs_hz,size_t channels)2058 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
2059   RTC_LOG(LS_VERBOSE) << "SetSampleRateAndChannels " << fs_hz << " "
2060                       << channels;
2061   // TODO(hlundin): Change to an enumerator and skip assert.
2062   RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
2063              fs_hz == 48000);
2064   RTC_DCHECK_GT(channels, 0);
2065 
2066   // Before changing the sample rate, end and report any ongoing expand event.
2067   stats_->EndExpandEvent(fs_hz_);
2068   fs_hz_ = fs_hz;
2069   fs_mult_ = fs_hz / 8000;
2070   output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
2071   decoder_frame_length_ = 3 * output_size_samples_;  // Initialize to 30ms.
2072 
2073   last_mode_ = Mode::kNormal;
2074 
2075   ComfortNoiseDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder();
2076   if (cng_decoder)
2077     cng_decoder->Reset();
2078 
2079   // Reinit post-decode VAD with new sample rate.
2080   RTC_DCHECK(vad_.get());  // Cannot be NULL here.
2081   vad_->Init();
2082 
2083   // Delete algorithm buffer and create a new one.
2084   algorithm_buffer_.reset(new AudioMultiVector(channels));
2085 
2086   // Delete sync buffer and create a new one.
2087   sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_));
2088 
2089   // Delete BackgroundNoise object and create a new one.
2090   background_noise_.reset(new BackgroundNoise(channels));
2091 
2092   // Reset random vector.
2093   random_vector_.Reset();
2094 
2095   UpdatePlcComponents(fs_hz, channels);
2096 
2097   // Move index so that we create a small set of future samples (all 0).
2098   sync_buffer_->set_next_index(sync_buffer_->next_index() -
2099                                expand_->overlap_length());
2100 
2101   normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
2102                            expand_.get(), stats_.get()));
2103   accelerate_.reset(
2104       accelerate_factory_->Create(fs_hz, channels, *background_noise_));
2105   preemptive_expand_.reset(preemptive_expand_factory_->Create(
2106       fs_hz, channels, *background_noise_, expand_->overlap_length()));
2107 
2108   // Delete ComfortNoise object and create a new one.
2109   comfort_noise_.reset(
2110       new ComfortNoise(fs_hz, decoder_database_.get(), sync_buffer_.get()));
2111 
2112   // Verify that `decoded_buffer_` is long enough.
2113   if (decoded_buffer_length_ < kMaxFrameSize * channels) {
2114     // Reallocate to larger size.
2115     decoded_buffer_length_ = kMaxFrameSize * channels;
2116     decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
2117   }
2118   RTC_CHECK(controller_) << "Unexpectedly found no NetEqController";
2119   controller_->SetSampleRate(fs_hz_, output_size_samples_);
2120 }
2121 
LastOutputType()2122 NetEqImpl::OutputType NetEqImpl::LastOutputType() {
2123   RTC_DCHECK(vad_.get());
2124   RTC_DCHECK(expand_.get());
2125   if (last_mode_ == Mode::kCodecInternalCng ||
2126       last_mode_ == Mode::kRfc3389Cng) {
2127     return OutputType::kCNG;
2128   } else if (last_mode_ == Mode::kExpand && expand_->MuteFactor(0) == 0) {
2129     // Expand mode has faded down to background noise only (very long expand).
2130     return OutputType::kPLCCNG;
2131   } else if (last_mode_ == Mode::kExpand) {
2132     return OutputType::kPLC;
2133   } else if (vad_->running() && !vad_->active_speech()) {
2134     return OutputType::kVadPassive;
2135   } else if (last_mode_ == Mode::kCodecPlc) {
2136     return OutputType::kCodecPLC;
2137   } else {
2138     return OutputType::kNormalSpeech;
2139   }
2140 }
2141 }  // namespace webrtc
2142