xref: /aosp_15_r20/external/webrtc/modules/audio_coding/neteq/decision_logic.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/neteq/decision_logic.h"
12 
13 #include <stdio.h>
14 
15 #include <cstdint>
16 #include <memory>
17 #include <string>
18 
19 #include "absl/types/optional.h"
20 #include "api/neteq/neteq.h"
21 #include "api/neteq/neteq_controller.h"
22 #include "modules/audio_coding/neteq/packet_arrival_history.h"
23 #include "modules/audio_coding/neteq/packet_buffer.h"
24 #include "rtc_base/checks.h"
25 #include "rtc_base/experiments/field_trial_parser.h"
26 #include "rtc_base/experiments/struct_parameters_parser.h"
27 #include "rtc_base/logging.h"
28 #include "rtc_base/numerics/safe_conversions.h"
29 #include "system_wrappers/include/field_trial.h"
30 
31 namespace webrtc {
32 
33 namespace {
34 
// Delay adjustments (accelerate/preemptive expand) happen in steps of 15 ms;
// the value is rounded up to 20 ms because the clock only ticks every 10 ms.
constexpr int kDelayAdjustmentGranularityMs = 20;
// Width, in milliseconds, of the window centred on the target level that is
// used for the comfort-noise low/high thresholds.
constexpr int kTargetLevelWindowMs = 100;
// Percentage of the target level below which decoding is postponed after an
// expand.
constexpr int kPostponeDecodingLevel = 50;
// Number of consecutive expands at which waiting for a packet is given up.
constexpr int kMaxWaitForPacketTicks = 10;
41 
CreateDelayManager(const NetEqController::Config & neteq_config)42 std::unique_ptr<DelayManager> CreateDelayManager(
43     const NetEqController::Config& neteq_config) {
44   DelayManager::Config config;
45   config.max_packets_in_buffer = neteq_config.max_packets_in_buffer;
46   config.base_minimum_delay_ms = neteq_config.base_min_delay_ms;
47   config.Log();
48   return std::make_unique<DelayManager>(config, neteq_config.tick_timer);
49 }
50 
IsTimestretch(NetEq::Mode mode)51 bool IsTimestretch(NetEq::Mode mode) {
52   return mode == NetEq::Mode::kAccelerateSuccess ||
53          mode == NetEq::Mode::kAccelerateLowEnergy ||
54          mode == NetEq::Mode::kPreemptiveExpandSuccess ||
55          mode == NetEq::Mode::kPreemptiveExpandLowEnergy;
56 }
57 
IsCng(NetEq::Mode mode)58 bool IsCng(NetEq::Mode mode) {
59   return mode == NetEq::Mode::kRfc3389Cng ||
60          mode == NetEq::Mode::kCodecInternalCng;
61 }
62 
IsExpand(NetEq::Mode mode)63 bool IsExpand(NetEq::Mode mode) {
64   return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc;
65 }
66 
67 }  // namespace
68 
Config()69 DecisionLogic::Config::Config() {
70   StructParametersParser::Create(
71       "enable_stable_playout_delay", &enable_stable_playout_delay,  //
72       "reinit_after_expands", &reinit_after_expands,                //
73       "packet_history_size_ms", &packet_history_size_ms,            //
74       "deceleration_target_level_offset_ms",
75       &deceleration_target_level_offset_ms)
76       ->Parse(webrtc::field_trial::FindFullName(
77           "WebRTC-Audio-NetEqDecisionLogicConfig"));
78   RTC_LOG(LS_INFO) << "NetEq decision logic config:"
79                    << " enable_stable_playout_delay="
80                    << enable_stable_playout_delay
81                    << " reinit_after_expands=" << reinit_after_expands
82                    << " packet_history_size_ms=" << packet_history_size_ms
83                    << " deceleration_target_level_offset_ms="
84                    << deceleration_target_level_offset_ms;
85 }
86 
DecisionLogic(NetEqController::Config config)87 DecisionLogic::DecisionLogic(NetEqController::Config config)
88     : DecisionLogic(config,
89                     CreateDelayManager(config),
90                     std::make_unique<BufferLevelFilter>()) {}
91 
DecisionLogic(NetEqController::Config config,std::unique_ptr<DelayManager> delay_manager,std::unique_ptr<BufferLevelFilter> buffer_level_filter)92 DecisionLogic::DecisionLogic(
93     NetEqController::Config config,
94     std::unique_ptr<DelayManager> delay_manager,
95     std::unique_ptr<BufferLevelFilter> buffer_level_filter)
96     : delay_manager_(std::move(delay_manager)),
97       buffer_level_filter_(std::move(buffer_level_filter)),
98       packet_arrival_history_(config_.packet_history_size_ms),
99       tick_timer_(config.tick_timer),
100       disallow_time_stretching_(!config.allow_time_stretching),
101       timescale_countdown_(
102           tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)) {}
103 
104 DecisionLogic::~DecisionLogic() = default;
105 
SoftReset()106 void DecisionLogic::SoftReset() {
107   packet_length_samples_ = 0;
108   sample_memory_ = 0;
109   prev_time_scale_ = false;
110   timescale_countdown_ =
111       tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
112   time_stretched_cn_samples_ = 0;
113   delay_manager_->Reset();
114   buffer_level_filter_->Reset();
115   packet_arrival_history_.Reset();
116   last_playout_delay_ms_ = 0;
117 }
118 
SetSampleRate(int fs_hz,size_t output_size_samples)119 void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
120   // TODO(hlundin): Change to an enumerator and skip assert.
121   RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 ||
122              fs_hz == 48000);
123   sample_rate_khz_ = fs_hz / 1000;
124   output_size_samples_ = output_size_samples;
125   packet_arrival_history_.set_sample_rate(fs_hz);
126 }
127 
GetDecision(const NetEqStatus & status,bool * reset_decoder)128 NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
129                                             bool* reset_decoder) {
130   // If last mode was CNG (or Expand, since this could be covering up for
131   // a lost CNG packet), remember that CNG is on. This is needed if comfort
132   // noise is interrupted by DTMF.
133   if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
134     cng_state_ = kCngRfc3389On;
135   } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
136     cng_state_ = kCngInternalOn;
137   }
138 
139   if (IsExpand(status.last_mode)) {
140     ++num_consecutive_expands_;
141   } else {
142     num_consecutive_expands_ = 0;
143   }
144 
145   if (!IsExpand(status.last_mode) && !IsCng(status.last_mode)) {
146     last_playout_delay_ms_ = GetPlayoutDelayMs(status);
147   }
148 
149   prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode);
150   if (prev_time_scale_) {
151     timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
152   }
153   if (!IsCng(status.last_mode)) {
154     FilterBufferLevel(status.packet_buffer_info.span_samples);
155   }
156 
157   // Guard for errors, to avoid getting stuck in error mode.
158   if (status.last_mode == NetEq::Mode::kError) {
159     if (!status.next_packet) {
160       return NetEq::Operation::kExpand;
161     } else {
162       // Use kUndefined to flag for a reset.
163       return NetEq::Operation::kUndefined;
164     }
165   }
166 
167   if (status.next_packet && status.next_packet->is_cng) {
168     return CngOperation(status);
169   }
170 
171   // Handle the case with no packet at all available (except maybe DTMF).
172   if (!status.next_packet) {
173     return NoPacket(status);
174   }
175 
176   // If the expand period was very long, reset NetEQ since it is likely that the
177   // sender was restarted.
178   if (num_consecutive_expands_ > config_.reinit_after_expands) {
179     *reset_decoder = true;
180     return NetEq::Operation::kNormal;
181   }
182 
183   // Make sure we don't restart audio too soon after an expansion to avoid
184   // running out of data right away again. We should only wait if there are no
185   // DTX or CNG packets in the buffer (otherwise we should just play out what we
186   // have, since we cannot know the exact duration of DTX or CNG packets), and
187   // if the mute factor is low enough (otherwise the expansion was short enough
188   // to not be noticable).
189   // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
190   const int target_level_samples = TargetLevelMs() * sample_rate_khz_;
191   if (!config_.enable_stable_playout_delay && IsExpand(status.last_mode) &&
192       status.expand_mutefactor < 16384 / 2 &&
193       status.packet_buffer_info.span_samples <
194           static_cast<size_t>(target_level_samples * kPostponeDecodingLevel /
195                               100) &&
196       !status.packet_buffer_info.dtx_or_cng) {
197     return NetEq::Operation::kExpand;
198   }
199 
200   const uint32_t five_seconds_samples =
201       static_cast<uint32_t>(5000 * sample_rate_khz_);
202   // Check if the required packet is available.
203   if (status.target_timestamp == status.next_packet->timestamp) {
204     return ExpectedPacketAvailable(status);
205   }
206   if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
207                                          status.target_timestamp,
208                                          five_seconds_samples)) {
209     return FuturePacketAvailable(status);
210   }
211   // This implies that available_timestamp < target_timestamp, which can
212   // happen when a new stream or codec is received. Signal for a reset.
213   return NetEq::Operation::kUndefined;
214 }
215 
NotifyMutedState()216 void DecisionLogic::NotifyMutedState() {
217   ++num_consecutive_expands_;
218 }
219 
TargetLevelMs() const220 int DecisionLogic::TargetLevelMs() const {
221   int target_delay_ms = delay_manager_->TargetDelayMs();
222   if (!config_.enable_stable_playout_delay) {
223     target_delay_ms =
224         std::max(target_delay_ms,
225                  static_cast<int>(packet_length_samples_ / sample_rate_khz_));
226   }
227   return target_delay_ms;
228 }
229 
UnlimitedTargetLevelMs() const230 int DecisionLogic::UnlimitedTargetLevelMs() const {
231   return delay_manager_->UnlimitedTargetLevelMs();
232 }
233 
GetFilteredBufferLevel() const234 int DecisionLogic::GetFilteredBufferLevel() const {
235   if (config_.enable_stable_playout_delay) {
236     return last_playout_delay_ms_ * sample_rate_khz_;
237   }
238   return buffer_level_filter_->filtered_current_level();
239 }
240 
PacketArrived(int fs_hz,bool should_update_stats,const PacketArrivedInfo & info)241 absl::optional<int> DecisionLogic::PacketArrived(
242     int fs_hz,
243     bool should_update_stats,
244     const PacketArrivedInfo& info) {
245   buffer_flush_ = buffer_flush_ || info.buffer_flush;
246   if (!should_update_stats || info.is_cng_or_dtmf) {
247     return absl::nullopt;
248   }
249   if (info.packet_length_samples > 0 && fs_hz > 0 &&
250       info.packet_length_samples != packet_length_samples_) {
251     packet_length_samples_ = info.packet_length_samples;
252     delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz);
253   }
254   int64_t time_now_ms = tick_timer_->ticks() * tick_timer_->ms_per_tick();
255   packet_arrival_history_.Insert(info.main_timestamp, time_now_ms);
256   if (packet_arrival_history_.size() < 2) {
257     // No meaningful delay estimate unless at least 2 packets have arrived.
258     return absl::nullopt;
259   }
260   int arrival_delay_ms =
261       packet_arrival_history_.GetDelayMs(info.main_timestamp, time_now_ms);
262   bool reordered =
263       !packet_arrival_history_.IsNewestRtpTimestamp(info.main_timestamp);
264   delay_manager_->Update(arrival_delay_ms, reordered);
265   return arrival_delay_ms;
266 }
267 
FilterBufferLevel(size_t buffer_size_samples)268 void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
269   buffer_level_filter_->SetTargetBufferLevel(TargetLevelMs());
270 
271   int time_stretched_samples = time_stretched_cn_samples_;
272   if (prev_time_scale_) {
273     time_stretched_samples += sample_memory_;
274   }
275 
276   if (buffer_flush_) {
277     buffer_level_filter_->SetFilteredBufferLevel(buffer_size_samples);
278     buffer_flush_ = false;
279   } else {
280     buffer_level_filter_->Update(buffer_size_samples, time_stretched_samples);
281   }
282   prev_time_scale_ = false;
283   time_stretched_cn_samples_ = 0;
284 }
285 
CngOperation(NetEqController::NetEqStatus status)286 NetEq::Operation DecisionLogic::CngOperation(
287     NetEqController::NetEqStatus status) {
288   // Signed difference between target and available timestamp.
289   int32_t timestamp_diff = static_cast<int32_t>(
290       static_cast<uint32_t>(status.generated_noise_samples +
291                             status.target_timestamp) -
292       status.next_packet->timestamp);
293   int optimal_level_samp = TargetLevelMs() * sample_rate_khz_;
294   const int64_t excess_waiting_time_samp =
295       -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
296 
297   if (excess_waiting_time_samp > optimal_level_samp / 2) {
298     // The waiting time for this packet will be longer than 1.5
299     // times the wanted buffer delay. Apply fast-forward to cut the
300     // waiting time down to the optimal.
301     noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
302                                                       excess_waiting_time_samp);
303     timestamp_diff =
304         rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
305   }
306 
307   if (timestamp_diff < 0 && status.last_mode == NetEq::Mode::kRfc3389Cng) {
308     // Not time to play this packet yet. Wait another round before using this
309     // packet. Keep on playing CNG from previous CNG parameters.
310     return NetEq::Operation::kRfc3389CngNoPacket;
311   } else {
312     // Otherwise, go for the CNG packet now.
313     noise_fast_forward_ = 0;
314     return NetEq::Operation::kRfc3389Cng;
315   }
316 }
317 
NoPacket(NetEqController::NetEqStatus status)318 NetEq::Operation DecisionLogic::NoPacket(NetEqController::NetEqStatus status) {
319   if (cng_state_ == kCngRfc3389On) {
320     // Keep on playing comfort noise.
321     return NetEq::Operation::kRfc3389CngNoPacket;
322   } else if (cng_state_ == kCngInternalOn) {
323     // Keep on playing codec internal comfort noise.
324     return NetEq::Operation::kCodecInternalCng;
325   } else if (status.play_dtmf) {
326     return NetEq::Operation::kDtmf;
327   } else {
328     // Nothing to play, do expand.
329     return NetEq::Operation::kExpand;
330   }
331 }
332 
ExpectedPacketAvailable(NetEqController::NetEqStatus status)333 NetEq::Operation DecisionLogic::ExpectedPacketAvailable(
334     NetEqController::NetEqStatus status) {
335   if (!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand &&
336       !status.play_dtmf) {
337     if (config_.enable_stable_playout_delay) {
338       const int playout_delay_ms = GetPlayoutDelayMs(status);
339       if (playout_delay_ms >= HighThreshold() << 2) {
340         return NetEq::Operation::kFastAccelerate;
341       }
342       if (TimescaleAllowed()) {
343         if (playout_delay_ms >= HighThreshold()) {
344           return NetEq::Operation::kAccelerate;
345         }
346         if (playout_delay_ms < LowThreshold()) {
347           return NetEq::Operation::kPreemptiveExpand;
348         }
349       }
350     } else {
351       const int target_level_samples = TargetLevelMs() * sample_rate_khz_;
352       const int low_limit = std::max(
353           target_level_samples * 3 / 4,
354           target_level_samples -
355               config_.deceleration_target_level_offset_ms * sample_rate_khz_);
356       const int high_limit = std::max(
357           target_level_samples,
358           low_limit + kDelayAdjustmentGranularityMs * sample_rate_khz_);
359 
360       const int buffer_level_samples =
361           buffer_level_filter_->filtered_current_level();
362       if (buffer_level_samples >= high_limit << 2)
363         return NetEq::Operation::kFastAccelerate;
364       if (TimescaleAllowed()) {
365         if (buffer_level_samples >= high_limit)
366           return NetEq::Operation::kAccelerate;
367         if (buffer_level_samples < low_limit)
368           return NetEq::Operation::kPreemptiveExpand;
369       }
370     }
371   }
372   return NetEq::Operation::kNormal;
373 }
374 
FuturePacketAvailable(NetEqController::NetEqStatus status)375 NetEq::Operation DecisionLogic::FuturePacketAvailable(
376     NetEqController::NetEqStatus status) {
377   // Required packet is not available, but a future packet is.
378   // Check if we should continue with an ongoing expand because the new packet
379   // is too far into the future.
380   if (IsExpand(status.last_mode) && ShouldContinueExpand(status)) {
381     if (status.play_dtmf) {
382       // Still have DTMF to play, so do not do expand.
383       return NetEq::Operation::kDtmf;
384     } else {
385       // Nothing to play.
386       return NetEq::Operation::kExpand;
387     }
388   }
389 
390   if (status.last_mode == NetEq::Mode::kCodecPlc) {
391     return NetEq::Operation::kNormal;
392   }
393 
394   // If previous was comfort noise, then no merge is needed.
395   if (IsCng(status.last_mode)) {
396     uint32_t timestamp_leap =
397         status.next_packet->timestamp - status.target_timestamp;
398     const bool generated_enough_noise =
399         status.generated_noise_samples >= timestamp_leap;
400 
401     int playout_delay_ms = GetNextPacketDelayMs(status);
402     const bool above_target_delay = playout_delay_ms > HighThresholdCng();
403     const bool below_target_delay = playout_delay_ms < LowThresholdCng();
404     // Keep the delay same as before CNG, but make sure that it is within the
405     // target window.
406     if ((generated_enough_noise && !below_target_delay) || above_target_delay) {
407       time_stretched_cn_samples_ =
408           timestamp_leap - status.generated_noise_samples;
409       return NetEq::Operation::kNormal;
410     }
411 
412     if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
413       return NetEq::Operation::kRfc3389CngNoPacket;
414     }
415     return NetEq::Operation::kCodecInternalCng;
416   }
417 
418   // Do not merge unless we have done an expand before.
419   if (status.last_mode == NetEq::Mode::kExpand) {
420     return NetEq::Operation::kMerge;
421   } else if (status.play_dtmf) {
422     // Play DTMF instead of expand.
423     return NetEq::Operation::kDtmf;
424   } else {
425     return NetEq::Operation::kExpand;
426   }
427 }
428 
UnderTargetLevel() const429 bool DecisionLogic::UnderTargetLevel() const {
430   return buffer_level_filter_->filtered_current_level() <
431          TargetLevelMs() * sample_rate_khz_;
432 }
433 
ReinitAfterExpands(uint32_t timestamp_leap) const434 bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
435   return timestamp_leap >= static_cast<uint32_t>(output_size_samples_ *
436                                                  config_.reinit_after_expands);
437 }
438 
PacketTooEarly(uint32_t timestamp_leap) const439 bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
440   return timestamp_leap >
441          static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
442 }
443 
MaxWaitForPacket() const444 bool DecisionLogic::MaxWaitForPacket() const {
445   return num_consecutive_expands_ >= kMaxWaitForPacketTicks;
446 }
447 
ShouldContinueExpand(NetEqController::NetEqStatus status) const448 bool DecisionLogic::ShouldContinueExpand(
449     NetEqController::NetEqStatus status) const {
450   uint32_t timestamp_leap =
451       status.next_packet->timestamp - status.target_timestamp;
452   if (config_.enable_stable_playout_delay) {
453     return GetNextPacketDelayMs(status) < HighThreshold() &&
454            PacketTooEarly(timestamp_leap);
455   }
456   return !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
457          PacketTooEarly(timestamp_leap) && UnderTargetLevel();
458 }
459 
GetNextPacketDelayMs(NetEqController::NetEqStatus status) const460 int DecisionLogic::GetNextPacketDelayMs(
461     NetEqController::NetEqStatus status) const {
462   if (config_.enable_stable_playout_delay) {
463     return packet_arrival_history_.GetDelayMs(
464         status.next_packet->timestamp,
465         tick_timer_->ticks() * tick_timer_->ms_per_tick());
466   }
467   return status.packet_buffer_info.span_samples / sample_rate_khz_;
468 }
469 
GetPlayoutDelayMs(NetEqController::NetEqStatus status) const470 int DecisionLogic::GetPlayoutDelayMs(
471     NetEqController::NetEqStatus status) const {
472   uint32_t playout_timestamp =
473       status.target_timestamp - status.sync_buffer_samples;
474   return packet_arrival_history_.GetDelayMs(
475       playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick());
476 }
477 
LowThreshold() const478 int DecisionLogic::LowThreshold() const {
479   int target_delay_ms = TargetLevelMs();
480   return std::max(
481       target_delay_ms * 3 / 4,
482       target_delay_ms - config_.deceleration_target_level_offset_ms);
483 }
484 
HighThreshold() const485 int DecisionLogic::HighThreshold() const {
486   if (config_.enable_stable_playout_delay) {
487     return std::max(TargetLevelMs(), packet_arrival_history_.GetMaxDelayMs()) +
488            kDelayAdjustmentGranularityMs;
489   }
490   return std::max(TargetLevelMs(),
491                   LowThreshold() + kDelayAdjustmentGranularityMs);
492 }
493 
LowThresholdCng() const494 int DecisionLogic::LowThresholdCng() const {
495   if (config_.enable_stable_playout_delay) {
496     return LowThreshold();
497   }
498   return std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2);
499 }
500 
HighThresholdCng() const501 int DecisionLogic::HighThresholdCng() const {
502   if (config_.enable_stable_playout_delay) {
503     return HighThreshold();
504   }
505   return TargetLevelMs() + kTargetLevelWindowMs / 2;
506 }
507 
508 }  // namespace webrtc
509