xref: /aosp_15_r20/external/webrtc/modules/audio_coding/acm2/acm_receiver.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/acm2/acm_receiver.h"
12 
13 #include <stdlib.h>
14 #include <string.h>
15 
16 #include <cstdint>
17 #include <vector>
18 
19 #include "absl/strings/match.h"
20 #include "api/audio/audio_frame.h"
21 #include "api/audio_codecs/audio_decoder.h"
22 #include "api/neteq/neteq.h"
23 #include "modules/audio_coding/acm2/acm_resampler.h"
24 #include "modules/audio_coding/acm2/call_statistics.h"
25 #include "modules/audio_coding/neteq/default_neteq_factory.h"
26 #include "rtc_base/checks.h"
27 #include "rtc_base/logging.h"
28 #include "rtc_base/numerics/safe_conversions.h"
29 #include "rtc_base/strings/audio_format_to_string.h"
30 #include "system_wrappers/include/clock.h"
31 
32 namespace webrtc {
33 
34 namespace acm2 {
35 
36 namespace {
37 
CreateNetEq(NetEqFactory * neteq_factory,const NetEq::Config & config,Clock * clock,const rtc::scoped_refptr<AudioDecoderFactory> & decoder_factory)38 std::unique_ptr<NetEq> CreateNetEq(
39     NetEqFactory* neteq_factory,
40     const NetEq::Config& config,
41     Clock* clock,
42     const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory) {
43   if (neteq_factory) {
44     return neteq_factory->CreateNetEq(config, decoder_factory, clock);
45   }
46   return DefaultNetEqFactory().CreateNetEq(config, decoder_factory, clock);
47 }
48 
49 }  // namespace
50 
AcmReceiver(const AudioCodingModule::Config & config)51 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
52     : last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
53       neteq_(CreateNetEq(config.neteq_factory,
54                          config.neteq_config,
55                          config.clock,
56                          config.decoder_factory)),
57       clock_(config.clock),
58       resampled_last_output_frame_(true) {
59   RTC_DCHECK(clock_);
60   memset(last_audio_buffer_.get(), 0,
61          sizeof(int16_t) * AudioFrame::kMaxDataSizeSamples);
62 }
63 
64 AcmReceiver::~AcmReceiver() = default;
65 
SetMinimumDelay(int delay_ms)66 int AcmReceiver::SetMinimumDelay(int delay_ms) {
67   if (neteq_->SetMinimumDelay(delay_ms))
68     return 0;
69   RTC_LOG(LS_ERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
70   return -1;
71 }
72 
SetMaximumDelay(int delay_ms)73 int AcmReceiver::SetMaximumDelay(int delay_ms) {
74   if (neteq_->SetMaximumDelay(delay_ms))
75     return 0;
76   RTC_LOG(LS_ERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
77   return -1;
78 }
79 
SetBaseMinimumDelayMs(int delay_ms)80 bool AcmReceiver::SetBaseMinimumDelayMs(int delay_ms) {
81   return neteq_->SetBaseMinimumDelayMs(delay_ms);
82 }
83 
GetBaseMinimumDelayMs() const84 int AcmReceiver::GetBaseMinimumDelayMs() const {
85   return neteq_->GetBaseMinimumDelayMs();
86 }
87 
last_packet_sample_rate_hz() const88 absl::optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
89   MutexLock lock(&mutex_);
90   if (!last_decoder_) {
91     return absl::nullopt;
92   }
93   return last_decoder_->sample_rate_hz;
94 }
95 
last_output_sample_rate_hz() const96 int AcmReceiver::last_output_sample_rate_hz() const {
97   return neteq_->last_output_sample_rate_hz();
98 }
99 
InsertPacket(const RTPHeader & rtp_header,rtc::ArrayView<const uint8_t> incoming_payload)100 int AcmReceiver::InsertPacket(const RTPHeader& rtp_header,
101                               rtc::ArrayView<const uint8_t> incoming_payload) {
102   if (incoming_payload.empty()) {
103     neteq_->InsertEmptyPacket(rtp_header);
104     return 0;
105   }
106 
107   int payload_type = rtp_header.payloadType;
108   auto format = neteq_->GetDecoderFormat(payload_type);
109   if (format && absl::EqualsIgnoreCase(format->sdp_format.name, "red")) {
110     // This is a RED packet. Get the format of the audio codec.
111     payload_type = incoming_payload[0] & 0x7f;
112     format = neteq_->GetDecoderFormat(payload_type);
113   }
114   if (!format) {
115     RTC_LOG_F(LS_ERROR) << "Payload-type " << payload_type
116                         << " is not registered.";
117     return -1;
118   }
119 
120   {
121     MutexLock lock(&mutex_);
122     if (absl::EqualsIgnoreCase(format->sdp_format.name, "cn")) {
123       if (last_decoder_ && last_decoder_->num_channels > 1) {
124         // This is a CNG and the audio codec is not mono, so skip pushing in
125         // packets into NetEq.
126         return 0;
127       }
128     } else {
129       last_decoder_ = DecoderInfo{/*payload_type=*/payload_type,
130                                   /*sample_rate_hz=*/format->sample_rate_hz,
131                                   /*num_channels=*/format->num_channels,
132                                   /*sdp_format=*/std::move(format->sdp_format)};
133     }
134   }  // `mutex_` is released.
135 
136   if (neteq_->InsertPacket(rtp_header, incoming_payload) < 0) {
137     RTC_LOG(LS_ERROR) << "AcmReceiver::InsertPacket "
138                       << static_cast<int>(rtp_header.payloadType)
139                       << " Failed to insert packet";
140     return -1;
141   }
142   return 0;
143 }
144 
GetAudio(int desired_freq_hz,AudioFrame * audio_frame,bool * muted)145 int AcmReceiver::GetAudio(int desired_freq_hz,
146                           AudioFrame* audio_frame,
147                           bool* muted) {
148   RTC_DCHECK(muted);
149 
150   int current_sample_rate_hz = 0;
151   if (neteq_->GetAudio(audio_frame, muted, &current_sample_rate_hz) !=
152       NetEq::kOK) {
153     RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
154     return -1;
155   }
156 
157   RTC_DCHECK_NE(current_sample_rate_hz, 0);
158 
159   // Update if resampling is required.
160   const bool need_resampling =
161       (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);
162 
163   // Accessing members, take the lock.
164   MutexLock lock(&mutex_);
165   if (need_resampling && !resampled_last_output_frame_) {
166     // Prime the resampler with the last frame.
167     int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
168     int samples_per_channel_int = resampler_.Resample10Msec(
169         last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
170         audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
171         temp_output);
172     if (samples_per_channel_int < 0) {
173       RTC_LOG(LS_ERROR) << "AcmReceiver::GetAudio - "
174                            "Resampling last_audio_buffer_ failed.";
175       return -1;
176     }
177   }
178 
179   // TODO(bugs.webrtc.org/3923) Glitches in the output may appear if the output
180   // rate from NetEq changes.
181   if (need_resampling) {
182     // TODO(yujo): handle this more efficiently for muted frames.
183     int samples_per_channel_int = resampler_.Resample10Msec(
184         audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
185         audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
186         audio_frame->mutable_data());
187     if (samples_per_channel_int < 0) {
188       RTC_LOG(LS_ERROR)
189           << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
190       return -1;
191     }
192     audio_frame->samples_per_channel_ =
193         static_cast<size_t>(samples_per_channel_int);
194     audio_frame->sample_rate_hz_ = desired_freq_hz;
195     RTC_DCHECK_EQ(
196         audio_frame->sample_rate_hz_,
197         rtc::dchecked_cast<int>(audio_frame->samples_per_channel_ * 100));
198     resampled_last_output_frame_ = true;
199   } else {
200     resampled_last_output_frame_ = false;
201     // We might end up here ONLY if codec is changed.
202   }
203 
204   // Store current audio in `last_audio_buffer_` for next time.
205   memcpy(last_audio_buffer_.get(), audio_frame->data(),
206          sizeof(int16_t) * audio_frame->samples_per_channel_ *
207              audio_frame->num_channels_);
208 
209   call_stats_.DecodedByNetEq(audio_frame->speech_type_, *muted);
210   return 0;
211 }
212 
SetCodecs(const std::map<int,SdpAudioFormat> & codecs)213 void AcmReceiver::SetCodecs(const std::map<int, SdpAudioFormat>& codecs) {
214   neteq_->SetCodecs(codecs);
215 }
216 
FlushBuffers()217 void AcmReceiver::FlushBuffers() {
218   neteq_->FlushBuffers();
219 }
220 
RemoveAllCodecs()221 void AcmReceiver::RemoveAllCodecs() {
222   MutexLock lock(&mutex_);
223   neteq_->RemoveAllPayloadTypes();
224   last_decoder_ = absl::nullopt;
225 }
226 
GetPlayoutTimestamp()227 absl::optional<uint32_t> AcmReceiver::GetPlayoutTimestamp() {
228   return neteq_->GetPlayoutTimestamp();
229 }
230 
FilteredCurrentDelayMs() const231 int AcmReceiver::FilteredCurrentDelayMs() const {
232   return neteq_->FilteredCurrentDelayMs();
233 }
234 
TargetDelayMs() const235 int AcmReceiver::TargetDelayMs() const {
236   return neteq_->TargetDelayMs();
237 }
238 
LastDecoder() const239 absl::optional<std::pair<int, SdpAudioFormat>> AcmReceiver::LastDecoder()
240     const {
241   MutexLock lock(&mutex_);
242   if (!last_decoder_) {
243     return absl::nullopt;
244   }
245   RTC_DCHECK_NE(-1, last_decoder_->payload_type);
246   return std::make_pair(last_decoder_->payload_type, last_decoder_->sdp_format);
247 }
248 
GetNetworkStatistics(NetworkStatistics * acm_stat,bool get_and_clear_legacy_stats) const249 void AcmReceiver::GetNetworkStatistics(
250     NetworkStatistics* acm_stat,
251     bool get_and_clear_legacy_stats /* = true */) const {
252   NetEqNetworkStatistics neteq_stat;
253   if (get_and_clear_legacy_stats) {
254     // NetEq function always returns zero, so we don't check the return value.
255     neteq_->NetworkStatistics(&neteq_stat);
256 
257     acm_stat->currentExpandRate = neteq_stat.expand_rate;
258     acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
259     acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
260     acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
261     acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
262     acm_stat->currentSecondaryDiscardedRate =
263         neteq_stat.secondary_discarded_rate;
264     acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
265     acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
266   } else {
267     neteq_stat = neteq_->CurrentNetworkStatistics();
268     acm_stat->currentExpandRate = 0;
269     acm_stat->currentSpeechExpandRate = 0;
270     acm_stat->currentPreemptiveRate = 0;
271     acm_stat->currentAccelerateRate = 0;
272     acm_stat->currentSecondaryDecodedRate = 0;
273     acm_stat->currentSecondaryDiscardedRate = 0;
274     acm_stat->meanWaitingTimeMs = -1;
275     acm_stat->maxWaitingTimeMs = 1;
276   }
277   acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
278   acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
279   acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false;
280 
281   NetEqLifetimeStatistics neteq_lifetime_stat = neteq_->GetLifetimeStatistics();
282   acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received;
283   acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples;
284   acm_stat->silentConcealedSamples =
285       neteq_lifetime_stat.silent_concealed_samples;
286   acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events;
287   acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
288   acm_stat->jitterBufferTargetDelayMs =
289       neteq_lifetime_stat.jitter_buffer_target_delay_ms;
290   acm_stat->jitterBufferMinimumDelayMs =
291       neteq_lifetime_stat.jitter_buffer_minimum_delay_ms;
292   acm_stat->jitterBufferEmittedCount =
293       neteq_lifetime_stat.jitter_buffer_emitted_count;
294   acm_stat->delayedPacketOutageSamples =
295       neteq_lifetime_stat.delayed_packet_outage_samples;
296   acm_stat->relativePacketArrivalDelayMs =
297       neteq_lifetime_stat.relative_packet_arrival_delay_ms;
298   acm_stat->interruptionCount = neteq_lifetime_stat.interruption_count;
299   acm_stat->totalInterruptionDurationMs =
300       neteq_lifetime_stat.total_interruption_duration_ms;
301   acm_stat->insertedSamplesForDeceleration =
302       neteq_lifetime_stat.inserted_samples_for_deceleration;
303   acm_stat->removedSamplesForAcceleration =
304       neteq_lifetime_stat.removed_samples_for_acceleration;
305   acm_stat->fecPacketsReceived = neteq_lifetime_stat.fec_packets_received;
306   acm_stat->fecPacketsDiscarded = neteq_lifetime_stat.fec_packets_discarded;
307   acm_stat->packetsDiscarded = neteq_lifetime_stat.packets_discarded;
308 
309   NetEqOperationsAndState neteq_operations_and_state =
310       neteq_->GetOperationsAndState();
311   acm_stat->packetBufferFlushes =
312       neteq_operations_and_state.packet_buffer_flushes;
313 }
314 
EnableNack(size_t max_nack_list_size)315 int AcmReceiver::EnableNack(size_t max_nack_list_size) {
316   neteq_->EnableNack(max_nack_list_size);
317   return 0;
318 }
319 
DisableNack()320 void AcmReceiver::DisableNack() {
321   neteq_->DisableNack();
322 }
323 
GetNackList(int64_t round_trip_time_ms) const324 std::vector<uint16_t> AcmReceiver::GetNackList(
325     int64_t round_trip_time_ms) const {
326   return neteq_->GetNackList(round_trip_time_ms);
327 }
328 
ResetInitialDelay()329 void AcmReceiver::ResetInitialDelay() {
330   neteq_->SetMinimumDelay(0);
331   // TODO(turajs): Should NetEq Buffer be flushed?
332 }
333 
NowInTimestamp(int decoder_sampling_rate) const334 uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
335   // Down-cast the time to (32-6)-bit since we only care about
336   // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
337   // We masked 6 most significant bits of 32-bit so there is no overflow in
338   // the conversion from milliseconds to timestamp.
339   const uint32_t now_in_ms =
340       static_cast<uint32_t>(clock_->TimeInMilliseconds() & 0x03ffffff);
341   return static_cast<uint32_t>((decoder_sampling_rate / 1000) * now_in_ms);
342 }
343 
GetDecodingCallStatistics(AudioDecodingCallStats * stats) const344 void AcmReceiver::GetDecodingCallStatistics(
345     AudioDecodingCallStats* stats) const {
346   MutexLock lock(&mutex_);
347   *stats = call_stats_.GetDecodingStatistics();
348 }
349 
350 }  // namespace acm2
351 
352 }  // namespace webrtc
353