xref: /aosp_15_r20/external/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/acm2/acm_receiver.h"
12 
13 #include <algorithm>  // std::min
14 #include <memory>
15 
16 #include "absl/types/optional.h"
17 #include "api/audio_codecs/builtin_audio_decoder_factory.h"
18 #include "api/audio_codecs/builtin_audio_encoder_factory.h"
19 #include "modules/audio_coding/codecs/cng/audio_encoder_cng.h"
20 #include "modules/audio_coding/include/audio_coding_module.h"
21 #include "modules/audio_coding/neteq/tools/rtp_generator.h"
22 #include "modules/include/module_common_types.h"
23 #include "rtc_base/checks.h"
24 #include "rtc_base/numerics/safe_conversions.h"
25 #include "system_wrappers/include/clock.h"
26 #include "test/gtest.h"
27 #include "test/testsupport/file_utils.h"
28 
29 namespace webrtc {
30 
31 namespace acm2 {
32 
33 class AcmReceiverTestOldApi : public AudioPacketizationCallback,
34                               public ::testing::Test {
35  protected:
AcmReceiverTestOldApi()36   AcmReceiverTestOldApi()
37       : timestamp_(0),
38         packet_sent_(false),
39         last_packet_send_timestamp_(timestamp_),
40         last_frame_type_(AudioFrameType::kEmptyFrame) {
41     config_.decoder_factory = decoder_factory_;
42   }
43 
~AcmReceiverTestOldApi()44   ~AcmReceiverTestOldApi() {}
45 
SetUp()46   void SetUp() override {
47     acm_.reset(AudioCodingModule::Create(config_));
48     receiver_.reset(new AcmReceiver(config_));
49     ASSERT_TRUE(receiver_.get() != NULL);
50     ASSERT_TRUE(acm_.get() != NULL);
51     acm_->InitializeReceiver();
52     acm_->RegisterTransportCallback(this);
53 
54     rtp_header_.sequenceNumber = 0;
55     rtp_header_.timestamp = 0;
56     rtp_header_.markerBit = false;
57     rtp_header_.ssrc = 0x12345678;  // Arbitrary.
58     rtp_header_.numCSRCs = 0;
59     rtp_header_.payloadType = 0;
60   }
61 
TearDown()62   void TearDown() override {}
63 
SetEncoder(int payload_type,const SdpAudioFormat & format,const std::map<int,int> cng_payload_types={})64   AudioCodecInfo SetEncoder(int payload_type,
65                             const SdpAudioFormat& format,
66                             const std::map<int, int> cng_payload_types = {}) {
67     // Create the speech encoder.
68     absl::optional<AudioCodecInfo> info =
69         encoder_factory_->QueryAudioEncoder(format);
70     RTC_CHECK(info.has_value());
71     std::unique_ptr<AudioEncoder> enc =
72         encoder_factory_->MakeAudioEncoder(payload_type, format, absl::nullopt);
73 
74     // If we have a compatible CN specification, stack a CNG on top.
75     auto it = cng_payload_types.find(info->sample_rate_hz);
76     if (it != cng_payload_types.end()) {
77       AudioEncoderCngConfig config;
78       config.speech_encoder = std::move(enc);
79       config.num_channels = 1;
80       config.payload_type = it->second;
81       config.vad_mode = Vad::kVadNormal;
82       enc = CreateComfortNoiseEncoder(std::move(config));
83     }
84 
85     // Actually start using the new encoder.
86     acm_->SetEncoder(std::move(enc));
87     return *info;
88   }
89 
InsertOnePacketOfSilence(const AudioCodecInfo & info)90   int InsertOnePacketOfSilence(const AudioCodecInfo& info) {
91     // Frame setup according to the codec.
92     AudioFrame frame;
93     frame.sample_rate_hz_ = info.sample_rate_hz;
94     frame.samples_per_channel_ = info.sample_rate_hz / 100;  // 10 ms.
95     frame.num_channels_ = info.num_channels;
96     frame.Mute();
97     packet_sent_ = false;
98     last_packet_send_timestamp_ = timestamp_;
99     int num_10ms_frames = 0;
100     while (!packet_sent_) {
101       frame.timestamp_ = timestamp_;
102       timestamp_ += rtc::checked_cast<uint32_t>(frame.samples_per_channel_);
103       EXPECT_GE(acm_->Add10MsData(frame), 0);
104       ++num_10ms_frames;
105     }
106     return num_10ms_frames;
107   }
108 
SendData(AudioFrameType frame_type,uint8_t payload_type,uint32_t timestamp,const uint8_t * payload_data,size_t payload_len_bytes,int64_t absolute_capture_timestamp_ms)109   int SendData(AudioFrameType frame_type,
110                uint8_t payload_type,
111                uint32_t timestamp,
112                const uint8_t* payload_data,
113                size_t payload_len_bytes,
114                int64_t absolute_capture_timestamp_ms) override {
115     if (frame_type == AudioFrameType::kEmptyFrame)
116       return 0;
117 
118     rtp_header_.payloadType = payload_type;
119     rtp_header_.timestamp = timestamp;
120 
121     int ret_val = receiver_->InsertPacket(
122         rtp_header_,
123         rtc::ArrayView<const uint8_t>(payload_data, payload_len_bytes));
124     if (ret_val < 0) {
125       RTC_DCHECK_NOTREACHED();
126       return -1;
127     }
128     rtp_header_.sequenceNumber++;
129     packet_sent_ = true;
130     last_frame_type_ = frame_type;
131     return 0;
132   }
133 
134   const rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_ =
135       CreateBuiltinAudioEncoderFactory();
136   const rtc::scoped_refptr<AudioDecoderFactory> decoder_factory_ =
137       CreateBuiltinAudioDecoderFactory();
138   AudioCodingModule::Config config_;
139   std::unique_ptr<AcmReceiver> receiver_;
140   std::unique_ptr<AudioCodingModule> acm_;
141   RTPHeader rtp_header_;
142   uint32_t timestamp_;
143   bool packet_sent_;  // Set when SendData is called reset when inserting audio.
144   uint32_t last_packet_send_timestamp_;
145   AudioFrameType last_frame_type_;
146 };
147 
148 #if defined(WEBRTC_ANDROID)
149 #define MAYBE_SampleRate DISABLED_SampleRate
150 #else
151 #define MAYBE_SampleRate SampleRate
152 #endif
TEST_F(AcmReceiverTestOldApi,MAYBE_SampleRate)153 TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) {
154   const std::map<int, SdpAudioFormat> codecs = {{0, {"OPUS", 48000, 2}}};
155   receiver_->SetCodecs(codecs);
156 
157   constexpr int kOutSampleRateHz = 8000;  // Different than codec sample rate.
158   for (size_t i = 0; i < codecs.size(); ++i) {
159     const int payload_type = rtc::checked_cast<int>(i);
160     const int num_10ms_frames =
161         InsertOnePacketOfSilence(SetEncoder(payload_type, codecs.at(i)));
162     for (int k = 0; k < num_10ms_frames; ++k) {
163       AudioFrame frame;
164       bool muted;
165       EXPECT_EQ(0, receiver_->GetAudio(kOutSampleRateHz, &frame, &muted));
166     }
167     EXPECT_EQ(encoder_factory_->QueryAudioEncoder(codecs.at(i))->sample_rate_hz,
168               receiver_->last_output_sample_rate_hz());
169   }
170 }
171 
172 class AcmReceiverTestFaxModeOldApi : public AcmReceiverTestOldApi {
173  protected:
AcmReceiverTestFaxModeOldApi()174   AcmReceiverTestFaxModeOldApi() {
175     config_.neteq_config.for_test_no_time_stretching = true;
176   }
177 
RunVerifyAudioFrame(const SdpAudioFormat & codec)178   void RunVerifyAudioFrame(const SdpAudioFormat& codec) {
179     // Make sure "fax mode" is enabled. This will avoid delay changes unless the
180     // packet-loss concealment is made. We do this in order to make the
181     // timestamp increments predictable; in normal mode, NetEq may decide to do
182     // accelerate or pre-emptive expand operations after some time, offsetting
183     // the timestamp.
184     EXPECT_TRUE(config_.neteq_config.for_test_no_time_stretching);
185 
186     constexpr int payload_type = 17;
187     receiver_->SetCodecs({{payload_type, codec}});
188 
189     const AudioCodecInfo info = SetEncoder(payload_type, codec);
190     const int output_sample_rate_hz = info.sample_rate_hz;
191     const size_t output_channels = info.num_channels;
192     const size_t samples_per_ms = rtc::checked_cast<size_t>(
193         rtc::CheckedDivExact(output_sample_rate_hz, 1000));
194     const AudioFrame::VADActivity expected_vad_activity =
195         output_sample_rate_hz > 16000 ? AudioFrame::kVadActive
196                                       : AudioFrame::kVadPassive;
197 
198     // Expect the first output timestamp to be 5*fs/8000 samples before the
199     // first inserted timestamp (because of NetEq's look-ahead). (This value is
200     // defined in Expand::overlap_length_.)
201     uint32_t expected_output_ts =
202         last_packet_send_timestamp_ -
203         rtc::CheckedDivExact(5 * output_sample_rate_hz, 8000);
204 
205     AudioFrame frame;
206     bool muted;
207     EXPECT_EQ(0, receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
208     // Expect timestamp = 0 before first packet is inserted.
209     EXPECT_EQ(0u, frame.timestamp_);
210     for (int i = 0; i < 5; ++i) {
211       const int num_10ms_frames = InsertOnePacketOfSilence(info);
212       for (int k = 0; k < num_10ms_frames; ++k) {
213         EXPECT_EQ(0,
214                   receiver_->GetAudio(output_sample_rate_hz, &frame, &muted));
215         EXPECT_EQ(expected_output_ts, frame.timestamp_);
216         expected_output_ts += rtc::checked_cast<uint32_t>(10 * samples_per_ms);
217         EXPECT_EQ(10 * samples_per_ms, frame.samples_per_channel_);
218         EXPECT_EQ(output_sample_rate_hz, frame.sample_rate_hz_);
219         EXPECT_EQ(output_channels, frame.num_channels_);
220         EXPECT_EQ(AudioFrame::kNormalSpeech, frame.speech_type_);
221         EXPECT_EQ(expected_vad_activity, frame.vad_activity_);
222         EXPECT_FALSE(muted);
223       }
224     }
225   }
226 };
227 
228 #if defined(WEBRTC_ANDROID)
229 #define MAYBE_VerifyAudioFramePCMU DISABLED_VerifyAudioFramePCMU
230 #else
231 #define MAYBE_VerifyAudioFramePCMU VerifyAudioFramePCMU
232 #endif
TEST_F(AcmReceiverTestFaxModeOldApi,MAYBE_VerifyAudioFramePCMU)233 TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFramePCMU) {
234   RunVerifyAudioFrame({"PCMU", 8000, 1});
235 }
236 
237 #if defined(WEBRTC_ANDROID)
238 #define MAYBE_VerifyAudioFrameOpus DISABLED_VerifyAudioFrameOpus
239 #else
240 #define MAYBE_VerifyAudioFrameOpus VerifyAudioFrameOpus
241 #endif
TEST_F(AcmReceiverTestFaxModeOldApi,MAYBE_VerifyAudioFrameOpus)242 TEST_F(AcmReceiverTestFaxModeOldApi, MAYBE_VerifyAudioFrameOpus) {
243   RunVerifyAudioFrame({"opus", 48000, 2});
244 }
245 
246 #if defined(WEBRTC_ANDROID)
247 #define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
248 #else
249 #define MAYBE_PostdecodingVad PostdecodingVad
250 #endif
TEST_F(AcmReceiverTestOldApi,MAYBE_PostdecodingVad)251 TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
252   EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
253   constexpr int payload_type = 34;
254   const SdpAudioFormat codec = {"L16", 16000, 1};
255   const AudioCodecInfo info = SetEncoder(payload_type, codec);
256   receiver_->SetCodecs({{payload_type, codec}});
257   constexpr int kNumPackets = 5;
258   AudioFrame frame;
259   for (int n = 0; n < kNumPackets; ++n) {
260     const int num_10ms_frames = InsertOnePacketOfSilence(info);
261     for (int k = 0; k < num_10ms_frames; ++k) {
262       bool muted;
263       ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
264     }
265   }
266   EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
267 }
268 
269 class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
270  protected:
AcmReceiverTestPostDecodeVadPassiveOldApi()271   AcmReceiverTestPostDecodeVadPassiveOldApi() {
272     config_.neteq_config.enable_post_decode_vad = false;
273   }
274 };
275 
276 #if defined(WEBRTC_ANDROID)
277 #define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
278 #else
279 #define MAYBE_PostdecodingVad PostdecodingVad
280 #endif
TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi,MAYBE_PostdecodingVad)281 TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
282   EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
283   constexpr int payload_type = 34;
284   const SdpAudioFormat codec = {"L16", 16000, 1};
285   const AudioCodecInfo info = SetEncoder(payload_type, codec);
286   auto const value = encoder_factory_->QueryAudioEncoder(codec);
287   ASSERT_TRUE(value.has_value());
288   receiver_->SetCodecs({{payload_type, codec}});
289   const int kNumPackets = 5;
290   AudioFrame frame;
291   for (int n = 0; n < kNumPackets; ++n) {
292     const int num_10ms_frames = InsertOnePacketOfSilence(info);
293     for (int k = 0; k < num_10ms_frames; ++k) {
294       bool muted;
295       ASSERT_EQ(0, receiver_->GetAudio(info.sample_rate_hz, &frame, &muted));
296     }
297   }
298   EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
299 }
300 
#if defined(WEBRTC_ANDROID)
#define MAYBE_LastAudioCodec DISABLED_LastAudioCodec
#else
#define MAYBE_LastAudioCodec LastAudioCodec
#endif
#if defined(WEBRTC_CODEC_OPUS)
// Checks that LastDecoder() and last_packet_sample_rate_hz() follow the last
// *speech* payload that was received: both stay unset while only comfort
// noise (DTX) has arrived, and later comfort-noise packets do not replace the
// codec recorded for the preceding speech packet.
TEST_F(AcmReceiverTestOldApi, MAYBE_LastAudioCodec) {
  const std::map<int, SdpAudioFormat> codecs = {
      {0, {"PCMU", 8000, 1}}, {1, {"PCMA", 8000, 1}}, {2, {"L16", 32000, 1}}};
  // Sample rate (Hz) -> comfort-noise payload type.
  const std::map<int, int> cng_payload_types = {
      {8000, 100}, {16000, 101}, {32000, 102}};
  {
    // The receiver must know the CN payload types as well as the speech ones.
    std::map<int, SdpAudioFormat> receive_codecs = codecs;
    for (const auto& cng_type : cng_payload_types) {
      receive_codecs.emplace(cng_type.second,
                             SdpAudioFormat("CN", cng_type.first, 1));
    }
    receiver_->SetCodecs(receive_codecs);
  }

  // No audio payload is received.
  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());

  // Start with sending DTX.
  packet_sent_ = false;
  InsertOnePacketOfSilence(
      SetEncoder(0, codecs.at(0), cng_payload_types));  // Enough to test
                                                        // with one codec.
  ASSERT_TRUE(packet_sent_);
  EXPECT_EQ(AudioFrameType::kAudioFrameCN, last_frame_type_);

  // Only DTX has been received, so the last audio codec is still undefined.
  EXPECT_EQ(absl::nullopt, receiver_->LastDecoder());
  EXPECT_EQ(absl::nullopt, receiver_->last_packet_sample_rate_hz());

  for (const auto& codec : codecs) {
    const int payload_type = codec.first;
    const SdpAudioFormat& format = codec.second;

    // Set DTX off to send audio payload.
    packet_sent_ = false;
    const AudioCodecInfo info_without_cng = SetEncoder(payload_type, format);
    InsertOnePacketOfSilence(info_without_cng);

    // Sanity check that an audio payload was actually received and that it
    // is of type "speech".
    ASSERT_TRUE(packet_sent_);
    ASSERT_EQ(AudioFrameType::kAudioFrameSpeech, last_frame_type_);
    EXPECT_EQ(info_without_cng.sample_rate_hz,
              receiver_->last_packet_sample_rate_hz());

    // Set VAD on to send DTX. Then check that the "last audio codec" is the
    // expected codec. Encode repeatedly until a DTX packet is sent.
    const AudioCodecInfo info_with_cng =
        SetEncoder(payload_type, format, cng_payload_types);
    while (last_frame_type_ != AudioFrameType::kAudioFrameCN) {
      packet_sent_ = false;
      InsertOnePacketOfSilence(info_with_cng);
      ASSERT_TRUE(packet_sent_);
    }
    EXPECT_EQ(info_with_cng.sample_rate_hz,
              receiver_->last_packet_sample_rate_hz());

    // Fail cleanly instead of dereferencing an empty optional if the receiver
    // did not record a decoder.
    const auto last_decoder = receiver_->LastDecoder();
    ASSERT_TRUE(last_decoder.has_value());
    EXPECT_EQ(format, last_decoder->second);
  }
}
#endif
366 
367 // Check if the statistics are initialized correctly. Before any call to ACM
368 // all fields have to be zero.
369 #if defined(WEBRTC_ANDROID)
370 #define MAYBE_InitializedToZero DISABLED_InitializedToZero
371 #else
372 #define MAYBE_InitializedToZero InitializedToZero
373 #endif
TEST_F(AcmReceiverTestOldApi,MAYBE_InitializedToZero)374 TEST_F(AcmReceiverTestOldApi, MAYBE_InitializedToZero) {
375   AudioDecodingCallStats stats;
376   receiver_->GetDecodingCallStatistics(&stats);
377   EXPECT_EQ(0, stats.calls_to_neteq);
378   EXPECT_EQ(0, stats.calls_to_silence_generator);
379   EXPECT_EQ(0, stats.decoded_normal);
380   EXPECT_EQ(0, stats.decoded_cng);
381   EXPECT_EQ(0, stats.decoded_neteq_plc);
382   EXPECT_EQ(0, stats.decoded_plc_cng);
383   EXPECT_EQ(0, stats.decoded_muted_output);
384 }
385 
386 // Insert some packets and pull audio. Check statistics are valid. Then,
387 // simulate packet loss and check if PLC and PLC-to-CNG statistics are
388 // correctly updated.
389 #if defined(WEBRTC_ANDROID)
390 #define MAYBE_NetEqCalls DISABLED_NetEqCalls
391 #else
392 #define MAYBE_NetEqCalls NetEqCalls
393 #endif
TEST_F(AcmReceiverTestOldApi,MAYBE_NetEqCalls)394 TEST_F(AcmReceiverTestOldApi, MAYBE_NetEqCalls) {
395   AudioDecodingCallStats stats;
396   const int kNumNormalCalls = 10;
397   const int kSampleRateHz = 16000;
398   const int kNumSamples10ms = kSampleRateHz / 100;
399   const int kFrameSizeMs = 10;  // Multiple of 10.
400   const int kFrameSizeSamples = kFrameSizeMs / 10 * kNumSamples10ms;
401   const int kPayloadSizeBytes = kFrameSizeSamples * sizeof(int16_t);
402   const uint8_t kPayloadType = 111;
403   RTPHeader rtp_header;
404   AudioFrame audio_frame;
405   bool muted;
406 
407   receiver_->SetCodecs(
408       {{kPayloadType, SdpAudioFormat("L16", kSampleRateHz, 1)}});
409   rtp_header.sequenceNumber = 0xABCD;
410   rtp_header.timestamp = 0xABCDEF01;
411   rtp_header.payloadType = kPayloadType;
412   rtp_header.markerBit = false;
413   rtp_header.ssrc = 0x1234;
414   rtp_header.numCSRCs = 0;
415   rtp_header.payload_type_frequency = kSampleRateHz;
416 
417   for (int num_calls = 0; num_calls < kNumNormalCalls; ++num_calls) {
418     const uint8_t kPayload[kPayloadSizeBytes] = {0};
419     ASSERT_EQ(0, receiver_->InsertPacket(rtp_header, kPayload));
420     ++rtp_header.sequenceNumber;
421     rtp_header.timestamp += kFrameSizeSamples;
422     ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted));
423     EXPECT_FALSE(muted);
424   }
425   receiver_->GetDecodingCallStatistics(&stats);
426   EXPECT_EQ(kNumNormalCalls, stats.calls_to_neteq);
427   EXPECT_EQ(0, stats.calls_to_silence_generator);
428   EXPECT_EQ(kNumNormalCalls, stats.decoded_normal);
429   EXPECT_EQ(0, stats.decoded_cng);
430   EXPECT_EQ(0, stats.decoded_neteq_plc);
431   EXPECT_EQ(0, stats.decoded_plc_cng);
432   EXPECT_EQ(0, stats.decoded_muted_output);
433 
434   const int kNumPlc = 3;
435   const int kNumPlcCng = 5;
436 
437   // Simulate packet-loss. NetEq first performs PLC then PLC fades to CNG.
438   for (int n = 0; n < kNumPlc + kNumPlcCng; ++n) {
439     ASSERT_EQ(0, receiver_->GetAudio(-1, &audio_frame, &muted));
440     EXPECT_FALSE(muted);
441   }
442   receiver_->GetDecodingCallStatistics(&stats);
443   EXPECT_EQ(kNumNormalCalls + kNumPlc + kNumPlcCng, stats.calls_to_neteq);
444   EXPECT_EQ(0, stats.calls_to_silence_generator);
445   EXPECT_EQ(kNumNormalCalls, stats.decoded_normal);
446   EXPECT_EQ(0, stats.decoded_cng);
447   EXPECT_EQ(kNumPlc, stats.decoded_neteq_plc);
448   EXPECT_EQ(kNumPlcCng, stats.decoded_plc_cng);
449   EXPECT_EQ(0, stats.decoded_muted_output);
450   // TODO(henrik.lundin) Add a test with muted state enabled.
451 }
452 
453 }  // namespace acm2
454 
455 }  // namespace webrtc
456