xref: /aosp_15_r20/external/webrtc/modules/audio_coding/neteq/neteq_impl.h (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
12 #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
13 
14 #include <map>
15 #include <memory>
16 #include <string>
17 #include <utility>
18 #include <vector>
19 
20 #include "absl/types/optional.h"
21 #include "api/audio/audio_frame.h"
22 #include "api/neteq/neteq.h"
23 #include "api/neteq/neteq_controller.h"
24 #include "api/neteq/neteq_controller_factory.h"
25 #include "api/neteq/tick_timer.h"
26 #include "api/rtp_packet_info.h"
27 #include "modules/audio_coding/neteq/audio_multi_vector.h"
28 #include "modules/audio_coding/neteq/expand_uma_logger.h"
29 #include "modules/audio_coding/neteq/packet.h"
30 #include "modules/audio_coding/neteq/random_vector.h"
31 #include "modules/audio_coding/neteq/statistics_calculator.h"
32 #include "rtc_base/synchronization/mutex.h"
33 #include "rtc_base/thread_annotations.h"
34 
35 namespace webrtc {
36 
37 // Forward declarations; these types are only used via pointer, reference or std::unique_ptr below.
38 class Accelerate;
39 class BackgroundNoise;
40 class Clock;
41 class ComfortNoise;
42 class DecoderDatabase;
43 class DtmfBuffer;
44 class DtmfToneGenerator;
45 class Expand;
46 class Merge;
47 class NackTracker;
48 class Normal;
49 class PacketBuffer;
50 class RedPayloadSplitter;
51 class PostDecodeVad;
52 class PreemptiveExpand;
53 class RandomVector;  // NOTE(review): likely redundant -- random_vector.h is included and the member is held by value.
54 class SyncBuffer;
55 class TimestampScaler;
56 struct AccelerateFactory;
57 struct DtmfEvent;
58 struct ExpandFactory;
59 struct PreemptiveExpandFactory;
60 
61 class NetEqImpl : public webrtc::NetEq {  // Implementation of the webrtc::NetEq interface.
62  public:
63   enum class OutputType {  // Value type returned by LastOutputType().
64     kNormalSpeech,
65     kPLC,
66     kCNG,
67     kPLCCNG,
68     kVadPassive,
69     kCodecPLC
70   };
71 
72   enum ErrorCodes {  // Presumably the error codes returned by the *Internal() methods -- confirm.
73     kNoError = 0,
74     kOtherError,
75     kUnknownRtpPayloadType,
76     kDecoderNotFound,
77     kInvalidPointer,
78     kAccelerateError,
79     kPreemptiveExpandError,
80     kComfortNoiseErrorCode,
81     kDecoderErrorCode,
82     kOtherDecoderError,
83     kInvalidOperation,
84     kDtmfParsingError,
85     kDtmfInsertError,
86     kSampleUnderrun,
87     kDecodedTooMuch,
88     kRedundancySplitError,
89     kPacketBufferCorruption
90   };
91 
92   struct Dependencies {
93     // The constructor populates the Dependencies struct with the default
94     // implementations of the objects. They can all be replaced by the user
95     // before sending the struct to the NetEqImpl constructor. However, there
96     // are dependencies between some of the classes inside the struct, so
97     // swapping out one may make it necessary to re-create another one.
98     Dependencies(const NetEq::Config& config,
99                  Clock* clock,
100                  const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
101                  const NetEqControllerFactory& controller_factory);
102     ~Dependencies();
103 
104     Clock* const clock;
105     std::unique_ptr<TickTimer> tick_timer;
106     std::unique_ptr<StatisticsCalculator> stats;
107     std::unique_ptr<DecoderDatabase> decoder_database;
108     std::unique_ptr<DtmfBuffer> dtmf_buffer;
109     std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
110     std::unique_ptr<PacketBuffer> packet_buffer;
111     std::unique_ptr<NetEqController> neteq_controller;
112     std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
113     std::unique_ptr<TimestampScaler> timestamp_scaler;
114     std::unique_ptr<AccelerateFactory> accelerate_factory;
115     std::unique_ptr<ExpandFactory> expand_factory;
116     std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
117   };
118 
119   // Creates a new NetEqImpl object.
120   NetEqImpl(const NetEq::Config& config,
121             Dependencies&& deps,
122             bool create_components = true);
123 
124   ~NetEqImpl() override;
125 
126   NetEqImpl(const NetEqImpl&) = delete;
127   NetEqImpl& operator=(const NetEqImpl&) = delete;
128 
129   // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
130   int InsertPacket(const RTPHeader& rtp_header,
131                    rtc::ArrayView<const uint8_t> payload) override;
132 
133   void InsertEmptyPacket(const RTPHeader& rtp_header) override;
134 
135   int GetAudio(
136       AudioFrame* audio_frame,
137       bool* muted,
138       int* current_sample_rate_hz = nullptr,
139       absl::optional<Operation> action_override = absl::nullopt) override;
140 
141   void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
142 
143   bool RegisterPayloadType(int rtp_payload_type,
144                            const SdpAudioFormat& audio_format) override;
145 
146   // Removes `rtp_payload_type` from the codec database. Returns 0 on success,
147   // -1 on failure.
148   int RemovePayloadType(uint8_t rtp_payload_type) override;
149 
150   void RemoveAllPayloadTypes() override;
151 
152   bool SetMinimumDelay(int delay_ms) override;
153 
154   bool SetMaximumDelay(int delay_ms) override;
155 
156   bool SetBaseMinimumDelayMs(int delay_ms) override;
157 
158   int GetBaseMinimumDelayMs() const override;
159 
160   int TargetDelayMs() const override;
161 
162   int FilteredCurrentDelayMs() const override;
163 
164   // Writes the current network statistics to `stats`. The statistics are reset
165   // after the call.
166   int NetworkStatistics(NetEqNetworkStatistics* stats) override;
167 
168   NetEqNetworkStatistics CurrentNetworkStatistics() const override;
169 
170   NetEqLifetimeStatistics GetLifetimeStatistics() const override;
171 
172   NetEqOperationsAndState GetOperationsAndState() const override;
173 
174   // Enables post-decode VAD. When enabled, audio from GetAudio() that contains
175   // no speech is reported as VAD-passive (presumably OutputType::kVadPassive -- confirm).
176   void EnableVad() override;
177 
178   // Disables post-decode VAD.
179   void DisableVad() override;
180 
181   absl::optional<uint32_t> GetPlayoutTimestamp() const override;
182 
183   int last_output_sample_rate_hz() const override;
184 
185   absl::optional<DecoderFormat> GetDecoderFormat(
186       int payload_type) const override;
187 
188   // Flushes both the packet buffer and the sync buffer.
189   void FlushBuffers() override;
190 
191   void EnableNack(size_t max_nack_list_size) override;
192 
193   void DisableNack() override;
194 
195   std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
196 
197   int SyncBufferSizeMs() const override;
198 
199   // These accessor methods are only intended for testing purposes.
200   const SyncBuffer* sync_buffer_for_test() const;
201   Operation last_operation_for_test() const;
202 
203  protected:
204   static const int kOutputSizeMs = 10;
205   static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
206   // TODO(hlundin): Provide a better value for kSyncBufferSize.
207   // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
208   // calculating correlations of current frame against history.
209   static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
210 
211   // Inserts a new packet into NetEq. This is used by the InsertPacket method
212   // above. Returns 0 on success, otherwise an error code.
213   // TODO(hlundin): Merge this with InsertPacket above?
214   int InsertPacketInternal(const RTPHeader& rtp_header,
215                            rtc::ArrayView<const uint8_t> payload)
216       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
217 
218   // Delivers 10 ms of audio data. The data is written to `audio_frame`.
219   // Returns 0 on success, otherwise an error code.
220   int GetAudioInternal(AudioFrame* audio_frame,
221                        bool* muted,
222                        absl::optional<Operation> action_override)
223       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
224 
225   // Provides a decision to the GetAudioInternal method. The decision what to
226   // do is written to `operation`. Packets to decode are written to
227   // `packet_list`, and a DTMF event to play is written to `dtmf_event`. When
228   // DTMF should be played, `play_dtmf` is set to true by the method.
229   // Returns 0 on success, otherwise an error code.
230   int GetDecision(Operation* operation,
231                   PacketList* packet_list,
232                   DtmfEvent* dtmf_event,
233                   bool* play_dtmf,
234                   absl::optional<Operation> action_override)
235       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
236 
237   // Decodes the speech packets in `packet_list`, and writes the results to
238   // `decoded_buffer_`, which is allocated to hold `decoded_buffer_length_`
239   // elements. The length of the decoded data is written to `decoded_length`.
240   // The speech type -- speech or (codec-internal) comfort noise -- is written
241   // to `speech_type`. If `packet_list` contains any SID frames for RFC 3389
242   // comfort noise, those are not decoded.
243   int Decode(PacketList* packet_list,
244              Operation* operation,
245              int* decoded_length,
246              AudioDecoder::SpeechType* speech_type)
247       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
248 
249   // Sub-method to Decode(). Performs codec internal CNG.
250   int DecodeCng(AudioDecoder* decoder,
251                 int* decoded_length,
252                 AudioDecoder::SpeechType* speech_type)
253       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
254 
255   // Sub-method to Decode(). Performs the actual decoding.
256   int DecodeLoop(PacketList* packet_list,
257                  const Operation& operation,
258                  AudioDecoder* decoder,
259                  int* decoded_length,
260                  AudioDecoder::SpeechType* speech_type)
261       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
262 
263   // Sub-method which calls the Normal class to perform the normal operation.
264   void DoNormal(const int16_t* decoded_buffer,
265                 size_t decoded_length,
266                 AudioDecoder::SpeechType speech_type,
267                 bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
268 
269   // Sub-method which calls the Merge class to perform the merge operation.
270   void DoMerge(int16_t* decoded_buffer,
271                size_t decoded_length,
272                AudioDecoder::SpeechType speech_type,
273                bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
274 
275   bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);  // NOTE(review): undocumented; presumably codec-provided PLC, meaning of the bool unclear -- confirm.
276 
277   // Sub-method which calls the Expand class to perform the expand operation.
278   int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
279 
280   // Sub-method which calls the Accelerate class to perform the accelerate
281   // operation.
282   int DoAccelerate(int16_t* decoded_buffer,
283                    size_t decoded_length,
284                    AudioDecoder::SpeechType speech_type,
285                    bool play_dtmf,
286                    bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
287 
288   // Sub-method which calls the PreemptiveExpand class to perform the
289   // preemptive expand operation.
290   int DoPreemptiveExpand(int16_t* decoded_buffer,
291                          size_t decoded_length,
292                          AudioDecoder::SpeechType speech_type,
293                          bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
294 
295   // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
296   // noise. `packet_list` can either contain one SID frame to update the
297   // noise parameters, or no payload at all, in which case the previously
298   // received parameters are used.
299   int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
300       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
301 
302   // Calls the audio decoder to generate codec-internal comfort noise when
303   // no packet was received.
304   void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
305       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
306 
307   // Calls the DtmfToneGenerator class to generate DTMF tones.
308   int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
309       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
310 
311   // Overdubs DTMF on top of `output`.
312   int DtmfOverdub(const DtmfEvent& dtmf_event,
313                   size_t num_channels,
314                   int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
315 
316   // Extracts packets from `packet_buffer_` to produce at least
317   // `required_samples` samples. The packets are inserted into `packet_list`.
318   // Returns the number of samples that the packets in the list will produce, or
319   // -1 in case of an error.
320   int ExtractPackets(size_t required_samples, PacketList* packet_list)
321       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
322 
323   // Resets various variables and objects to new values based on the sample rate
324   // `fs_hz` and the number of audio channels `channels`.
325   void SetSampleRateAndChannels(int fs_hz, size_t channels)
326       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
327 
328   // Returns the output type for the audio produced by the latest call to
329   // GetAudio().
330   OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
331 
332   // Updates Expand and Merge.
333   virtual void UpdatePlcComponents(int fs_hz, size_t channels)
334       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
335 
336   NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const
337       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
338 
339   Clock* const clock_;
340 
341   mutable Mutex mutex_;
342   const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
343   const std::unique_ptr<DecoderDatabase> decoder_database_
344       RTC_GUARDED_BY(mutex_);
345   const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
346   const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
347       RTC_GUARDED_BY(mutex_);
348   const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
349   const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
350       RTC_GUARDED_BY(mutex_);
351   const std::unique_ptr<TimestampScaler> timestamp_scaler_
352       RTC_GUARDED_BY(mutex_);
353   const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
354   const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
355   const std::unique_ptr<AccelerateFactory> accelerate_factory_
356       RTC_GUARDED_BY(mutex_);
357   const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
358       RTC_GUARDED_BY(mutex_);
359   const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
360 
361   std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
362   std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
363   std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
364   std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
365   std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
366   std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
367   std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
368   std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
369   std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
370   RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
371   std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
372   int fs_hz_ RTC_GUARDED_BY(mutex_);
373   int fs_mult_ RTC_GUARDED_BY(mutex_);
374   int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
375   size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
376   size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
377   Mode last_mode_ RTC_GUARDED_BY(mutex_);
378   Operation last_operation_ RTC_GUARDED_BY(mutex_);
379   size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
380   std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
381   uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
382   bool new_codec_ RTC_GUARDED_BY(mutex_);
383   uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
384   bool reset_decoder_ RTC_GUARDED_BY(mutex_);
385   absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
386   absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
387   bool first_packet_ RTC_GUARDED_BY(mutex_);
388   bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
389   std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
390   bool nack_enabled_ RTC_GUARDED_BY(mutex_);
391   const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
392   AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
393       AudioFrame::kVadPassive;
394   std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
395       RTC_GUARDED_BY(mutex_);
396   std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
397   ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
398   ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
399   bool no_time_stretching_ RTC_GUARDED_BY(mutex_);  // Only used for test.
400   rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);  // NOTE(review): no direct include for rtc::BufferT in this header -- presumably transitive; confirm.
401 };
402 
403 }  // namespace webrtc
404 #endif  // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
405