1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ 12 #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ 13 14 #include <map> 15 #include <memory> 16 #include <string> 17 #include <utility> 18 #include <vector> 19 20 #include "absl/types/optional.h" 21 #include "api/audio/audio_frame.h" 22 #include "api/neteq/neteq.h" 23 #include "api/neteq/neteq_controller.h" 24 #include "api/neteq/neteq_controller_factory.h" 25 #include "api/neteq/tick_timer.h" 26 #include "api/rtp_packet_info.h" 27 #include "modules/audio_coding/neteq/audio_multi_vector.h" 28 #include "modules/audio_coding/neteq/expand_uma_logger.h" 29 #include "modules/audio_coding/neteq/packet.h" 30 #include "modules/audio_coding/neteq/random_vector.h" 31 #include "modules/audio_coding/neteq/statistics_calculator.h" 32 #include "rtc_base/synchronization/mutex.h" 33 #include "rtc_base/thread_annotations.h" 34 35 namespace webrtc { 36 37 // Forward declarations. 38 class Accelerate; 39 class BackgroundNoise; 40 class Clock; 41 class ComfortNoise; 42 class DecoderDatabase; 43 class DtmfBuffer; 44 class DtmfToneGenerator; 45 class Expand; 46 class Merge; 47 class NackTracker; 48 class Normal; 49 class PacketBuffer; 50 class RedPayloadSplitter; 51 class PostDecodeVad; 52 class PreemptiveExpand; 53 class RandomVector; 54 class SyncBuffer; 55 class TimestampScaler; 56 struct AccelerateFactory; 57 struct DtmfEvent; 58 struct ExpandFactory; 59 struct PreemptiveExpandFactory; 60 61 class NetEqImpl : public webrtc::NetEq { 62 public: 63 enum class OutputType { 64 kNormalSpeech, 65 kPLC, 66 kCNG, 67 kPLCCNG, 68 kVadPassive, 69 kCodecPLC 70 }; 71 72 enum ErrorCodes { 73 kNoError = 0, 74 kOtherError, 75 kUnknownRtpPayloadType, 76 kDecoderNotFound, 77 kInvalidPointer, 78 kAccelerateError, 79 kPreemptiveExpandError, 80 kComfortNoiseErrorCode, 81 kDecoderErrorCode, 82 kOtherDecoderError, 83 kInvalidOperation, 84 kDtmfParsingError, 85 kDtmfInsertError, 86 kSampleUnderrun, 87 kDecodedTooMuch, 88 kRedundancySplitError, 89 kPacketBufferCorruption 90 }; 91 92 struct Dependencies { 93 // The constructor populates the Dependencies struct with the default 94 // implementations of the objects. They can all be replaced by the user 95 // before sending the struct to the NetEqImpl constructor. However, there 96 // are dependencies between some of the classes inside the struct, so 97 // swapping out one may make it necessary to re-create another one. 98 Dependencies(const NetEq::Config& config, 99 Clock* clock, 100 const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory, 101 const NetEqControllerFactory& controller_factory); 102 ~Dependencies(); 103 104 Clock* const clock; 105 std::unique_ptr<TickTimer> tick_timer; 106 std::unique_ptr<StatisticsCalculator> stats; 107 std::unique_ptr<DecoderDatabase> decoder_database; 108 std::unique_ptr<DtmfBuffer> dtmf_buffer; 109 std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator; 110 std::unique_ptr<PacketBuffer> packet_buffer; 111 std::unique_ptr<NetEqController> neteq_controller; 112 std::unique_ptr<RedPayloadSplitter> red_payload_splitter; 113 std::unique_ptr<TimestampScaler> timestamp_scaler; 114 std::unique_ptr<AccelerateFactory> accelerate_factory; 115 std::unique_ptr<ExpandFactory> expand_factory; 116 std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory; 117 }; 118 119 // Creates a new NetEqImpl object. 120 NetEqImpl(const NetEq::Config& config, 121 Dependencies&& deps, 122 bool create_components = true); 123 124 ~NetEqImpl() override; 125 126 NetEqImpl(const NetEqImpl&) = delete; 127 NetEqImpl& operator=(const NetEqImpl&) = delete; 128 129 // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure. 130 int InsertPacket(const RTPHeader& rtp_header, 131 rtc::ArrayView<const uint8_t> payload) override; 132 133 void InsertEmptyPacket(const RTPHeader& rtp_header) override; 134 135 int GetAudio( 136 AudioFrame* audio_frame, 137 bool* muted, 138 int* current_sample_rate_hz = nullptr, 139 absl::optional<Operation> action_override = absl::nullopt) override; 140 141 void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override; 142 143 bool RegisterPayloadType(int rtp_payload_type, 144 const SdpAudioFormat& audio_format) override; 145 146 // Removes `rtp_payload_type` from the codec database. Returns 0 on success, 147 // -1 on failure. 148 int RemovePayloadType(uint8_t rtp_payload_type) override; 149 150 void RemoveAllPayloadTypes() override; 151 152 bool SetMinimumDelay(int delay_ms) override; 153 154 bool SetMaximumDelay(int delay_ms) override; 155 156 bool SetBaseMinimumDelayMs(int delay_ms) override; 157 158 int GetBaseMinimumDelayMs() const override; 159 160 int TargetDelayMs() const override; 161 162 int FilteredCurrentDelayMs() const override; 163 164 // Writes the current network statistics to `stats`. The statistics are reset 165 // after the call. 166 int NetworkStatistics(NetEqNetworkStatistics* stats) override; 167 168 NetEqNetworkStatistics CurrentNetworkStatistics() const override; 169 170 NetEqLifetimeStatistics GetLifetimeStatistics() const override; 171 172 NetEqOperationsAndState GetOperationsAndState() const override; 173 174 // Enables post-decode VAD. When enabled, GetAudio() will return 175 // kOutputVADPassive when the signal contains no speech. 176 void EnableVad() override; 177 178 // Disables post-decode VAD. 179 void DisableVad() override; 180 181 absl::optional<uint32_t> GetPlayoutTimestamp() const override; 182 183 int last_output_sample_rate_hz() const override; 184 185 absl::optional<DecoderFormat> GetDecoderFormat( 186 int payload_type) const override; 187 188 // Flushes both the packet buffer and the sync buffer. 189 void FlushBuffers() override; 190 191 void EnableNack(size_t max_nack_list_size) override; 192 193 void DisableNack() override; 194 195 std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override; 196 197 int SyncBufferSizeMs() const override; 198 199 // This accessor method is only intended for testing purposes. 200 const SyncBuffer* sync_buffer_for_test() const; 201 Operation last_operation_for_test() const; 202 203 protected: 204 static const int kOutputSizeMs = 10; 205 static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz. 206 // TODO(hlundin): Provide a better value for kSyncBufferSize. 207 // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for 208 // calculating correlations of current frame against history. 209 static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48; 210 211 // Inserts a new packet into NetEq. This is used by the InsertPacket method 212 // above. Returns 0 on success, otherwise an error code. 213 // TODO(hlundin): Merge this with InsertPacket above? 214 int InsertPacketInternal(const RTPHeader& rtp_header, 215 rtc::ArrayView<const uint8_t> payload) 216 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 217 218 // Delivers 10 ms of audio data. The data is written to `audio_frame`. 219 // Returns 0 on success, otherwise an error code. 220 int GetAudioInternal(AudioFrame* audio_frame, 221 bool* muted, 222 absl::optional<Operation> action_override) 223 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 224 225 // Provides a decision to the GetAudioInternal method. The decision what to 226 // do is written to `operation`. Packets to decode are written to 227 // `packet_list`, and a DTMF event to play is written to `dtmf_event`. When 228 // DTMF should be played, `play_dtmf` is set to true by the method. 229 // Returns 0 on success, otherwise an error code. 230 int GetDecision(Operation* operation, 231 PacketList* packet_list, 232 DtmfEvent* dtmf_event, 233 bool* play_dtmf, 234 absl::optional<Operation> action_override) 235 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 236 237 // Decodes the speech packets in `packet_list`, and writes the results to 238 // `decoded_buffer`, which is allocated to hold `decoded_buffer_length` 239 // elements. The length of the decoded data is written to `decoded_length`. 240 // The speech type -- speech or (codec-internal) comfort noise -- is written 241 // to `speech_type`. If `packet_list` contains any SID frames for RFC 3389 242 // comfort noise, those are not decoded. 243 int Decode(PacketList* packet_list, 244 Operation* operation, 245 int* decoded_length, 246 AudioDecoder::SpeechType* speech_type) 247 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 248 249 // Sub-method to Decode(). Performs codec internal CNG. 250 int DecodeCng(AudioDecoder* decoder, 251 int* decoded_length, 252 AudioDecoder::SpeechType* speech_type) 253 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 254 255 // Sub-method to Decode(). Performs the actual decoding. 256 int DecodeLoop(PacketList* packet_list, 257 const Operation& operation, 258 AudioDecoder* decoder, 259 int* decoded_length, 260 AudioDecoder::SpeechType* speech_type) 261 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 262 263 // Sub-method which calls the Normal class to perform the normal operation. 264 void DoNormal(const int16_t* decoded_buffer, 265 size_t decoded_length, 266 AudioDecoder::SpeechType speech_type, 267 bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 268 269 // Sub-method which calls the Merge class to perform the merge operation. 270 void DoMerge(int16_t* decoded_buffer, 271 size_t decoded_length, 272 AudioDecoder::SpeechType speech_type, 273 bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 274 275 bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 276 277 // Sub-method which calls the Expand class to perform the expand operation. 278 int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 279 280 // Sub-method which calls the Accelerate class to perform the accelerate 281 // operation. 282 int DoAccelerate(int16_t* decoded_buffer, 283 size_t decoded_length, 284 AudioDecoder::SpeechType speech_type, 285 bool play_dtmf, 286 bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 287 288 // Sub-method which calls the PreemptiveExpand class to perform the 289 // preemtive expand operation. 290 int DoPreemptiveExpand(int16_t* decoded_buffer, 291 size_t decoded_length, 292 AudioDecoder::SpeechType speech_type, 293 bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 294 295 // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort 296 // noise. `packet_list` can either contain one SID frame to update the 297 // noise parameters, or no payload at all, in which case the previously 298 // received parameters are used. 299 int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) 300 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 301 302 // Calls the audio decoder to generate codec-internal comfort noise when 303 // no packet was received. 304 void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length) 305 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 306 307 // Calls the DtmfToneGenerator class to generate DTMF tones. 308 int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) 309 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 310 311 // Overdub DTMF on top of `output`. 312 int DtmfOverdub(const DtmfEvent& dtmf_event, 313 size_t num_channels, 314 int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 315 316 // Extracts packets from `packet_buffer_` to produce at least 317 // `required_samples` samples. The packets are inserted into `packet_list`. 318 // Returns the number of samples that the packets in the list will produce, or 319 // -1 in case of an error. 320 int ExtractPackets(size_t required_samples, PacketList* packet_list) 321 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 322 323 // Resets various variables and objects to new values based on the sample rate 324 // `fs_hz` and `channels` number audio channels. 325 void SetSampleRateAndChannels(int fs_hz, size_t channels) 326 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 327 328 // Returns the output type for the audio produced by the latest call to 329 // GetAudio(). 330 OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 331 332 // Updates Expand and Merge. 333 virtual void UpdatePlcComponents(int fs_hz, size_t channels) 334 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 335 336 NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const 337 RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 338 339 Clock* const clock_; 340 341 mutable Mutex mutex_; 342 const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_); 343 const std::unique_ptr<DecoderDatabase> decoder_database_ 344 RTC_GUARDED_BY(mutex_); 345 const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_); 346 const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_ 347 RTC_GUARDED_BY(mutex_); 348 const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_); 349 const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_ 350 RTC_GUARDED_BY(mutex_); 351 const std::unique_ptr<TimestampScaler> timestamp_scaler_ 352 RTC_GUARDED_BY(mutex_); 353 const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_); 354 const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_); 355 const std::unique_ptr<AccelerateFactory> accelerate_factory_ 356 RTC_GUARDED_BY(mutex_); 357 const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_ 358 RTC_GUARDED_BY(mutex_); 359 const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_); 360 361 std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_); 362 std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_); 363 std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_); 364 std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_); 365 std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_); 366 std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_); 367 std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_); 368 std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_); 369 std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_); 370 RandomVector random_vector_ RTC_GUARDED_BY(mutex_); 371 std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_); 372 int fs_hz_ RTC_GUARDED_BY(mutex_); 373 int fs_mult_ RTC_GUARDED_BY(mutex_); 374 int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_); 375 size_t output_size_samples_ RTC_GUARDED_BY(mutex_); 376 size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_); 377 Mode last_mode_ RTC_GUARDED_BY(mutex_); 378 Operation last_operation_ RTC_GUARDED_BY(mutex_); 379 size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_); 380 std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_); 381 uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_); 382 bool new_codec_ RTC_GUARDED_BY(mutex_); 383 uint32_t timestamp_ RTC_GUARDED_BY(mutex_); 384 bool reset_decoder_ RTC_GUARDED_BY(mutex_); 385 absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_); 386 absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_); 387 bool first_packet_ RTC_GUARDED_BY(mutex_); 388 bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_); 389 std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_); 390 bool nack_enabled_ RTC_GUARDED_BY(mutex_); 391 const bool enable_muted_state_ RTC_GUARDED_BY(mutex_); 392 AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) = 393 AudioFrame::kVadPassive; 394 std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_ 395 RTC_GUARDED_BY(mutex_); 396 std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_); 397 ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_); 398 ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_); 399 bool no_time_stretching_ RTC_GUARDED_BY(mutex_); // Only used for test. 400 rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_); 401 }; 402 403 } // namespace webrtc 404 #endif // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_ 405