1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef API_NETEQ_NETEQ_H_ 12 #define API_NETEQ_NETEQ_H_ 13 14 #include <stddef.h> // Provide access to size_t. 15 16 #include <map> 17 #include <string> 18 #include <vector> 19 20 #include "absl/types/optional.h" 21 #include "api/audio_codecs/audio_codec_pair_id.h" 22 #include "api/audio_codecs/audio_decoder.h" 23 #include "api/audio_codecs/audio_format.h" 24 #include "api/rtp_headers.h" 25 #include "api/scoped_refptr.h" 26 27 namespace webrtc { 28 29 // Forward declarations. 30 class AudioFrame; 31 class AudioDecoderFactory; 32 class Clock; 33 34 struct NetEqNetworkStatistics { 35 uint16_t current_buffer_size_ms; // Current jitter buffer size in ms. 36 uint16_t preferred_buffer_size_ms; // Target buffer size in ms. 37 uint16_t jitter_peaks_found; // 1 if adding extra delay due to peaky 38 // jitter; 0 otherwise. 39 uint16_t expand_rate; // Fraction (of original stream) of synthesized 40 // audio inserted through expansion (in Q14). 41 uint16_t speech_expand_rate; // Fraction (of original stream) of synthesized 42 // speech inserted through expansion (in Q14). 43 uint16_t preemptive_rate; // Fraction of data inserted through pre-emptive 44 // expansion (in Q14). 45 uint16_t accelerate_rate; // Fraction of data removed through acceleration 46 // (in Q14). 47 uint16_t secondary_decoded_rate; // Fraction of data coming from FEC/RED 48 // decoding (in Q14). 49 uint16_t secondary_discarded_rate; // Fraction of discarded FEC/RED data (in 50 // Q14). 51 // Statistics for packet waiting times, i.e., the time between a packet 52 // arrives until it is decoded. 53 int mean_waiting_time_ms; 54 int median_waiting_time_ms; 55 int min_waiting_time_ms; 56 int max_waiting_time_ms; 57 }; 58 59 // NetEq statistics that persist over the lifetime of the class. 60 // These metrics are never reset. 61 struct NetEqLifetimeStatistics { 62 // Stats below correspond to similarly-named fields in the WebRTC stats spec. 63 // https://w3c.github.io/webrtc-stats/#dom-rtcinboundrtpstreamstats 64 uint64_t total_samples_received = 0; 65 uint64_t concealed_samples = 0; 66 uint64_t concealment_events = 0; 67 uint64_t jitter_buffer_delay_ms = 0; 68 uint64_t jitter_buffer_emitted_count = 0; 69 uint64_t jitter_buffer_target_delay_ms = 0; 70 uint64_t jitter_buffer_minimum_delay_ms = 0; 71 uint64_t inserted_samples_for_deceleration = 0; 72 uint64_t removed_samples_for_acceleration = 0; 73 uint64_t silent_concealed_samples = 0; 74 uint64_t fec_packets_received = 0; 75 uint64_t fec_packets_discarded = 0; 76 uint64_t packets_discarded = 0; 77 // Below stats are not part of the spec. 78 uint64_t delayed_packet_outage_samples = 0; 79 // This is sum of relative packet arrival delays of received packets so far. 80 // Since end-to-end delay of a packet is difficult to measure and is not 81 // necessarily useful for measuring jitter buffer performance, we report a 82 // relative packet arrival delay. The relative packet arrival delay of a 83 // packet is defined as the arrival delay compared to the first packet 84 // received, given that it had zero delay. To avoid clock drift, the "first" 85 // packet can be made dynamic. 86 uint64_t relative_packet_arrival_delay_ms = 0; 87 uint64_t jitter_buffer_packets_received = 0; 88 // An interruption is a loss-concealment event lasting at least 150 ms. The 89 // two stats below count the number os such events and the total duration of 90 // these events. 91 int32_t interruption_count = 0; 92 int32_t total_interruption_duration_ms = 0; 93 // Total number of comfort noise samples generated during DTX. 94 uint64_t generated_noise_samples = 0; 95 }; 96 97 // Metrics that describe the operations performed in NetEq, and the internal 98 // state. 99 struct NetEqOperationsAndState { 100 // These sample counters are cumulative, and don't reset. As a reference, the 101 // total number of output samples can be found in 102 // NetEqLifetimeStatistics::total_samples_received. 103 uint64_t preemptive_samples = 0; 104 uint64_t accelerate_samples = 0; 105 // Count of the number of buffer flushes. 106 uint64_t packet_buffer_flushes = 0; 107 // The statistics below are not cumulative. 108 // The waiting time of the last decoded packet. 109 uint64_t last_waiting_time_ms = 0; 110 // The sum of the packet and jitter buffer size in ms. 111 uint64_t current_buffer_size_ms = 0; 112 // The current frame size in ms. 113 uint64_t current_frame_size_ms = 0; 114 // Flag to indicate that the next packet is available. 115 bool next_packet_available = false; 116 }; 117 118 // This is the interface class for NetEq. 119 class NetEq { 120 public: 121 struct Config { 122 Config(); 123 Config(const Config&); 124 Config(Config&&); 125 ~Config(); 126 Config& operator=(const Config&); 127 Config& operator=(Config&&); 128 129 std::string ToString() const; 130 131 int sample_rate_hz = 48000; // Initial value. Will change with input data. 132 bool enable_post_decode_vad = false; 133 size_t max_packets_in_buffer = 200; 134 int max_delay_ms = 0; 135 int min_delay_ms = 0; 136 bool enable_fast_accelerate = false; 137 bool enable_muted_state = false; 138 bool enable_rtx_handling = false; 139 absl::optional<AudioCodecPairId> codec_pair_id; 140 bool for_test_no_time_stretching = false; // Use only for testing. 141 }; 142 143 enum ReturnCodes { kOK = 0, kFail = -1 }; 144 145 enum class Operation { 146 kNormal, 147 kMerge, 148 kExpand, 149 kAccelerate, 150 kFastAccelerate, 151 kPreemptiveExpand, 152 kRfc3389Cng, 153 kRfc3389CngNoPacket, 154 kCodecInternalCng, 155 kDtmf, 156 kUndefined, 157 }; 158 159 enum class Mode { 160 kNormal, 161 kExpand, 162 kMerge, 163 kAccelerateSuccess, 164 kAccelerateLowEnergy, 165 kAccelerateFail, 166 kPreemptiveExpandSuccess, 167 kPreemptiveExpandLowEnergy, 168 kPreemptiveExpandFail, 169 kRfc3389Cng, 170 kCodecInternalCng, 171 kCodecPlc, 172 kDtmf, 173 kError, 174 kUndefined, 175 }; 176 177 // Return type for GetDecoderFormat. 178 struct DecoderFormat { 179 int sample_rate_hz; 180 int num_channels; 181 SdpAudioFormat sdp_format; 182 }; 183 ~NetEq()184 virtual ~NetEq() {} 185 186 // Inserts a new packet into NetEq. 187 // Returns 0 on success, -1 on failure. 188 virtual int InsertPacket(const RTPHeader& rtp_header, 189 rtc::ArrayView<const uint8_t> payload) = 0; 190 191 // Lets NetEq know that a packet arrived with an empty payload. This typically 192 // happens when empty packets are used for probing the network channel, and 193 // these packets use RTP sequence numbers from the same series as the actual 194 // audio packets. 195 virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0; 196 197 // Instructs NetEq to deliver 10 ms of audio data. The data is written to 198 // `audio_frame`. All data in `audio_frame` is wiped; `data_`, `speech_type_`, 199 // `num_channels_`, `sample_rate_hz_`, `samples_per_channel_`, and 200 // `vad_activity_` are updated upon success. If an error is returned, some 201 // fields may not have been updated, or may contain inconsistent values. 202 // If muted state is enabled (through Config::enable_muted_state), `muted` 203 // may be set to true after a prolonged expand period. When this happens, the 204 // `data_` in `audio_frame` is not written, but should be interpreted as being 205 // all zeros. For testing purposes, an override can be supplied in the 206 // `action_override` argument, which will cause NetEq to take this action 207 // next, instead of the action it would normally choose. An optional output 208 // argument for fetching the current sample rate can be provided, which 209 // will return the same value as last_output_sample_rate_hz() but will avoid 210 // additional synchronization. 211 // Returns kOK on success, or kFail in case of an error. 212 virtual int GetAudio( 213 AudioFrame* audio_frame, 214 bool* muted, 215 int* current_sample_rate_hz = nullptr, 216 absl::optional<Operation> action_override = absl::nullopt) = 0; 217 218 // Replaces the current set of decoders with the given one. 219 virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0; 220 221 // Associates `rtp_payload_type` with the given codec, which NetEq will 222 // instantiate when it needs it. Returns true iff successful. 223 virtual bool RegisterPayloadType(int rtp_payload_type, 224 const SdpAudioFormat& audio_format) = 0; 225 226 // Removes `rtp_payload_type` from the codec database. Returns 0 on success, 227 // -1 on failure. Removing a payload type that is not registered is ok and 228 // will not result in an error. 229 virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0; 230 231 // Removes all payload types from the codec database. 232 virtual void RemoveAllPayloadTypes() = 0; 233 234 // Sets a minimum delay in millisecond for packet buffer. The minimum is 235 // maintained unless a higher latency is dictated by channel condition. 236 // Returns true if the minimum is successfully applied, otherwise false is 237 // returned. 238 virtual bool SetMinimumDelay(int delay_ms) = 0; 239 240 // Sets a maximum delay in milliseconds for packet buffer. The latency will 241 // not exceed the given value, even required delay (given the channel 242 // conditions) is higher. Calling this method has the same effect as setting 243 // the `max_delay_ms` value in the NetEq::Config struct. 244 virtual bool SetMaximumDelay(int delay_ms) = 0; 245 246 // Sets a base minimum delay in milliseconds for packet buffer. The minimum 247 // delay which is set via `SetMinimumDelay` can't be lower than base minimum 248 // delay. Calling this method is similar to setting the `min_delay_ms` value 249 // in the NetEq::Config struct. Returns true if the base minimum is 250 // successfully applied, otherwise false is returned. 251 virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0; 252 253 // Returns current value of base minimum delay in milliseconds. 254 virtual int GetBaseMinimumDelayMs() const = 0; 255 256 // Returns the current target delay in ms. This includes any extra delay 257 // requested through SetMinimumDelay. 258 virtual int TargetDelayMs() const = 0; 259 260 // Returns the current total delay (packet buffer and sync buffer) in ms, 261 // with smoothing applied to even out short-time fluctuations due to jitter. 262 // The packet buffer part of the delay is not updated during DTX/CNG periods. 263 virtual int FilteredCurrentDelayMs() const = 0; 264 265 // Writes the current network statistics to `stats`. The statistics are reset 266 // after the call. 267 virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0; 268 269 // Current values only, not resetting any state. 270 virtual NetEqNetworkStatistics CurrentNetworkStatistics() const = 0; 271 272 // Returns a copy of this class's lifetime statistics. These statistics are 273 // never reset. 274 virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0; 275 276 // Returns statistics about the performed operations and internal state. These 277 // statistics are never reset. 278 virtual NetEqOperationsAndState GetOperationsAndState() const = 0; 279 280 // Enables post-decode VAD. When enabled, GetAudio() will return 281 // kOutputVADPassive when the signal contains no speech. 282 virtual void EnableVad() = 0; 283 284 // Disables post-decode VAD. 285 virtual void DisableVad() = 0; 286 287 // Returns the RTP timestamp for the last sample delivered by GetAudio(). 288 // The return value will be empty if no valid timestamp is available. 289 virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0; 290 291 // Returns the sample rate in Hz of the audio produced in the last GetAudio 292 // call. If GetAudio has not been called yet, the configured sample rate 293 // (Config::sample_rate_hz) is returned. 294 virtual int last_output_sample_rate_hz() const = 0; 295 296 // Returns the decoder info for the given payload type. Returns empty if no 297 // such payload type was registered. 298 virtual absl::optional<DecoderFormat> GetDecoderFormat( 299 int payload_type) const = 0; 300 301 // Flushes both the packet buffer and the sync buffer. 302 virtual void FlushBuffers() = 0; 303 304 // Enables NACK and sets the maximum size of the NACK list, which should be 305 // positive and no larger than Nack::kNackListSizeLimit. If NACK is already 306 // enabled then the maximum NACK list size is modified accordingly. 307 virtual void EnableNack(size_t max_nack_list_size) = 0; 308 309 virtual void DisableNack() = 0; 310 311 // Returns a list of RTP sequence numbers corresponding to packets to be 312 // retransmitted, given an estimate of the round-trip time in milliseconds. 313 virtual std::vector<uint16_t> GetNackList( 314 int64_t round_trip_time_ms) const = 0; 315 316 // Returns the length of the audio yet to play in the sync buffer. 317 // Mainly intended for testing. 318 virtual int SyncBufferSizeMs() const = 0; 319 }; 320 321 } // namespace webrtc 322 #endif // API_NETEQ_NETEQ_H_ 323