1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/rtp_sender_audio.h"
12
13 #include <string.h>
14
15 #include <memory>
16 #include <utility>
17
18 #include "absl/strings/match.h"
19 #include "absl/types/optional.h"
20 #include "api/audio_codecs/audio_format.h"
21 #include "api/rtp_headers.h"
22 #include "modules/audio_coding/include/audio_coding_module_typedefs.h"
23 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
24 #include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
25 #include "modules/rtp_rtcp/source/byte_io.h"
26 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
27 #include "modules/rtp_rtcp/source/rtp_packet.h"
28 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
29 #include "modules/rtp_rtcp/source/time_util.h"
30 #include "rtc_base/checks.h"
31 #include "rtc_base/logging.h"
32 #include "rtc_base/trace_event.h"
33 #include "system_wrappers/include/ntp_time.h"
34
35 namespace webrtc {
36
37 namespace {
FrameTypeToString(AudioFrameType frame_type)38 [[maybe_unused]] const char* FrameTypeToString(AudioFrameType frame_type) {
39 switch (frame_type) {
40 case AudioFrameType::kEmptyFrame:
41 return "empty";
42 case AudioFrameType::kAudioFrameSpeech:
43 return "audio_speech";
44 case AudioFrameType::kAudioFrameCN:
45 return "audio_cn";
46 }
47 RTC_CHECK_NOTREACHED();
48 }
49
// Field trial key looked up by the RTPSenderAudio constructor. The capture
// clock offset is included unless the trial's value starts with "Disabled".
constexpr char kIncludeCaptureClockOffset[] =
    "WebRTC-IncludeCaptureClockOffset";
52
53 } // namespace
54
RTPSenderAudio(Clock * clock,RTPSender * rtp_sender)55 RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
56 : clock_(clock),
57 rtp_sender_(rtp_sender),
58 absolute_capture_time_sender_(clock),
59 include_capture_clock_offset_(
60 !absl::StartsWith(field_trials_.Lookup(kIncludeCaptureClockOffset),
61 "Disabled")) {
62 RTC_DCHECK(clock_);
63 }
64
~RTPSenderAudio()65 RTPSenderAudio::~RTPSenderAudio() {}
66
RegisterAudioPayload(absl::string_view payload_name,const int8_t payload_type,const uint32_t frequency,const size_t channels,const uint32_t rate)67 int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name,
68 const int8_t payload_type,
69 const uint32_t frequency,
70 const size_t channels,
71 const uint32_t rate) {
72 if (absl::EqualsIgnoreCase(payload_name, "cn")) {
73 MutexLock lock(&send_audio_mutex_);
74 // we can have multiple CNG payload types
75 switch (frequency) {
76 case 8000:
77 cngnb_payload_type_ = payload_type;
78 break;
79 case 16000:
80 cngwb_payload_type_ = payload_type;
81 break;
82 case 32000:
83 cngswb_payload_type_ = payload_type;
84 break;
85 case 48000:
86 cngfb_payload_type_ = payload_type;
87 break;
88 default:
89 return -1;
90 }
91 } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) {
92 MutexLock lock(&send_audio_mutex_);
93 // Don't add it to the list
94 // we dont want to allow send with a DTMF payloadtype
95 dtmf_payload_type_ = payload_type;
96 dtmf_payload_freq_ = frequency;
97 return 0;
98 } else if (payload_name == "audio") {
99 MutexLock lock(&send_audio_mutex_);
100 encoder_rtp_timestamp_frequency_ = frequency;
101 return 0;
102 }
103 return 0;
104 }
105
MarkerBit(AudioFrameType frame_type,int8_t payload_type)106 bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type, int8_t payload_type) {
107 MutexLock lock(&send_audio_mutex_);
108 // for audio true for first packet in a speech burst
109 bool marker_bit = false;
110 if (last_payload_type_ != payload_type) {
111 if (payload_type != -1 && (cngnb_payload_type_ == payload_type ||
112 cngwb_payload_type_ == payload_type ||
113 cngswb_payload_type_ == payload_type ||
114 cngfb_payload_type_ == payload_type)) {
115 // Only set a marker bit when we change payload type to a non CNG
116 return false;
117 }
118
119 // payload_type differ
120 if (last_payload_type_ == -1) {
121 if (frame_type != AudioFrameType::kAudioFrameCN) {
122 // first packet and NOT CNG
123 return true;
124 } else {
125 // first packet and CNG
126 inband_vad_active_ = true;
127 return false;
128 }
129 }
130
131 // not first packet AND
132 // not CNG AND
133 // payload_type changed
134
135 // set a marker bit when we change payload type
136 marker_bit = true;
137 }
138
139 // For G.723 G.729, AMR etc we can have inband VAD
140 if (frame_type == AudioFrameType::kAudioFrameCN) {
141 inband_vad_active_ = true;
142 } else if (inband_vad_active_) {
143 inband_vad_active_ = false;
144 marker_bit = true;
145 }
146 return marker_bit;
147 }
148
SendAudio(AudioFrameType frame_type,int8_t payload_type,uint32_t rtp_timestamp,const uint8_t * payload_data,size_t payload_size)149 bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
150 int8_t payload_type,
151 uint32_t rtp_timestamp,
152 const uint8_t* payload_data,
153 size_t payload_size) {
154 return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
155 payload_size,
156 // TODO(bugs.webrtc.org/10739) replace once plumbed.
157 /*absolute_capture_timestamp_ms=*/-1);
158 }
159
SendAudio(AudioFrameType frame_type,int8_t payload_type,uint32_t rtp_timestamp,const uint8_t * payload_data,size_t payload_size,int64_t absolute_capture_timestamp_ms)160 bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
161 int8_t payload_type,
162 uint32_t rtp_timestamp,
163 const uint8_t* payload_data,
164 size_t payload_size,
165 int64_t absolute_capture_timestamp_ms) {
166 TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
167 FrameTypeToString(frame_type));
168
169 // From RFC 4733:
170 // A source has wide latitude as to how often it sends event updates. A
171 // natural interval is the spacing between non-event audio packets. [...]
172 // Alternatively, a source MAY decide to use a different spacing for event
173 // updates, with a value of 50 ms RECOMMENDED.
174 constexpr int kDtmfIntervalTimeMs = 50;
175 uint8_t audio_level_dbov = 0;
176 uint32_t dtmf_payload_freq = 0;
177 absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
178 {
179 MutexLock lock(&send_audio_mutex_);
180 audio_level_dbov = audio_level_dbov_;
181 dtmf_payload_freq = dtmf_payload_freq_;
182 encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
183 }
184
185 // Check if we have pending DTMFs to send
186 if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) {
187 if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) >
188 kDtmfIntervalTimeMs) {
189 // New tone to play
190 dtmf_timestamp_ = rtp_timestamp;
191 if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) {
192 dtmf_event_first_packet_sent_ = false;
193 dtmf_length_samples_ =
194 dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000);
195 dtmf_event_is_on_ = true;
196 }
197 }
198 }
199
200 // A source MAY send events and coded audio packets for the same time
201 // but we don't support it
202 if (dtmf_event_is_on_) {
203 if (frame_type == AudioFrameType::kEmptyFrame) {
204 // kEmptyFrame is used to drive the DTMF when in CN mode
205 // it can be triggered more frequently than we want to send the
206 // DTMF packets.
207 const unsigned int dtmf_interval_time_rtp =
208 dtmf_payload_freq * kDtmfIntervalTimeMs / 1000;
209 if ((rtp_timestamp - dtmf_timestamp_last_sent_) <
210 dtmf_interval_time_rtp) {
211 // not time to send yet
212 return true;
213 }
214 }
215 dtmf_timestamp_last_sent_ = rtp_timestamp;
216 uint32_t dtmf_duration_samples = rtp_timestamp - dtmf_timestamp_;
217 bool ended = false;
218 bool send = true;
219
220 if (dtmf_length_samples_ > dtmf_duration_samples) {
221 if (dtmf_duration_samples <= 0) {
222 // Skip send packet at start, since we shouldn't use duration 0
223 send = false;
224 }
225 } else {
226 ended = true;
227 dtmf_event_is_on_ = false;
228 dtmf_time_last_sent_ = clock_->TimeInMilliseconds();
229 }
230 if (send) {
231 if (dtmf_duration_samples > 0xffff) {
232 // RFC 4733 2.5.2.3 Long-Duration Events
233 SendTelephoneEventPacket(ended, dtmf_timestamp_,
234 static_cast<uint16_t>(0xffff), false);
235
236 // set new timestap for this segment
237 dtmf_timestamp_ = rtp_timestamp;
238 dtmf_duration_samples -= 0xffff;
239 dtmf_length_samples_ -= 0xffff;
240
241 return SendTelephoneEventPacket(
242 ended, dtmf_timestamp_,
243 static_cast<uint16_t>(dtmf_duration_samples), false);
244 } else {
245 if (!SendTelephoneEventPacket(ended, dtmf_timestamp_,
246 dtmf_duration_samples,
247 !dtmf_event_first_packet_sent_)) {
248 return false;
249 }
250 dtmf_event_first_packet_sent_ = true;
251 return true;
252 }
253 }
254 return true;
255 }
256 if (payload_size == 0 || payload_data == NULL) {
257 if (frame_type == AudioFrameType::kEmptyFrame) {
258 // we don't send empty audio RTP packets
259 // no error since we use it to either drive DTMF when we use VAD, or
260 // enter DTX.
261 return true;
262 }
263 return false;
264 }
265
266 std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
267 packet->SetMarker(MarkerBit(frame_type, payload_type));
268 packet->SetPayloadType(payload_type);
269 packet->SetTimestamp(rtp_timestamp);
270 packet->set_capture_time(clock_->CurrentTime());
271 // Update audio level extension, if included.
272 packet->SetExtension<AudioLevel>(
273 frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);
274
275 if (absolute_capture_timestamp_ms > 0) {
276 // Send absolute capture time periodically in order to optimize and save
277 // network traffic. Missing absolute capture times can be interpolated on
278 // the receiving end if sending intervals are small enough.
279 auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
280 AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
281 packet->Timestamp(),
282 // Replace missing value with 0 (invalid frequency), this will trigger
283 // absolute capture time sending.
284 encoder_rtp_timestamp_frequency.value_or(0),
285 Int64MsToUQ32x32(clock_->ConvertTimestampToNtpTimeInMilliseconds(
286 absolute_capture_timestamp_ms)),
287 /*estimated_capture_clock_offset=*/
288 include_capture_clock_offset_ ? absl::make_optional(0) : absl::nullopt);
289 if (absolute_capture_time) {
290 // It also checks that extension was registered during SDP negotiation. If
291 // not then setter won't do anything.
292 packet->SetExtension<AbsoluteCaptureTimeExtension>(
293 *absolute_capture_time);
294 }
295 }
296
297 uint8_t* payload = packet->AllocatePayload(payload_size);
298 if (!payload) // Too large payload buffer.
299 return false;
300 memcpy(payload, payload_data, payload_size);
301
302 {
303 MutexLock lock(&send_audio_mutex_);
304 last_payload_type_ = payload_type;
305 }
306 TRACE_EVENT_ASYNC_END2("webrtc", "Audio", rtp_timestamp, "timestamp",
307 packet->Timestamp(), "seqnum",
308 packet->SequenceNumber());
309 packet->set_packet_type(RtpPacketMediaType::kAudio);
310 packet->set_allow_retransmission(true);
311 bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
312 if (first_packet_sent_()) {
313 RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
314 }
315 return send_result;
316 }
317
318 // Audio level magnitude and voice activity flag are set for each RTP packet
SetAudioLevel(uint8_t level_dbov)319 int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) {
320 if (level_dbov > 127) {
321 return -1;
322 }
323 MutexLock lock(&send_audio_mutex_);
324 audio_level_dbov_ = level_dbov;
325 return 0;
326 }
327
328 // Send a TelephoneEvent tone using RFC 2833 (4733)
SendTelephoneEvent(uint8_t key,uint16_t time_ms,uint8_t level)329 int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
330 uint16_t time_ms,
331 uint8_t level) {
332 DtmfQueue::Event event;
333 {
334 MutexLock lock(&send_audio_mutex_);
335 if (dtmf_payload_type_ < 0) {
336 // TelephoneEvent payloadtype not configured
337 return -1;
338 }
339 event.payload_type = dtmf_payload_type_;
340 }
341 event.key = key;
342 event.duration_ms = time_ms;
343 event.level = level;
344 return dtmf_queue_.AddDtmf(event) ? 0 : -1;
345 }
346
SendTelephoneEventPacket(bool ended,uint32_t dtmf_timestamp,uint16_t duration,bool marker_bit)347 bool RTPSenderAudio::SendTelephoneEventPacket(bool ended,
348 uint32_t dtmf_timestamp,
349 uint16_t duration,
350 bool marker_bit) {
351 uint8_t send_count = 1;
352 bool result = true;
353
354 if (ended) {
355 // resend last packet in an event 3 times
356 send_count = 3;
357 }
358 do {
359 // Send DTMF data.
360 constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
361 constexpr size_t kDtmfSize = 4;
362 std::unique_ptr<RtpPacketToSend> packet(
363 new RtpPacketToSend(kNoExtensions, kRtpHeaderSize + kDtmfSize));
364 packet->SetPayloadType(dtmf_current_event_.payload_type);
365 packet->SetMarker(marker_bit);
366 packet->SetSsrc(rtp_sender_->SSRC());
367 packet->SetTimestamp(dtmf_timestamp);
368 packet->set_capture_time(clock_->CurrentTime());
369
370 // Create DTMF data.
371 uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize);
372 RTC_DCHECK(dtmfbuffer);
373 /* From RFC 2833:
374 0 1 2 3
375 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
376 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
377 | event |E|R| volume | duration |
378 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
379 */
380 // R bit always cleared
381 uint8_t R = 0x00;
382 uint8_t volume = dtmf_current_event_.level;
383
384 // First packet un-ended
385 uint8_t E = ended ? 0x80 : 0x00;
386
387 // First byte is Event number, equals key number
388 dtmfbuffer[0] = dtmf_current_event_.key;
389 dtmfbuffer[1] = E | R | volume;
390 ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration);
391
392 packet->set_packet_type(RtpPacketMediaType::kAudio);
393 packet->set_allow_retransmission(true);
394 result = rtp_sender_->SendToNetwork(std::move(packet));
395 send_count--;
396 } while (send_count > 0 && result);
397
398 return result;
399 }
400 } // namespace webrtc
401