xref: /aosp_15_r20/external/webrtc/modules/rtp_rtcp/source/rtp_sender_audio.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/rtp_rtcp/source/rtp_sender_audio.h"
12 
13 #include <string.h>
14 
15 #include <memory>
16 #include <utility>
17 
18 #include "absl/strings/match.h"
19 #include "absl/types/optional.h"
20 #include "api/audio_codecs/audio_format.h"
21 #include "api/rtp_headers.h"
22 #include "modules/audio_coding/include/audio_coding_module_typedefs.h"
23 #include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
24 #include "modules/rtp_rtcp/source/absolute_capture_time_sender.h"
25 #include "modules/rtp_rtcp/source/byte_io.h"
26 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
27 #include "modules/rtp_rtcp/source/rtp_packet.h"
28 #include "modules/rtp_rtcp/source/rtp_packet_to_send.h"
29 #include "modules/rtp_rtcp/source/time_util.h"
30 #include "rtc_base/checks.h"
31 #include "rtc_base/logging.h"
32 #include "rtc_base/trace_event.h"
33 #include "system_wrappers/include/ntp_time.h"
34 
35 namespace webrtc {
36 
37 namespace {
FrameTypeToString(AudioFrameType frame_type)38 [[maybe_unused]] const char* FrameTypeToString(AudioFrameType frame_type) {
39   switch (frame_type) {
40     case AudioFrameType::kEmptyFrame:
41       return "empty";
42     case AudioFrameType::kAudioFrameSpeech:
43       return "audio_speech";
44     case AudioFrameType::kAudioFrameCN:
45       return "audio_cn";
46   }
47   RTC_CHECK_NOTREACHED();
48 }
49 
// Name of the field trial consulted by the RTPSenderAudio constructor: when its
// value starts with "Disabled", no estimated capture clock offset is passed
// along with the absolute capture time.
constexpr char kIncludeCaptureClockOffset[] =
    "WebRTC-IncludeCaptureClockOffset";
52 
53 }  // namespace
54 
RTPSenderAudio(Clock * clock,RTPSender * rtp_sender)55 RTPSenderAudio::RTPSenderAudio(Clock* clock, RTPSender* rtp_sender)
56     : clock_(clock),
57       rtp_sender_(rtp_sender),
58       absolute_capture_time_sender_(clock),
59       include_capture_clock_offset_(
60           !absl::StartsWith(field_trials_.Lookup(kIncludeCaptureClockOffset),
61                             "Disabled")) {
62   RTC_DCHECK(clock_);
63 }
64 
~RTPSenderAudio()65 RTPSenderAudio::~RTPSenderAudio() {}
66 
RegisterAudioPayload(absl::string_view payload_name,const int8_t payload_type,const uint32_t frequency,const size_t channels,const uint32_t rate)67 int32_t RTPSenderAudio::RegisterAudioPayload(absl::string_view payload_name,
68                                              const int8_t payload_type,
69                                              const uint32_t frequency,
70                                              const size_t channels,
71                                              const uint32_t rate) {
72   if (absl::EqualsIgnoreCase(payload_name, "cn")) {
73     MutexLock lock(&send_audio_mutex_);
74     //  we can have multiple CNG payload types
75     switch (frequency) {
76       case 8000:
77         cngnb_payload_type_ = payload_type;
78         break;
79       case 16000:
80         cngwb_payload_type_ = payload_type;
81         break;
82       case 32000:
83         cngswb_payload_type_ = payload_type;
84         break;
85       case 48000:
86         cngfb_payload_type_ = payload_type;
87         break;
88       default:
89         return -1;
90     }
91   } else if (absl::EqualsIgnoreCase(payload_name, "telephone-event")) {
92     MutexLock lock(&send_audio_mutex_);
93     // Don't add it to the list
94     // we dont want to allow send with a DTMF payloadtype
95     dtmf_payload_type_ = payload_type;
96     dtmf_payload_freq_ = frequency;
97     return 0;
98   } else if (payload_name == "audio") {
99     MutexLock lock(&send_audio_mutex_);
100     encoder_rtp_timestamp_frequency_ = frequency;
101     return 0;
102   }
103   return 0;
104 }
105 
MarkerBit(AudioFrameType frame_type,int8_t payload_type)106 bool RTPSenderAudio::MarkerBit(AudioFrameType frame_type, int8_t payload_type) {
107   MutexLock lock(&send_audio_mutex_);
108   // for audio true for first packet in a speech burst
109   bool marker_bit = false;
110   if (last_payload_type_ != payload_type) {
111     if (payload_type != -1 && (cngnb_payload_type_ == payload_type ||
112                                cngwb_payload_type_ == payload_type ||
113                                cngswb_payload_type_ == payload_type ||
114                                cngfb_payload_type_ == payload_type)) {
115       // Only set a marker bit when we change payload type to a non CNG
116       return false;
117     }
118 
119     // payload_type differ
120     if (last_payload_type_ == -1) {
121       if (frame_type != AudioFrameType::kAudioFrameCN) {
122         // first packet and NOT CNG
123         return true;
124       } else {
125         // first packet and CNG
126         inband_vad_active_ = true;
127         return false;
128       }
129     }
130 
131     // not first packet AND
132     // not CNG AND
133     // payload_type changed
134 
135     // set a marker bit when we change payload type
136     marker_bit = true;
137   }
138 
139   // For G.723 G.729, AMR etc we can have inband VAD
140   if (frame_type == AudioFrameType::kAudioFrameCN) {
141     inband_vad_active_ = true;
142   } else if (inband_vad_active_) {
143     inband_vad_active_ = false;
144     marker_bit = true;
145   }
146   return marker_bit;
147 }
148 
SendAudio(AudioFrameType frame_type,int8_t payload_type,uint32_t rtp_timestamp,const uint8_t * payload_data,size_t payload_size)149 bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
150                                int8_t payload_type,
151                                uint32_t rtp_timestamp,
152                                const uint8_t* payload_data,
153                                size_t payload_size) {
154   return SendAudio(frame_type, payload_type, rtp_timestamp, payload_data,
155                    payload_size,
156                    // TODO(bugs.webrtc.org/10739) replace once plumbed.
157                    /*absolute_capture_timestamp_ms=*/-1);
158 }
159 
SendAudio(AudioFrameType frame_type,int8_t payload_type,uint32_t rtp_timestamp,const uint8_t * payload_data,size_t payload_size,int64_t absolute_capture_timestamp_ms)160 bool RTPSenderAudio::SendAudio(AudioFrameType frame_type,
161                                int8_t payload_type,
162                                uint32_t rtp_timestamp,
163                                const uint8_t* payload_data,
164                                size_t payload_size,
165                                int64_t absolute_capture_timestamp_ms) {
166   TRACE_EVENT_ASYNC_STEP1("webrtc", "Audio", rtp_timestamp, "Send", "type",
167                           FrameTypeToString(frame_type));
168 
169   // From RFC 4733:
170   // A source has wide latitude as to how often it sends event updates. A
171   // natural interval is the spacing between non-event audio packets. [...]
172   // Alternatively, a source MAY decide to use a different spacing for event
173   // updates, with a value of 50 ms RECOMMENDED.
174   constexpr int kDtmfIntervalTimeMs = 50;
175   uint8_t audio_level_dbov = 0;
176   uint32_t dtmf_payload_freq = 0;
177   absl::optional<uint32_t> encoder_rtp_timestamp_frequency;
178   {
179     MutexLock lock(&send_audio_mutex_);
180     audio_level_dbov = audio_level_dbov_;
181     dtmf_payload_freq = dtmf_payload_freq_;
182     encoder_rtp_timestamp_frequency = encoder_rtp_timestamp_frequency_;
183   }
184 
185   // Check if we have pending DTMFs to send
186   if (!dtmf_event_is_on_ && dtmf_queue_.PendingDtmf()) {
187     if ((clock_->TimeInMilliseconds() - dtmf_time_last_sent_) >
188         kDtmfIntervalTimeMs) {
189       // New tone to play
190       dtmf_timestamp_ = rtp_timestamp;
191       if (dtmf_queue_.NextDtmf(&dtmf_current_event_)) {
192         dtmf_event_first_packet_sent_ = false;
193         dtmf_length_samples_ =
194             dtmf_current_event_.duration_ms * (dtmf_payload_freq / 1000);
195         dtmf_event_is_on_ = true;
196       }
197     }
198   }
199 
200   // A source MAY send events and coded audio packets for the same time
201   // but we don't support it
202   if (dtmf_event_is_on_) {
203     if (frame_type == AudioFrameType::kEmptyFrame) {
204       // kEmptyFrame is used to drive the DTMF when in CN mode
205       // it can be triggered more frequently than we want to send the
206       // DTMF packets.
207       const unsigned int dtmf_interval_time_rtp =
208           dtmf_payload_freq * kDtmfIntervalTimeMs / 1000;
209       if ((rtp_timestamp - dtmf_timestamp_last_sent_) <
210           dtmf_interval_time_rtp) {
211         // not time to send yet
212         return true;
213       }
214     }
215     dtmf_timestamp_last_sent_ = rtp_timestamp;
216     uint32_t dtmf_duration_samples = rtp_timestamp - dtmf_timestamp_;
217     bool ended = false;
218     bool send = true;
219 
220     if (dtmf_length_samples_ > dtmf_duration_samples) {
221       if (dtmf_duration_samples <= 0) {
222         // Skip send packet at start, since we shouldn't use duration 0
223         send = false;
224       }
225     } else {
226       ended = true;
227       dtmf_event_is_on_ = false;
228       dtmf_time_last_sent_ = clock_->TimeInMilliseconds();
229     }
230     if (send) {
231       if (dtmf_duration_samples > 0xffff) {
232         // RFC 4733 2.5.2.3 Long-Duration Events
233         SendTelephoneEventPacket(ended, dtmf_timestamp_,
234                                  static_cast<uint16_t>(0xffff), false);
235 
236         // set new timestap for this segment
237         dtmf_timestamp_ = rtp_timestamp;
238         dtmf_duration_samples -= 0xffff;
239         dtmf_length_samples_ -= 0xffff;
240 
241         return SendTelephoneEventPacket(
242             ended, dtmf_timestamp_,
243             static_cast<uint16_t>(dtmf_duration_samples), false);
244       } else {
245         if (!SendTelephoneEventPacket(ended, dtmf_timestamp_,
246                                       dtmf_duration_samples,
247                                       !dtmf_event_first_packet_sent_)) {
248           return false;
249         }
250         dtmf_event_first_packet_sent_ = true;
251         return true;
252       }
253     }
254     return true;
255   }
256   if (payload_size == 0 || payload_data == NULL) {
257     if (frame_type == AudioFrameType::kEmptyFrame) {
258       // we don't send empty audio RTP packets
259       // no error since we use it to either drive DTMF when we use VAD, or
260       // enter DTX.
261       return true;
262     }
263     return false;
264   }
265 
266   std::unique_ptr<RtpPacketToSend> packet = rtp_sender_->AllocatePacket();
267   packet->SetMarker(MarkerBit(frame_type, payload_type));
268   packet->SetPayloadType(payload_type);
269   packet->SetTimestamp(rtp_timestamp);
270   packet->set_capture_time(clock_->CurrentTime());
271   // Update audio level extension, if included.
272   packet->SetExtension<AudioLevel>(
273       frame_type == AudioFrameType::kAudioFrameSpeech, audio_level_dbov);
274 
275   if (absolute_capture_timestamp_ms > 0) {
276     // Send absolute capture time periodically in order to optimize and save
277     // network traffic. Missing absolute capture times can be interpolated on
278     // the receiving end if sending intervals are small enough.
279     auto absolute_capture_time = absolute_capture_time_sender_.OnSendPacket(
280         AbsoluteCaptureTimeSender::GetSource(packet->Ssrc(), packet->Csrcs()),
281         packet->Timestamp(),
282         // Replace missing value with 0 (invalid frequency), this will trigger
283         // absolute capture time sending.
284         encoder_rtp_timestamp_frequency.value_or(0),
285         Int64MsToUQ32x32(clock_->ConvertTimestampToNtpTimeInMilliseconds(
286             absolute_capture_timestamp_ms)),
287         /*estimated_capture_clock_offset=*/
288         include_capture_clock_offset_ ? absl::make_optional(0) : absl::nullopt);
289     if (absolute_capture_time) {
290       // It also checks that extension was registered during SDP negotiation. If
291       // not then setter won't do anything.
292       packet->SetExtension<AbsoluteCaptureTimeExtension>(
293           *absolute_capture_time);
294     }
295   }
296 
297   uint8_t* payload = packet->AllocatePayload(payload_size);
298   if (!payload)  // Too large payload buffer.
299     return false;
300   memcpy(payload, payload_data, payload_size);
301 
302   {
303     MutexLock lock(&send_audio_mutex_);
304     last_payload_type_ = payload_type;
305   }
306   TRACE_EVENT_ASYNC_END2("webrtc", "Audio", rtp_timestamp, "timestamp",
307                          packet->Timestamp(), "seqnum",
308                          packet->SequenceNumber());
309   packet->set_packet_type(RtpPacketMediaType::kAudio);
310   packet->set_allow_retransmission(true);
311   bool send_result = rtp_sender_->SendToNetwork(std::move(packet));
312   if (first_packet_sent_()) {
313     RTC_LOG(LS_INFO) << "First audio RTP packet sent to pacer";
314   }
315   return send_result;
316 }
317 
318 // Audio level magnitude and voice activity flag are set for each RTP packet
SetAudioLevel(uint8_t level_dbov)319 int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dbov) {
320   if (level_dbov > 127) {
321     return -1;
322   }
323   MutexLock lock(&send_audio_mutex_);
324   audio_level_dbov_ = level_dbov;
325   return 0;
326 }
327 
328 // Send a TelephoneEvent tone using RFC 2833 (4733)
SendTelephoneEvent(uint8_t key,uint16_t time_ms,uint8_t level)329 int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
330                                            uint16_t time_ms,
331                                            uint8_t level) {
332   DtmfQueue::Event event;
333   {
334     MutexLock lock(&send_audio_mutex_);
335     if (dtmf_payload_type_ < 0) {
336       // TelephoneEvent payloadtype not configured
337       return -1;
338     }
339     event.payload_type = dtmf_payload_type_;
340   }
341   event.key = key;
342   event.duration_ms = time_ms;
343   event.level = level;
344   return dtmf_queue_.AddDtmf(event) ? 0 : -1;
345 }
346 
SendTelephoneEventPacket(bool ended,uint32_t dtmf_timestamp,uint16_t duration,bool marker_bit)347 bool RTPSenderAudio::SendTelephoneEventPacket(bool ended,
348                                               uint32_t dtmf_timestamp,
349                                               uint16_t duration,
350                                               bool marker_bit) {
351   uint8_t send_count = 1;
352   bool result = true;
353 
354   if (ended) {
355     // resend last packet in an event 3 times
356     send_count = 3;
357   }
358   do {
359     // Send DTMF data.
360     constexpr RtpPacketToSend::ExtensionManager* kNoExtensions = nullptr;
361     constexpr size_t kDtmfSize = 4;
362     std::unique_ptr<RtpPacketToSend> packet(
363         new RtpPacketToSend(kNoExtensions, kRtpHeaderSize + kDtmfSize));
364     packet->SetPayloadType(dtmf_current_event_.payload_type);
365     packet->SetMarker(marker_bit);
366     packet->SetSsrc(rtp_sender_->SSRC());
367     packet->SetTimestamp(dtmf_timestamp);
368     packet->set_capture_time(clock_->CurrentTime());
369 
370     // Create DTMF data.
371     uint8_t* dtmfbuffer = packet->AllocatePayload(kDtmfSize);
372     RTC_DCHECK(dtmfbuffer);
373     /*    From RFC 2833:
374      0                   1                   2                   3
375      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
376     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
377     |     event     |E|R| volume    |          duration             |
378     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
379     */
380     // R bit always cleared
381     uint8_t R = 0x00;
382     uint8_t volume = dtmf_current_event_.level;
383 
384     // First packet un-ended
385     uint8_t E = ended ? 0x80 : 0x00;
386 
387     // First byte is Event number, equals key number
388     dtmfbuffer[0] = dtmf_current_event_.key;
389     dtmfbuffer[1] = E | R | volume;
390     ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 2, duration);
391 
392     packet->set_packet_type(RtpPacketMediaType::kAudio);
393     packet->set_allow_retransmission(true);
394     result = rtp_sender_->SendToNetwork(std::move(packet));
395     send_count--;
396   } while (send_count > 0 && result);
397 
398   return result;
399 }
400 }  // namespace webrtc
401