xref: /aosp_15_r20/external/webrtc/modules/rtp_rtcp/source/video_rtp_depacketizer_av1.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
#include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <array>
#include <iterator>
#include <utility>

#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "rtc_base/byte_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
23 
24 namespace webrtc {
25 namespace {
// AV1 format:
//
// RTP payload syntax:
//     0 1 2 3 4 5 6 7
//    +-+-+-+-+-+-+-+-+
//    |Z|Y| W |N|-|-|-| (REQUIRED)
//    +=+=+=+=+=+=+=+=+ (REPEATED W-1 times, or any times if W = 0)
//    |1|             |
//    +-+ OBU fragment|
//    |1|             | (REQUIRED, leb128 encoded)
//    +-+    size     |
//    |0|             |
//    +-+-+-+-+-+-+-+-+
//    |  OBU fragment |
//    |     ...       |
//    +=+=+=+=+=+=+=+=+
//    |     ...       |
//    +=+=+=+=+=+=+=+=+ if W > 0, last fragment MUST NOT have size field
//    |  OBU fragment |
//    |     ...       |
//    +=+=+=+=+=+=+=+=+
//
//
// OBU syntax:
//     0 1 2 3 4 5 6 7
//    +-+-+-+-+-+-+-+-+
//    |0| type  |X|S|-| (REQUIRED)
//    +-+-+-+-+-+-+-+-+
// X: | TID |SID|-|-|-| (OPTIONAL)
//    +-+-+-+-+-+-+-+-+
//    |1|             |
//    +-+ OBU payload |
// S: |1|             | (OPTIONAL, variable length leb128 encoded)
//    +-+    size     |
//    |0|             |
//    +-+-+-+-+-+-+-+-+
//    |  OBU payload  |
//    |     ...       |
64 class ArrayOfArrayViews {
65  public:
66   class const_iterator;
67   ArrayOfArrayViews() = default;
68   ArrayOfArrayViews(const ArrayOfArrayViews&) = default;
69   ArrayOfArrayViews& operator=(const ArrayOfArrayViews&) = default;
70   ~ArrayOfArrayViews() = default;
71 
72   const_iterator begin() const;
73   const_iterator end() const;
empty() const74   bool empty() const { return data_.empty(); }
size() const75   size_t size() const { return size_; }
76   void CopyTo(uint8_t* destination, const_iterator first) const;
77 
Append(const uint8_t * data,size_t size)78   void Append(const uint8_t* data, size_t size) {
79     data_.emplace_back(data, size);
80     size_ += size;
81   }
82 
83  private:
84   using Storage = absl::InlinedVector<rtc::ArrayView<const uint8_t>, 2>;
85 
86   size_t size_ = 0;
87   Storage data_;
88 };
89 
90 class ArrayOfArrayViews::const_iterator {
91  public:
92   const_iterator() = default;
93   const_iterator(const const_iterator&) = default;
94   const_iterator& operator=(const const_iterator&) = default;
95 
operator ++()96   const_iterator& operator++() {
97     if (++inner_ == outer_->size()) {
98       ++outer_;
99       inner_ = 0;
100     }
101     return *this;
102   }
operator *() const103   uint8_t operator*() const { return (*outer_)[inner_]; }
104 
operator ==(const const_iterator & lhs,const const_iterator & rhs)105   friend bool operator==(const const_iterator& lhs, const const_iterator& rhs) {
106     return lhs.outer_ == rhs.outer_ && lhs.inner_ == rhs.inner_;
107   }
108 
109  private:
110   friend ArrayOfArrayViews;
const_iterator(ArrayOfArrayViews::Storage::const_iterator outer,size_t inner)111   const_iterator(ArrayOfArrayViews::Storage::const_iterator outer, size_t inner)
112       : outer_(outer), inner_(inner) {}
113 
114   Storage::const_iterator outer_;
115   size_t inner_;
116 };
117 
begin() const118 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::begin() const {
119   return const_iterator(data_.begin(), 0);
120 }
121 
end() const122 ArrayOfArrayViews::const_iterator ArrayOfArrayViews::end() const {
123   return const_iterator(data_.end(), 0);
124 }
125 
CopyTo(uint8_t * destination,const_iterator first) const126 void ArrayOfArrayViews::CopyTo(uint8_t* destination,
127                                const_iterator first) const {
128   if (first == end()) {
129     // Empty OBU payload. E.g. Temporal Delimiters are always empty.
130     return;
131   }
132   size_t first_chunk_size = first.outer_->size() - first.inner_;
133   memcpy(destination, first.outer_->data() + first.inner_, first_chunk_size);
134   destination += first_chunk_size;
135   for (auto it = std::next(first.outer_); it != data_.end(); ++it) {
136     memcpy(destination, it->data(), it->size());
137     destination += it->size();
138   }
139 }
140 
141 struct ObuInfo {
142   // Size of the obu_header and obu_size fields in the ouput frame.
143   size_t prefix_size = 0;
144   // obu_header() and obu_size (leb128 encoded payload_size).
145   // obu_header can be up to 2 bytes, obu_size - up to 5.
146   std::array<uint8_t, 7> prefix;
147   // Size of the obu payload in the output frame, i.e. excluding header
148   size_t payload_size = 0;
149   // iterator pointing to the beginning of the obu payload.
150   ArrayOfArrayViews::const_iterator payload_offset;
151   // OBU payloads as written in the rtp packet payloads.
152   ArrayOfArrayViews data;
153 };
154 // Expect that majority of the frame won't use more than 4 obus.
155 // In a simple stream delta frame consist of single Frame OBU, while key frame
156 // also has Sequence Header OBU.
157 using VectorObuInfo = absl::InlinedVector<ObuInfo, 4>;
158 
// Bits of the first obu_header byte, laid out as |0| type |X|S|-|.
// X: an obu_extension_header byte follows the obu_header.
constexpr uint8_t kObuExtensionPresentBit = 0b0'0000'100;
// S: a leb128 encoded obu_size field is present.
constexpr uint8_t kObuSizePresentBit = 0b0'0000'010;

// Returns true when the X bit of `obu_header` is set.
bool ObuHasExtension(uint8_t obu_header) {
  return (obu_header & kObuExtensionPresentBit) != 0;
}

// Returns true when the S bit of `obu_header` is set.
bool ObuHasSize(uint8_t obu_header) {
  return (obu_header & kObuSizePresentBit) != 0;
}
168 
// Z bit of the aggregation header: set when the first OBU element of the
// packet continues an OBU started in the previous packet.
bool RtpStartsWithFragment(uint8_t aggregation_header) {
  return (aggregation_header & 0b1000'0000u) != 0;
}
// Y bit of the aggregation header: set when the last OBU element of the
// packet is continued in the next packet.
bool RtpEndsWithFragment(uint8_t aggregation_header) {
  return (aggregation_header & 0b0100'0000u) != 0;
}
// W field of the aggregation header: number of OBU elements in the packet,
// in range [1, 3], or 0 when the packet may carry any number of obus.
int RtpNumObus(uint8_t aggregation_header) {
  return (aggregation_header & 0b0011'0000u) >> 4;
}
// N bit of the aggregation header: set when the packet starts a new coded
// video sequence.
bool RtpStartsNewCodedVideoSequence(uint8_t aggregation_header) {
  return (aggregation_header & 0b0000'1000u) != 0;
}
181 
182 // Reorgonizes array of rtp payloads into array of obus:
183 // fills ObuInfo::data field.
184 // Returns empty vector on error.
ParseObus(rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads)185 VectorObuInfo ParseObus(
186     rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) {
187   VectorObuInfo obu_infos;
188   bool expect_continues_obu = false;
189   for (rtc::ArrayView<const uint8_t> rtp_payload : rtp_payloads) {
190     rtc::ByteBufferReader payload(
191         reinterpret_cast<const char*>(rtp_payload.data()), rtp_payload.size());
192     uint8_t aggregation_header;
193     if (!payload.ReadUInt8(&aggregation_header)) {
194       RTC_DLOG(LS_WARNING)
195           << "Failed to find aggregation header in the packet.";
196       return {};
197     }
198     // Z-bit: 1 if the first OBU contained in the packet is a continuation of a
199     // previous OBU.
200     bool continues_obu = RtpStartsWithFragment(aggregation_header);
201     if (continues_obu != expect_continues_obu) {
202       RTC_DLOG(LS_WARNING) << "Unexpected Z-bit " << continues_obu;
203       return {};
204     }
205     int num_expected_obus = RtpNumObus(aggregation_header);
206     if (payload.Length() == 0) {
207       // rtp packet has just the aggregation header. That may be valid only when
208       // there is exactly one fragment in the packet of size 0.
209       if (num_expected_obus != 1) {
210         RTC_DLOG(LS_WARNING)
211             << "Invalid packet with just an aggregation header.";
212         return {};
213       }
214       if (!continues_obu) {
215         // Empty packet just to notify there is a new OBU.
216         obu_infos.emplace_back();
217       }
218       expect_continues_obu = RtpEndsWithFragment(aggregation_header);
219       continue;
220     }
221 
222     for (int obu_index = 1; payload.Length() > 0; ++obu_index) {
223       ObuInfo& obu_info = (obu_index == 1 && continues_obu)
224                               ? obu_infos.back()
225                               : obu_infos.emplace_back();
226       uint64_t fragment_size;
227       // When num_expected_obus > 0, last OBU (fragment) is not preceeded by
228       // the size field. See W field in
229       // https://aomediacodec.github.io/av1-rtp-spec/#43-av1-aggregation-header
230       bool has_fragment_size = (obu_index != num_expected_obus);
231       if (has_fragment_size) {
232         if (!payload.ReadUVarint(&fragment_size)) {
233           RTC_DLOG(LS_WARNING) << "Failed to read fragment size for obu #"
234                                << obu_index << "/" << num_expected_obus;
235           return {};
236         }
237         if (fragment_size > payload.Length()) {
238           // Malformed input: written size is larger than remaining buffer.
239           RTC_DLOG(LS_WARNING) << "Malformed fragment size " << fragment_size
240                                << " is larger than remaining size "
241                                << payload.Length() << " while reading obu #"
242                                << obu_index << "/" << num_expected_obus;
243           return {};
244         }
245       } else {
246         fragment_size = payload.Length();
247       }
248       // While it is in-practical to pass empty fragments, it is still possible.
249       if (fragment_size > 0) {
250         obu_info.data.Append(reinterpret_cast<const uint8_t*>(payload.Data()),
251                              fragment_size);
252         payload.Consume(fragment_size);
253       }
254     }
255     // Z flag should be same as Y flag of the next packet.
256     expect_continues_obu = RtpEndsWithFragment(aggregation_header);
257   }
258   if (expect_continues_obu) {
259     RTC_DLOG(LS_WARNING) << "Last packet shouldn't have last obu fragmented.";
260     return {};
261   }
262   return obu_infos;
263 }
264 
// Writes `value` into `buffer` using leb128 encoding: 7 bits per byte, least
// significant group first, with the high bit set on every byte but the last.
// `buffer` must have room for the encoded value; a uint32_t takes at most
// 5 bytes.
// Returns number of bytes written.
int WriteLeb128(uint32_t value, uint8_t* buffer) {
  int size = 0;
  while (value >= 0x80) {
    buffer[size] = static_cast<uint8_t>(0x80 | (value & 0x7F));
    ++size;
    value >>= 7;
  }
  // Final group: fits into 7 bits, so the continuation bit stays cleared.
  buffer[size] = static_cast<uint8_t>(value);
  ++size;
  return size;
}
277 
278 // Calculates sizes for the Obu, i.e. base on ObuInfo::data field calculates
279 // all other fields in the ObuInfo structure.
280 // Returns false if obu found to be misformed.
CalculateObuSizes(ObuInfo * obu_info)281 bool CalculateObuSizes(ObuInfo* obu_info) {
282   if (obu_info->data.empty()) {
283     RTC_DLOG(LS_WARNING) << "Invalid bitstream: empty obu provided.";
284     return false;
285   }
286   auto it = obu_info->data.begin();
287   uint8_t obu_header = *it;
288   obu_info->prefix[0] = obu_header | kObuSizePresentBit;
289   obu_info->prefix_size = 1;
290   ++it;
291   if (ObuHasExtension(obu_header)) {
292     if (it == obu_info->data.end()) {
293       return false;
294     }
295     obu_info->prefix[1] = *it;  // obu_extension_header
296     obu_info->prefix_size = 2;
297     ++it;
298   }
299   // Read, validate, and skip size, if present.
300   if (!ObuHasSize(obu_header)) {
301     obu_info->payload_size = obu_info->data.size() - obu_info->prefix_size;
302   } else {
303     // Read leb128 encoded field obu_size.
304     uint64_t obu_size_bytes = 0;
305     // Number of bytes obu_size field occupy in the bitstream.
306     int size_of_obu_size_bytes = 0;
307     uint8_t leb128_byte;
308     do {
309       if (it == obu_info->data.end() || size_of_obu_size_bytes >= 8) {
310         RTC_DLOG(LS_WARNING)
311             << "Failed to read obu_size. obu_size field is too long: "
312             << size_of_obu_size_bytes << " bytes processed.";
313         return false;
314       }
315       leb128_byte = *it;
316       obu_size_bytes |= uint64_t{leb128_byte & 0x7Fu}
317                         << (size_of_obu_size_bytes * 7);
318       ++size_of_obu_size_bytes;
319       ++it;
320     } while ((leb128_byte & 0x80) != 0);
321 
322     obu_info->payload_size =
323         obu_info->data.size() - obu_info->prefix_size - size_of_obu_size_bytes;
324     if (obu_size_bytes != obu_info->payload_size) {
325       // obu_size was present in the bitstream and mismatches calculated size.
326       RTC_DLOG(LS_WARNING) << "Mismatch in obu_size. signaled: "
327                            << obu_size_bytes
328                            << ", actual: " << obu_info->payload_size;
329       return false;
330     }
331   }
332   obu_info->payload_offset = it;
333   obu_info->prefix_size +=
334       WriteLeb128(rtc::dchecked_cast<uint32_t>(obu_info->payload_size),
335                   obu_info->prefix.data() + obu_info->prefix_size);
336   return true;
337 }
338 
339 }  // namespace
340 
AssembleFrame(rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads)341 rtc::scoped_refptr<EncodedImageBuffer> VideoRtpDepacketizerAv1::AssembleFrame(
342     rtc::ArrayView<const rtc::ArrayView<const uint8_t>> rtp_payloads) {
343   VectorObuInfo obu_infos = ParseObus(rtp_payloads);
344   if (obu_infos.empty()) {
345     return nullptr;
346   }
347 
348   size_t frame_size = 0;
349   for (ObuInfo& obu_info : obu_infos) {
350     if (!CalculateObuSizes(&obu_info)) {
351       return nullptr;
352     }
353     frame_size += (obu_info.prefix_size + obu_info.payload_size);
354   }
355 
356   rtc::scoped_refptr<EncodedImageBuffer> bitstream =
357       EncodedImageBuffer::Create(frame_size);
358   uint8_t* write_at = bitstream->data();
359   for (const ObuInfo& obu_info : obu_infos) {
360     // Copy the obu_header and obu_size fields.
361     memcpy(write_at, obu_info.prefix.data(), obu_info.prefix_size);
362     write_at += obu_info.prefix_size;
363     // Copy the obu payload.
364     obu_info.data.CopyTo(write_at, obu_info.payload_offset);
365     write_at += obu_info.payload_size;
366   }
367   RTC_CHECK_EQ(write_at - bitstream->data(), bitstream->size());
368   return bitstream;
369 }
370 
371 absl::optional<VideoRtpDepacketizer::ParsedRtpPayload>
Parse(rtc::CopyOnWriteBuffer rtp_payload)372 VideoRtpDepacketizerAv1::Parse(rtc::CopyOnWriteBuffer rtp_payload) {
373   if (rtp_payload.size() == 0) {
374     RTC_DLOG(LS_ERROR) << "Empty rtp payload.";
375     return absl::nullopt;
376   }
377   uint8_t aggregation_header = rtp_payload.cdata()[0];
378   if (RtpStartsNewCodedVideoSequence(aggregation_header) &&
379       RtpStartsWithFragment(aggregation_header)) {
380     // new coded video sequence can't start from an OBU fragment.
381     return absl::nullopt;
382   }
383   absl::optional<ParsedRtpPayload> parsed(absl::in_place);
384 
385   // To assemble frame, all of the rtp payload is required, including
386   // aggregation header.
387   parsed->video_payload = std::move(rtp_payload);
388 
389   parsed->video_header.codec = VideoCodecType::kVideoCodecAV1;
390   // These are not accurate since frame may consist of several packet aligned
391   // chunks of obus, but should be good enough for most cases. It might produce
392   // frame that do not map to any real frame, but av1 decoder should be able to
393   // handle it since it promise to handle individual obus rather than full
394   // frames.
395   parsed->video_header.is_first_packet_in_frame =
396       !RtpStartsWithFragment(aggregation_header);
397   parsed->video_header.is_last_packet_in_frame =
398       !RtpEndsWithFragment(aggregation_header);
399 
400   parsed->video_header.frame_type =
401       RtpStartsNewCodedVideoSequence(aggregation_header)
402           ? VideoFrameType::kVideoFrameKey
403           : VideoFrameType::kVideoFrameDelta;
404   return parsed;
405 }
406 
407 }  // namespace webrtc
408