xref: /aosp_15_r20/external/webrtc/modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.h"
12 
13 #include <stddef.h>
14 #include <stdint.h>
15 
16 #include "absl/algorithm/container.h"
17 #include "api/video/video_layers_allocation.h"
18 #include "modules/rtp_rtcp/source/byte_io.h"
19 #include "rtc_base/checks.h"
20 
21 namespace webrtc {
22 
23 constexpr RTPExtensionType RtpVideoLayersAllocationExtension::kId;
24 
25 namespace {
26 
27 constexpr int kMaxNumRtpStreams = 4;
28 
// TODO(bugs.webrtc.org/12000): share Leb128 functions with av1 packetizer.
// Returns the number of bytes the LEB128 encoding of `value` occupies: one
// byte for every started group of 7 significant bits, and never less than one.
int Leb128Size(uint32_t value) {
  int num_bytes = 1;  // The terminating byte is always present.
  for (; value >= 0x80; value >>= 7) {
    ++num_bytes;  // One continuation byte per additional 7-bit group.
  }
  return num_bytes;
}
39 
// Writes `value` to `buffer` in LEB128 form: 7 bits per byte, least
// significant group first, with the top bit set on every byte but the last.
// The caller must provide room for Leb128Size(value) bytes.
// Returns the number of bytes written.
int WriteLeb128(uint32_t value, uint8_t* buffer) {
  uint8_t* out = buffer;
  for (; value >= 0x80; value >>= 7) {
    // More groups follow: emit the low 7 bits with the continuation flag.
    *out++ = 0x80 | (value & 0x7F);
  }
  // Final group; its top bit is clear because value < 0x80 here.
  *out++ = value;
  return static_cast<int>(out - buffer);
}
52 
// Decodes one LEB128 value starting at `read_at` and advances `read_at` past
// the bytes consumed. At most 9 bytes are accepted for a single value.
// On error (input ends, or the 9-byte budget is used up, before a byte with a
// clear top bit is seen) sets `read_at` to nullptr and returns 0.
uint64_t ReadLeb128(const uint8_t*& read_at, const uint8_t* end) {
  uint64_t result = 0;
  for (int shift = 0; read_at != end && shift < 64 - 7; shift += 7) {
    const uint8_t current = *read_at++;
    result |= uint64_t{current & 0x7Fu} << shift;
    if ((current & 0x80) == 0) {
      // Top bit clear marks the last byte of the value.
      return result;
    }
  }
  // Failed to find terminator leb128 byte.
  read_at = nullptr;
  return 0;
}
71 
AllocationIsValid(const VideoLayersAllocation & allocation)72 bool AllocationIsValid(const VideoLayersAllocation& allocation) {
73   // Since all multivalue fields are stored in (rtp_stream_id, spatial_id) order
74   // assume `allocation.active_spatial_layers` is already sorted. It is simpler
75   // to assemble it in the sorted way than to resort during serialization.
76   if (!absl::c_is_sorted(
77           allocation.active_spatial_layers,
78           [](const VideoLayersAllocation::SpatialLayer& lhs,
79              const VideoLayersAllocation::SpatialLayer& rhs) {
80             return std::make_tuple(lhs.rtp_stream_index, lhs.spatial_id) <
81                    std::make_tuple(rhs.rtp_stream_index, rhs.spatial_id);
82           })) {
83     return false;
84   }
85 
86   int max_rtp_stream_idx = 0;
87   for (const auto& spatial_layer : allocation.active_spatial_layers) {
88     if (spatial_layer.rtp_stream_index < 0 ||
89         spatial_layer.rtp_stream_index >= 4) {
90       return false;
91     }
92     if (spatial_layer.spatial_id < 0 || spatial_layer.spatial_id >= 4) {
93       return false;
94     }
95     if (spatial_layer.target_bitrate_per_temporal_layer.empty() ||
96         spatial_layer.target_bitrate_per_temporal_layer.size() > 4) {
97       return false;
98     }
99     if (max_rtp_stream_idx < spatial_layer.rtp_stream_index) {
100       max_rtp_stream_idx = spatial_layer.rtp_stream_index;
101     }
102     if (allocation.resolution_and_frame_rate_is_valid) {
103       // TODO(danilchap): Add check width and height are no more than 0x10000
104       // when width and height become larger type and thus would support maximum
105       // resolution.
106       if (spatial_layer.width <= 0) {
107         return false;
108       }
109       if (spatial_layer.height <= 0) {
110         return false;
111       }
112       if (spatial_layer.frame_rate_fps > 255) {
113         return false;
114       }
115     }
116   }
117   if (allocation.rtp_stream_index < 0 ||
118       (!allocation.active_spatial_layers.empty() &&
119        allocation.rtp_stream_index > max_rtp_stream_idx)) {
120     return false;
121   }
122   return true;
123 }
124 
125 struct SpatialLayersBitmasks {
126   int max_rtp_stream_id = 0;
127   uint8_t spatial_layer_bitmask[kMaxNumRtpStreams] = {};
128   bool bitmasks_are_the_same = true;
129 };
130 
SpatialLayersBitmasksPerRtpStream(const VideoLayersAllocation & allocation)131 SpatialLayersBitmasks SpatialLayersBitmasksPerRtpStream(
132     const VideoLayersAllocation& allocation) {
133   RTC_DCHECK(AllocationIsValid(allocation));
134   SpatialLayersBitmasks result;
135   for (const auto& layer : allocation.active_spatial_layers) {
136     result.spatial_layer_bitmask[layer.rtp_stream_index] |=
137         (1u << layer.spatial_id);
138     if (result.max_rtp_stream_id < layer.rtp_stream_index) {
139       result.max_rtp_stream_id = layer.rtp_stream_index;
140     }
141   }
142   for (int i = 1; i <= result.max_rtp_stream_id; ++i) {
143     if (result.spatial_layer_bitmask[i] != result.spatial_layer_bitmask[0]) {
144       result.bitmasks_are_the_same = false;
145       break;
146     }
147   }
148   return result;
149 }
150 
151 }  // namespace
152 
153 // See /docs/native-code/rtp-rtpext/video-layers-allocation00/README.md
154 // for the description of the format.
155 
Write(rtc::ArrayView<uint8_t> data,const VideoLayersAllocation & allocation)156 bool RtpVideoLayersAllocationExtension::Write(
157     rtc::ArrayView<uint8_t> data,
158     const VideoLayersAllocation& allocation) {
159   RTC_DCHECK(AllocationIsValid(allocation));
160   RTC_DCHECK_GE(data.size(), ValueSize(allocation));
161 
162   if (allocation.active_spatial_layers.empty()) {
163     data[0] = 0;
164     return true;
165   }
166 
167   SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
168   uint8_t* write_at = data.data();
169   // First half of the header byte.
170   *write_at = (allocation.rtp_stream_index << 6);
171   // number of rtp stream - 1 is the same as the maximum rtp_stream_id.
172   *write_at |= slb.max_rtp_stream_id << 4;
173   if (slb.bitmasks_are_the_same) {
174     // Second half of the header byte.
175     *write_at |= slb.spatial_layer_bitmask[0];
176   } else {
177     // spatial layer bitmasks when they are different for different RTP streams.
178     *++write_at =
179         (slb.spatial_layer_bitmask[0] << 4) | slb.spatial_layer_bitmask[1];
180     if (slb.max_rtp_stream_id >= 2) {
181       *++write_at =
182           (slb.spatial_layer_bitmask[2] << 4) | slb.spatial_layer_bitmask[3];
183     }
184   }
185   ++write_at;
186 
187   {  // Number of temporal layers.
188     int bit_offset = 8;
189     *write_at = 0;
190     for (const auto& layer : allocation.active_spatial_layers) {
191       if (bit_offset == 0) {
192         bit_offset = 6;
193         *++write_at = 0;
194       } else {
195         bit_offset -= 2;
196       }
197       *write_at |=
198           ((layer.target_bitrate_per_temporal_layer.size() - 1) << bit_offset);
199     }
200     ++write_at;
201   }
202 
203   // Target bitrates.
204   for (const auto& spatial_layer : allocation.active_spatial_layers) {
205     for (const DataRate& bitrate :
206          spatial_layer.target_bitrate_per_temporal_layer) {
207       write_at += WriteLeb128(bitrate.kbps(), write_at);
208     }
209   }
210 
211   if (allocation.resolution_and_frame_rate_is_valid) {
212     for (const auto& spatial_layer : allocation.active_spatial_layers) {
213       ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.width - 1);
214       write_at += 2;
215       ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.height - 1);
216       write_at += 2;
217       *write_at = spatial_layer.frame_rate_fps;
218       ++write_at;
219     }
220   }
221   RTC_DCHECK_EQ(write_at - data.data(), ValueSize(allocation));
222   return true;
223 }
224 
Parse(rtc::ArrayView<const uint8_t> data,VideoLayersAllocation * allocation)225 bool RtpVideoLayersAllocationExtension::Parse(
226     rtc::ArrayView<const uint8_t> data,
227     VideoLayersAllocation* allocation) {
228   if (data.empty() || allocation == nullptr) {
229     return false;
230   }
231 
232   allocation->active_spatial_layers.clear();
233 
234   const uint8_t* read_at = data.data();
235   const uint8_t* const end = data.data() + data.size();
236 
237   if (data.size() == 1 && *read_at == 0) {
238     allocation->rtp_stream_index = 0;
239     allocation->resolution_and_frame_rate_is_valid = true;
240     return AllocationIsValid(*allocation);
241   }
242 
243   // Header byte.
244   allocation->rtp_stream_index = *read_at >> 6;
245   int num_rtp_streams = 1 + ((*read_at >> 4) & 0b11);
246   uint8_t spatial_layers_bitmasks[kMaxNumRtpStreams];
247   spatial_layers_bitmasks[0] = *read_at & 0b1111;
248 
249   if (spatial_layers_bitmasks[0] != 0) {
250     for (int i = 1; i < num_rtp_streams; ++i) {
251       spatial_layers_bitmasks[i] = spatial_layers_bitmasks[0];
252     }
253   } else {
254     // Spatial layer bitmasks when they are different for different RTP streams.
255     if (++read_at == end) {
256       return false;
257     }
258     spatial_layers_bitmasks[0] = *read_at >> 4;
259     spatial_layers_bitmasks[1] = *read_at & 0b1111;
260     if (num_rtp_streams > 2) {
261       if (++read_at == end) {
262         return false;
263       }
264       spatial_layers_bitmasks[2] = *read_at >> 4;
265       spatial_layers_bitmasks[3] = *read_at & 0b1111;
266     }
267   }
268   if (++read_at == end) {
269     return false;
270   }
271 
272   // Read number of temporal layers,
273   // Create `allocation->active_spatial_layers` while iterating though it.
274   int bit_offset = 8;
275   for (int stream_idx = 0; stream_idx < num_rtp_streams; ++stream_idx) {
276     for (int sid = 0; sid < VideoLayersAllocation::kMaxSpatialIds; ++sid) {
277       if ((spatial_layers_bitmasks[stream_idx] & (1 << sid)) == 0) {
278         continue;
279       }
280 
281       if (bit_offset == 0) {
282         bit_offset = 6;
283         if (++read_at == end) {
284           return false;
285         }
286       } else {
287         bit_offset -= 2;
288       }
289       int num_temporal_layers = 1 + ((*read_at >> bit_offset) & 0b11);
290       allocation->active_spatial_layers.emplace_back();
291       auto& layer = allocation->active_spatial_layers.back();
292       layer.rtp_stream_index = stream_idx;
293       layer.spatial_id = sid;
294       layer.target_bitrate_per_temporal_layer.resize(num_temporal_layers,
295                                                      DataRate::Zero());
296     }
297   }
298   if (++read_at == end) {
299     return false;
300   }
301 
302   // Target bitrates.
303   for (auto& layer : allocation->active_spatial_layers) {
304     for (DataRate& rate : layer.target_bitrate_per_temporal_layer) {
305       uint64_t bitrate_kbps = ReadLeb128(read_at, end);
306       // bitrate_kbps might represent larger values than DataRate type,
307       // discard unreasonably large values.
308       if (read_at == nullptr || bitrate_kbps > 1'000'000) {
309         return false;
310       }
311       rate = DataRate::KilobitsPerSec(bitrate_kbps);
312     }
313   }
314 
315   if (read_at == end) {
316     allocation->resolution_and_frame_rate_is_valid = false;
317     return AllocationIsValid(*allocation);
318   }
319 
320   if (read_at + 5 * allocation->active_spatial_layers.size() != end) {
321     // data is left, but it size is not what can be used for resolutions and
322     // framerates.
323     return false;
324   }
325   allocation->resolution_and_frame_rate_is_valid = true;
326   for (auto& layer : allocation->active_spatial_layers) {
327     layer.width = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
328     read_at += 2;
329     layer.height = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
330     read_at += 2;
331     layer.frame_rate_fps = *read_at;
332     ++read_at;
333   }
334 
335   return AllocationIsValid(*allocation);
336 }
337 
ValueSize(const VideoLayersAllocation & allocation)338 size_t RtpVideoLayersAllocationExtension::ValueSize(
339     const VideoLayersAllocation& allocation) {
340   if (allocation.active_spatial_layers.empty()) {
341     return 1;
342   }
343   size_t result = 1;  // header
344   SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
345   if (!slb.bitmasks_are_the_same) {
346     ++result;
347     if (slb.max_rtp_stream_id >= 2) {
348       ++result;
349     }
350   }
351   // 2 bits per active spatial layer, rounded up to full byte, i.e.
352   // 0.25 byte per active spatial layer.
353   result += (allocation.active_spatial_layers.size() + 3) / 4;
354   for (const auto& spatial_layer : allocation.active_spatial_layers) {
355     for (DataRate value : spatial_layer.target_bitrate_per_temporal_layer) {
356       result += Leb128Size(value.kbps());
357     }
358   }
359   if (allocation.resolution_and_frame_rate_is_valid) {
360     result += 5 * allocation.active_spatial_layers.size();
361   }
362   return result;
363 }
364 
365 }  // namespace webrtc
366