1 /*
2 * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/rtp_rtcp/source/rtp_video_layers_allocation_extension.h"
12
13 #include <stddef.h>
14 #include <stdint.h>
15
16 #include "absl/algorithm/container.h"
17 #include "api/video/video_layers_allocation.h"
18 #include "modules/rtp_rtcp/source/byte_io.h"
19 #include "rtc_base/checks.h"
20
21 namespace webrtc {
22
// Namespace-scope definition of the static constexpr member `kId`.
// NOTE(review): presumably kept so that odr-use of `kId` links under
// pre-C++17 rules; the in-class declaration is not visible here -- confirm.
constexpr RTPExtensionType RtpVideoLayersAllocationExtension::kId;
24
25 namespace {
26
// Upper bound on the number of RTP streams described by one allocation.
// Sizes the per-stream spatial layer bitmask arrays used in this file.
constexpr int kMaxNumRtpStreams = 4;
28
// TODO(bugs.webrtc.org/12000): share Leb128 functions with av1 packetizer.
// Returns the number of bytes the LEB128 encoding of `value` occupies:
// one byte per started group of 7 significant bits, and never less than one.
int Leb128Size(uint32_t value) {
  int num_bytes = 1;  // The terminating byte is always present.
  for (; value >= 0x80; value >>= 7) {
    ++num_bytes;  // One continuation byte per extra 7-bit group.
  }
  return num_bytes;
}
39
// Stores `value` LEB128-encoded (7 payload bits per byte, least significant
// group first) starting at `buffer`. The caller must provide enough room;
// Leb128Size(value) bytes are written.
// Returns number of bytes written.
int WriteLeb128(uint32_t value, uint8_t* buffer) {
  uint8_t* out = buffer;
  for (; value >= 0x80; value >>= 7) {
    // High bit set: more bytes follow.
    *out++ = static_cast<uint8_t>(0x80 | (value & 0x7F));
  }
  // Final byte has the continuation bit cleared.
  *out++ = static_cast<uint8_t>(value);
  return static_cast<int>(out - buffer);
}
52
// Decodes one LEB128 value from [read_at, end) and moves `read_at` past the
// bytes consumed. If the range ends, or the shift budget is exhausted, before
// a byte with a cleared continuation bit is seen, `read_at` is set to nullptr
// and 0 is returned.
uint64_t ReadLeb128(const uint8_t*& read_at, const uint8_t* end) {
  // Reading stops once fewer than 7 free bits would remain in the result.
  constexpr int kShiftLimit = 64 - 7;
  uint64_t decoded = 0;
  for (int shift = 0; read_at != end && shift < kShiftLimit; shift += 7) {
    const uint8_t current = *read_at++;
    decoded |= uint64_t{current & 0x7Fu} << shift;
    const bool is_last_byte = (current & 0x80) == 0;
    if (is_last_byte) {
      return decoded;
    }
  }
  // No terminating byte found.
  read_at = nullptr;
  return 0;
}
71
AllocationIsValid(const VideoLayersAllocation & allocation)72 bool AllocationIsValid(const VideoLayersAllocation& allocation) {
73 // Since all multivalue fields are stored in (rtp_stream_id, spatial_id) order
74 // assume `allocation.active_spatial_layers` is already sorted. It is simpler
75 // to assemble it in the sorted way than to resort during serialization.
76 if (!absl::c_is_sorted(
77 allocation.active_spatial_layers,
78 [](const VideoLayersAllocation::SpatialLayer& lhs,
79 const VideoLayersAllocation::SpatialLayer& rhs) {
80 return std::make_tuple(lhs.rtp_stream_index, lhs.spatial_id) <
81 std::make_tuple(rhs.rtp_stream_index, rhs.spatial_id);
82 })) {
83 return false;
84 }
85
86 int max_rtp_stream_idx = 0;
87 for (const auto& spatial_layer : allocation.active_spatial_layers) {
88 if (spatial_layer.rtp_stream_index < 0 ||
89 spatial_layer.rtp_stream_index >= 4) {
90 return false;
91 }
92 if (spatial_layer.spatial_id < 0 || spatial_layer.spatial_id >= 4) {
93 return false;
94 }
95 if (spatial_layer.target_bitrate_per_temporal_layer.empty() ||
96 spatial_layer.target_bitrate_per_temporal_layer.size() > 4) {
97 return false;
98 }
99 if (max_rtp_stream_idx < spatial_layer.rtp_stream_index) {
100 max_rtp_stream_idx = spatial_layer.rtp_stream_index;
101 }
102 if (allocation.resolution_and_frame_rate_is_valid) {
103 // TODO(danilchap): Add check width and height are no more than 0x10000
104 // when width and height become larger type and thus would support maximum
105 // resolution.
106 if (spatial_layer.width <= 0) {
107 return false;
108 }
109 if (spatial_layer.height <= 0) {
110 return false;
111 }
112 if (spatial_layer.frame_rate_fps > 255) {
113 return false;
114 }
115 }
116 }
117 if (allocation.rtp_stream_index < 0 ||
118 (!allocation.active_spatial_layers.empty() &&
119 allocation.rtp_stream_index > max_rtp_stream_idx)) {
120 return false;
121 }
122 return true;
123 }
124
125 struct SpatialLayersBitmasks {
126 int max_rtp_stream_id = 0;
127 uint8_t spatial_layer_bitmask[kMaxNumRtpStreams] = {};
128 bool bitmasks_are_the_same = true;
129 };
130
SpatialLayersBitmasksPerRtpStream(const VideoLayersAllocation & allocation)131 SpatialLayersBitmasks SpatialLayersBitmasksPerRtpStream(
132 const VideoLayersAllocation& allocation) {
133 RTC_DCHECK(AllocationIsValid(allocation));
134 SpatialLayersBitmasks result;
135 for (const auto& layer : allocation.active_spatial_layers) {
136 result.spatial_layer_bitmask[layer.rtp_stream_index] |=
137 (1u << layer.spatial_id);
138 if (result.max_rtp_stream_id < layer.rtp_stream_index) {
139 result.max_rtp_stream_id = layer.rtp_stream_index;
140 }
141 }
142 for (int i = 1; i <= result.max_rtp_stream_id; ++i) {
143 if (result.spatial_layer_bitmask[i] != result.spatial_layer_bitmask[0]) {
144 result.bitmasks_are_the_same = false;
145 break;
146 }
147 }
148 return result;
149 }
150
151 } // namespace
152
153 // See /docs/native-code/rtp-rtpext/video-layers-allocation00/README.md
154 // for the description of the format.
155
Write(rtc::ArrayView<uint8_t> data,const VideoLayersAllocation & allocation)156 bool RtpVideoLayersAllocationExtension::Write(
157 rtc::ArrayView<uint8_t> data,
158 const VideoLayersAllocation& allocation) {
159 RTC_DCHECK(AllocationIsValid(allocation));
160 RTC_DCHECK_GE(data.size(), ValueSize(allocation));
161
162 if (allocation.active_spatial_layers.empty()) {
163 data[0] = 0;
164 return true;
165 }
166
167 SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
168 uint8_t* write_at = data.data();
169 // First half of the header byte.
170 *write_at = (allocation.rtp_stream_index << 6);
171 // number of rtp stream - 1 is the same as the maximum rtp_stream_id.
172 *write_at |= slb.max_rtp_stream_id << 4;
173 if (slb.bitmasks_are_the_same) {
174 // Second half of the header byte.
175 *write_at |= slb.spatial_layer_bitmask[0];
176 } else {
177 // spatial layer bitmasks when they are different for different RTP streams.
178 *++write_at =
179 (slb.spatial_layer_bitmask[0] << 4) | slb.spatial_layer_bitmask[1];
180 if (slb.max_rtp_stream_id >= 2) {
181 *++write_at =
182 (slb.spatial_layer_bitmask[2] << 4) | slb.spatial_layer_bitmask[3];
183 }
184 }
185 ++write_at;
186
187 { // Number of temporal layers.
188 int bit_offset = 8;
189 *write_at = 0;
190 for (const auto& layer : allocation.active_spatial_layers) {
191 if (bit_offset == 0) {
192 bit_offset = 6;
193 *++write_at = 0;
194 } else {
195 bit_offset -= 2;
196 }
197 *write_at |=
198 ((layer.target_bitrate_per_temporal_layer.size() - 1) << bit_offset);
199 }
200 ++write_at;
201 }
202
203 // Target bitrates.
204 for (const auto& spatial_layer : allocation.active_spatial_layers) {
205 for (const DataRate& bitrate :
206 spatial_layer.target_bitrate_per_temporal_layer) {
207 write_at += WriteLeb128(bitrate.kbps(), write_at);
208 }
209 }
210
211 if (allocation.resolution_and_frame_rate_is_valid) {
212 for (const auto& spatial_layer : allocation.active_spatial_layers) {
213 ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.width - 1);
214 write_at += 2;
215 ByteWriter<uint16_t>::WriteBigEndian(write_at, spatial_layer.height - 1);
216 write_at += 2;
217 *write_at = spatial_layer.frame_rate_fps;
218 ++write_at;
219 }
220 }
221 RTC_DCHECK_EQ(write_at - data.data(), ValueSize(allocation));
222 return true;
223 }
224
Parse(rtc::ArrayView<const uint8_t> data,VideoLayersAllocation * allocation)225 bool RtpVideoLayersAllocationExtension::Parse(
226 rtc::ArrayView<const uint8_t> data,
227 VideoLayersAllocation* allocation) {
228 if (data.empty() || allocation == nullptr) {
229 return false;
230 }
231
232 allocation->active_spatial_layers.clear();
233
234 const uint8_t* read_at = data.data();
235 const uint8_t* const end = data.data() + data.size();
236
237 if (data.size() == 1 && *read_at == 0) {
238 allocation->rtp_stream_index = 0;
239 allocation->resolution_and_frame_rate_is_valid = true;
240 return AllocationIsValid(*allocation);
241 }
242
243 // Header byte.
244 allocation->rtp_stream_index = *read_at >> 6;
245 int num_rtp_streams = 1 + ((*read_at >> 4) & 0b11);
246 uint8_t spatial_layers_bitmasks[kMaxNumRtpStreams];
247 spatial_layers_bitmasks[0] = *read_at & 0b1111;
248
249 if (spatial_layers_bitmasks[0] != 0) {
250 for (int i = 1; i < num_rtp_streams; ++i) {
251 spatial_layers_bitmasks[i] = spatial_layers_bitmasks[0];
252 }
253 } else {
254 // Spatial layer bitmasks when they are different for different RTP streams.
255 if (++read_at == end) {
256 return false;
257 }
258 spatial_layers_bitmasks[0] = *read_at >> 4;
259 spatial_layers_bitmasks[1] = *read_at & 0b1111;
260 if (num_rtp_streams > 2) {
261 if (++read_at == end) {
262 return false;
263 }
264 spatial_layers_bitmasks[2] = *read_at >> 4;
265 spatial_layers_bitmasks[3] = *read_at & 0b1111;
266 }
267 }
268 if (++read_at == end) {
269 return false;
270 }
271
272 // Read number of temporal layers,
273 // Create `allocation->active_spatial_layers` while iterating though it.
274 int bit_offset = 8;
275 for (int stream_idx = 0; stream_idx < num_rtp_streams; ++stream_idx) {
276 for (int sid = 0; sid < VideoLayersAllocation::kMaxSpatialIds; ++sid) {
277 if ((spatial_layers_bitmasks[stream_idx] & (1 << sid)) == 0) {
278 continue;
279 }
280
281 if (bit_offset == 0) {
282 bit_offset = 6;
283 if (++read_at == end) {
284 return false;
285 }
286 } else {
287 bit_offset -= 2;
288 }
289 int num_temporal_layers = 1 + ((*read_at >> bit_offset) & 0b11);
290 allocation->active_spatial_layers.emplace_back();
291 auto& layer = allocation->active_spatial_layers.back();
292 layer.rtp_stream_index = stream_idx;
293 layer.spatial_id = sid;
294 layer.target_bitrate_per_temporal_layer.resize(num_temporal_layers,
295 DataRate::Zero());
296 }
297 }
298 if (++read_at == end) {
299 return false;
300 }
301
302 // Target bitrates.
303 for (auto& layer : allocation->active_spatial_layers) {
304 for (DataRate& rate : layer.target_bitrate_per_temporal_layer) {
305 uint64_t bitrate_kbps = ReadLeb128(read_at, end);
306 // bitrate_kbps might represent larger values than DataRate type,
307 // discard unreasonably large values.
308 if (read_at == nullptr || bitrate_kbps > 1'000'000) {
309 return false;
310 }
311 rate = DataRate::KilobitsPerSec(bitrate_kbps);
312 }
313 }
314
315 if (read_at == end) {
316 allocation->resolution_and_frame_rate_is_valid = false;
317 return AllocationIsValid(*allocation);
318 }
319
320 if (read_at + 5 * allocation->active_spatial_layers.size() != end) {
321 // data is left, but it size is not what can be used for resolutions and
322 // framerates.
323 return false;
324 }
325 allocation->resolution_and_frame_rate_is_valid = true;
326 for (auto& layer : allocation->active_spatial_layers) {
327 layer.width = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
328 read_at += 2;
329 layer.height = 1 + ByteReader<uint16_t, 2>::ReadBigEndian(read_at);
330 read_at += 2;
331 layer.frame_rate_fps = *read_at;
332 ++read_at;
333 }
334
335 return AllocationIsValid(*allocation);
336 }
337
ValueSize(const VideoLayersAllocation & allocation)338 size_t RtpVideoLayersAllocationExtension::ValueSize(
339 const VideoLayersAllocation& allocation) {
340 if (allocation.active_spatial_layers.empty()) {
341 return 1;
342 }
343 size_t result = 1; // header
344 SpatialLayersBitmasks slb = SpatialLayersBitmasksPerRtpStream(allocation);
345 if (!slb.bitmasks_are_the_same) {
346 ++result;
347 if (slb.max_rtp_stream_id >= 2) {
348 ++result;
349 }
350 }
351 // 2 bits per active spatial layer, rounded up to full byte, i.e.
352 // 0.25 byte per active spatial layer.
353 result += (allocation.active_spatial_layers.size() + 3) / 4;
354 for (const auto& spatial_layer : allocation.active_spatial_layers) {
355 for (DataRate value : spatial_layer.target_bitrate_per_temporal_layer) {
356 result += Leb128Size(value.kbps());
357 }
358 }
359 if (allocation.resolution_and_frame_rate_is_valid) {
360 result += 5 * allocation.active_spatial_layers.size();
361 }
362 return result;
363 }
364
365 } // namespace webrtc
366