/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 #include "modules/video_coding/svc/scalability_structure_full_svc.h"
11
12 #include <utility>
13 #include <vector>
14
15 #include "absl/strings/string_view.h"
16 #include "absl/types/optional.h"
17 #include "api/transport/rtp/dependency_descriptor.h"
18 #include "rtc_base/checks.h"
19 #include "rtc_base/logging.h"
20
21 namespace webrtc {
22
23 constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers;
24 constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers;
25 constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[];
26
ScalabilityStructureFullSvc(int num_spatial_layers,int num_temporal_layers,ScalingFactor resolution_factor)27 ScalabilityStructureFullSvc::ScalabilityStructureFullSvc(
28 int num_spatial_layers,
29 int num_temporal_layers,
30 ScalingFactor resolution_factor)
31 : num_spatial_layers_(num_spatial_layers),
32 num_temporal_layers_(num_temporal_layers),
33 resolution_factor_(resolution_factor),
34 active_decode_targets_(
35 (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
36 RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
37 RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
38 }
39
40 ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default;
41
42 ScalabilityStructureFullSvc::StreamLayersConfig
StreamConfig() const43 ScalabilityStructureFullSvc::StreamConfig() const {
44 StreamLayersConfig result;
45 result.num_spatial_layers = num_spatial_layers_;
46 result.num_temporal_layers = num_temporal_layers_;
47 result.scaling_factor_num[num_spatial_layers_ - 1] = 1;
48 result.scaling_factor_den[num_spatial_layers_ - 1] = 1;
49 for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) {
50 result.scaling_factor_num[sid - 1] =
51 resolution_factor_.num * result.scaling_factor_num[sid];
52 result.scaling_factor_den[sid - 1] =
53 resolution_factor_.den * result.scaling_factor_den[sid];
54 }
55 result.uses_reference_scaling = num_spatial_layers_ > 1;
56 return result;
57 }
58
TemporalLayerIsActive(int tid) const59 bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const {
60 if (tid >= num_temporal_layers_) {
61 return false;
62 }
63 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
64 if (DecodeTargetIsActive(sid, tid)) {
65 return true;
66 }
67 }
68 return false;
69 }
70
Dti(int sid,int tid,const LayerFrameConfig & config)71 DecodeTargetIndication ScalabilityStructureFullSvc::Dti(
72 int sid,
73 int tid,
74 const LayerFrameConfig& config) {
75 if (sid < config.SpatialId() || tid < config.TemporalId()) {
76 return DecodeTargetIndication::kNotPresent;
77 }
78 if (sid == config.SpatialId()) {
79 if (tid == 0) {
80 RTC_DCHECK_EQ(config.TemporalId(), 0);
81 return DecodeTargetIndication::kSwitch;
82 }
83 if (tid == config.TemporalId()) {
84 return DecodeTargetIndication::kDiscardable;
85 }
86 if (tid > config.TemporalId()) {
87 RTC_DCHECK_GT(tid, config.TemporalId());
88 return DecodeTargetIndication::kSwitch;
89 }
90 }
91 RTC_DCHECK_GT(sid, config.SpatialId());
92 RTC_DCHECK_GE(tid, config.TemporalId());
93 if (config.IsKeyframe() || config.Id() == kKey) {
94 return DecodeTargetIndication::kSwitch;
95 }
96 return DecodeTargetIndication::kRequired;
97 }
98
99 ScalabilityStructureFullSvc::FramePattern
NextPattern() const100 ScalabilityStructureFullSvc::NextPattern() const {
101 switch (last_pattern_) {
102 case kNone:
103 return kKey;
104 case kDeltaT2B:
105 return kDeltaT0;
106 case kDeltaT2A:
107 if (TemporalLayerIsActive(1)) {
108 return kDeltaT1;
109 }
110 return kDeltaT0;
111 case kDeltaT1:
112 if (TemporalLayerIsActive(2)) {
113 return kDeltaT2B;
114 }
115 return kDeltaT0;
116 case kKey:
117 case kDeltaT0:
118 if (TemporalLayerIsActive(2)) {
119 return kDeltaT2A;
120 }
121 if (TemporalLayerIsActive(1)) {
122 return kDeltaT1;
123 }
124 return kDeltaT0;
125 }
126 RTC_DCHECK_NOTREACHED();
127 return kNone;
128 }
129
130 std::vector<ScalableVideoController::LayerFrameConfig>
NextFrameConfig(bool restart)131 ScalabilityStructureFullSvc::NextFrameConfig(bool restart) {
132 std::vector<LayerFrameConfig> configs;
133 if (active_decode_targets_.none()) {
134 last_pattern_ = kNone;
135 return configs;
136 }
137 configs.reserve(num_spatial_layers_);
138
139 if (last_pattern_ == kNone || restart) {
140 can_reference_t0_frame_for_spatial_id_.reset();
141 last_pattern_ = kNone;
142 }
143 FramePattern current_pattern = NextPattern();
144
145 absl::optional<int> spatial_dependency_buffer_id;
146 switch (current_pattern) {
147 case kDeltaT0:
148 case kKey:
149 // Disallow temporal references cross T0 on higher temporal layers.
150 can_reference_t1_frame_for_spatial_id_.reset();
151 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
152 if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
153 // Next frame from the spatial layer `sid` shouldn't depend on
154 // potentially old previous frame from the spatial layer `sid`.
155 can_reference_t0_frame_for_spatial_id_.reset(sid);
156 continue;
157 }
158 configs.emplace_back();
159 ScalableVideoController::LayerFrameConfig& config = configs.back();
160 config.Id(current_pattern).S(sid).T(0);
161
162 if (spatial_dependency_buffer_id) {
163 config.Reference(*spatial_dependency_buffer_id);
164 } else if (current_pattern == kKey) {
165 config.Keyframe();
166 }
167
168 if (can_reference_t0_frame_for_spatial_id_[sid]) {
169 config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0));
170 } else {
171 // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame
172 // to ChainDiffCalculator
173 config.Update(BufferIndex(sid, /*tid=*/0));
174 }
175
176 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
177 }
178 break;
179 case kDeltaT1:
180 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
181 if (!DecodeTargetIsActive(sid, /*tid=*/1) ||
182 !can_reference_t0_frame_for_spatial_id_[sid]) {
183 continue;
184 }
185 configs.emplace_back();
186 ScalableVideoController::LayerFrameConfig& config = configs.back();
187 config.Id(current_pattern).S(sid).T(1);
188 // Temporal reference.
189 config.Reference(BufferIndex(sid, /*tid=*/0));
190 // Spatial reference unless this is the lowest active spatial layer.
191 if (spatial_dependency_buffer_id) {
192 config.Reference(*spatial_dependency_buffer_id);
193 }
194 // No frame reference top layer frame, so no need save it into a buffer.
195 if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) {
196 config.Update(BufferIndex(sid, /*tid=*/1));
197 }
198 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1);
199 }
200 break;
201 case kDeltaT2A:
202 case kDeltaT2B:
203 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
204 if (!DecodeTargetIsActive(sid, /*tid=*/2) ||
205 !can_reference_t0_frame_for_spatial_id_[sid]) {
206 continue;
207 }
208 configs.emplace_back();
209 ScalableVideoController::LayerFrameConfig& config = configs.back();
210 config.Id(current_pattern).S(sid).T(2);
211 // Temporal reference.
212 if (current_pattern == kDeltaT2B &&
213 can_reference_t1_frame_for_spatial_id_[sid]) {
214 config.Reference(BufferIndex(sid, /*tid=*/1));
215 } else {
216 config.Reference(BufferIndex(sid, /*tid=*/0));
217 }
218 // Spatial reference unless this is the lowest active spatial layer.
219 if (spatial_dependency_buffer_id) {
220 config.Reference(*spatial_dependency_buffer_id);
221 }
222 // No frame reference top layer frame, so no need save it into a buffer.
223 if (sid < num_spatial_layers_ - 1) {
224 config.Update(BufferIndex(sid, /*tid=*/2));
225 }
226 spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2);
227 }
228 break;
229 case kNone:
230 RTC_DCHECK_NOTREACHED();
231 break;
232 }
233
234 if (configs.empty() && !restart) {
235 RTC_LOG(LS_WARNING) << "Failed to generate configuration for L"
236 << num_spatial_layers_ << "T" << num_temporal_layers_
237 << " with active decode targets "
238 << active_decode_targets_.to_string('-').substr(
239 active_decode_targets_.size() -
240 num_spatial_layers_ * num_temporal_layers_)
241 << " and transition from "
242 << kFramePatternNames[last_pattern_] << " to "
243 << kFramePatternNames[current_pattern]
244 << ". Resetting.";
245 return NextFrameConfig(/*restart=*/true);
246 }
247
248 return configs;
249 }
250
OnEncodeDone(const LayerFrameConfig & config)251 GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone(
252 const LayerFrameConfig& config) {
253 // When encoder drops all frames for a temporal unit, it is better to reuse
254 // old temporal pattern rather than switch to next one, thus switch to next
255 // pattern defered here from the `NextFrameConfig`.
256 // In particular creating VP9 references rely on this behavior.
257 last_pattern_ = static_cast<FramePattern>(config.Id());
258 if (config.TemporalId() == 0) {
259 can_reference_t0_frame_for_spatial_id_.set(config.SpatialId());
260 }
261 if (config.TemporalId() == 1) {
262 can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
263 }
264
265 GenericFrameInfo frame_info;
266 frame_info.spatial_id = config.SpatialId();
267 frame_info.temporal_id = config.TemporalId();
268 frame_info.encoder_buffers = config.Buffers();
269 frame_info.decode_target_indications.reserve(num_spatial_layers_ *
270 num_temporal_layers_);
271 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
272 for (int tid = 0; tid < num_temporal_layers_; ++tid) {
273 frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
274 }
275 }
276 if (config.TemporalId() == 0) {
277 frame_info.part_of_chain.resize(num_spatial_layers_);
278 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
279 frame_info.part_of_chain[sid] = config.SpatialId() <= sid;
280 }
281 } else {
282 frame_info.part_of_chain.assign(num_spatial_layers_, false);
283 }
284 frame_info.active_decode_targets = active_decode_targets_;
285 return frame_info;
286 }
287
OnRatesUpdated(const VideoBitrateAllocation & bitrates)288 void ScalabilityStructureFullSvc::OnRatesUpdated(
289 const VideoBitrateAllocation& bitrates) {
290 for (int sid = 0; sid < num_spatial_layers_; ++sid) {
291 // Enable/disable spatial layers independetely.
292 bool active = true;
293 for (int tid = 0; tid < num_temporal_layers_; ++tid) {
294 // To enable temporal layer, require bitrates for lower temporal layers.
295 active = active && bitrates.GetBitrate(sid, tid) > 0;
296 SetDecodeTargetIsActive(sid, tid, active);
297 }
298 }
299 }
300
DependencyStructure() const301 FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const {
302 FrameDependencyStructure structure;
303 structure.num_decode_targets = 2;
304 structure.num_chains = 1;
305 structure.decode_target_protected_by_chain = {0, 0};
306 structure.templates.resize(3);
307 structure.templates[0].T(0).Dtis("SS").ChainDiffs({0});
308 structure.templates[1].T(0).Dtis("SS").ChainDiffs({2}).FrameDiffs({2});
309 structure.templates[2].T(1).Dtis("-D").ChainDiffs({1}).FrameDiffs({1});
310 return structure;
311 }
312
DependencyStructure() const313 FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const {
314 FrameDependencyStructure structure;
315 structure.num_decode_targets = 3;
316 structure.num_chains = 1;
317 structure.decode_target_protected_by_chain = {0, 0, 0};
318 structure.templates.resize(5);
319 structure.templates[0].T(0).Dtis("SSS").ChainDiffs({0});
320 structure.templates[1].T(0).Dtis("SSS").ChainDiffs({4}).FrameDiffs({4});
321 structure.templates[2].T(1).Dtis("-DS").ChainDiffs({2}).FrameDiffs({2});
322 structure.templates[3].T(2).Dtis("--D").ChainDiffs({1}).FrameDiffs({1});
323 structure.templates[4].T(2).Dtis("--D").ChainDiffs({3}).FrameDiffs({1});
324 return structure;
325 }
326
DependencyStructure() const327 FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const {
328 FrameDependencyStructure structure;
329 structure.num_decode_targets = 2;
330 structure.num_chains = 2;
331 structure.decode_target_protected_by_chain = {0, 1};
332 structure.templates.resize(4);
333 structure.templates[0].S(0).Dtis("SR").ChainDiffs({2, 1}).FrameDiffs({2});
334 structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
335 structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({2, 1});
336 structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
337 return structure;
338 }
339
DependencyStructure() const340 FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const {
341 FrameDependencyStructure structure;
342 structure.num_decode_targets = 4;
343 structure.num_chains = 2;
344 structure.decode_target_protected_by_chain = {0, 0, 1, 1};
345 structure.templates.resize(6);
346 auto& templates = structure.templates;
347 templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
348 templates[1].S(0).T(0).Dtis("SSRR").ChainDiffs({4, 3}).FrameDiffs({4});
349 templates[2].S(0).T(1).Dtis("-D-R").ChainDiffs({2, 1}).FrameDiffs({2});
350 templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
351 templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({4, 1});
352 templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
353 return structure;
354 }
355
DependencyStructure() const356 FrameDependencyStructure ScalabilityStructureL2T3::DependencyStructure() const {
357 FrameDependencyStructure structure;
358 structure.num_decode_targets = 6;
359 structure.num_chains = 2;
360 structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
361 auto& t = structure.templates;
362 t.resize(10);
363 t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0});
364 t[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1});
365 t[3].S(0).T(2).Dtis("--D--R").ChainDiffs({2, 1}).FrameDiffs({2});
366 t[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2, 1});
367 t[2].S(0).T(1).Dtis("-DS-RR").ChainDiffs({4, 3}).FrameDiffs({4});
368 t[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4, 1});
369 t[4].S(0).T(2).Dtis("--D--R").ChainDiffs({6, 5}).FrameDiffs({2});
370 t[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2, 1});
371 t[0].S(0).T(0).Dtis("SSSRRR").ChainDiffs({8, 7}).FrameDiffs({8});
372 t[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({8, 1});
373 return structure;
374 }
375
DependencyStructure() const376 FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const {
377 FrameDependencyStructure structure;
378 structure.num_decode_targets = 3;
379 structure.num_chains = 3;
380 structure.decode_target_protected_by_chain = {0, 1, 2};
381 auto& templates = structure.templates;
382 templates.resize(6);
383 templates[0].S(0).Dtis("SRR").ChainDiffs({3, 2, 1}).FrameDiffs({3});
384 templates[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0});
385 templates[2].S(1).Dtis("-SR").ChainDiffs({1, 1, 1}).FrameDiffs({3, 1});
386 templates[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
387 templates[4].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({3, 1});
388 templates[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1});
389 return structure;
390 }
391
DependencyStructure() const392 FrameDependencyStructure ScalabilityStructureL3T2::DependencyStructure() const {
393 FrameDependencyStructure structure;
394 structure.num_decode_targets = 6;
395 structure.num_chains = 3;
396 structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
397 auto& t = structure.templates;
398 t.resize(9);
399 // Templates are shown in the order frames following them appear in the
400 // stream, but in `structure.templates` array templates are sorted by
401 // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
402 // requirement.
403 t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0});
404 t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
405 t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
406 t[2].S(0).T(1).Dtis("-D-R-R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
407 t[5].S(1).T(1).Dtis("---D-R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
408 t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
409 t[0].S(0).T(0).Dtis("SSRRRR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
410 t[3].S(1).T(0).Dtis("--SSRR").ChainDiffs({1, 1, 1}).FrameDiffs({6, 1});
411 t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({6, 1});
412 return structure;
413 }
414
DependencyStructure() const415 FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const {
416 FrameDependencyStructure structure;
417 structure.num_decode_targets = 9;
418 structure.num_chains = 3;
419 structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
420 auto& t = structure.templates;
421 t.resize(15);
422 // Templates are shown in the order frames following them appear in the
423 // stream, but in `structure.templates` array templates are sorted by
424 // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
425 // requirement. Indexes are written in hex for nicer alignment.
426 t[0x1].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
427 t[0x6].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
428 t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
429 t[0x3].S(0).T(2).Dtis("--D--R--R").ChainDiffs({3, 2, 1}).FrameDiffs({3});
430 t[0x8].S(1).T(2).Dtis("-----D--R").ChainDiffs({4, 3, 2}).FrameDiffs({3, 1});
431 t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3, 1});
432 t[0x2].S(0).T(1).Dtis("-DS-RR-RR").ChainDiffs({6, 5, 4}).FrameDiffs({6});
433 t[0x7].S(1).T(1).Dtis("----DS-RR").ChainDiffs({7, 6, 5}).FrameDiffs({6, 1});
434 t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6, 1});
435 t[0x4].S(0).T(2).Dtis("--D--R--R").ChainDiffs({9, 8, 7}).FrameDiffs({3});
436 t[0x9].S(1).T(2).Dtis("-----D--R").ChainDiffs({10, 9, 8}).FrameDiffs({3, 1});
437 t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3, 1});
438 t[0x0].S(0).T(0).Dtis("SSSRRRRRR").ChainDiffs({12, 11, 10}).FrameDiffs({12});
439 t[0x5].S(1).T(0).Dtis("---SSSRRR").ChainDiffs({1, 1, 1}).FrameDiffs({12, 1});
440 t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({12, 1});
441 return structure;
442 }
443
444 } // namespace webrtc
445