xref: /aosp_15_r20/external/webrtc/modules/video_coding/svc/scalability_structure_key_svc.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
/*
 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 #include "modules/video_coding/svc/scalability_structure_key_svc.h"
11 
12 #include <bitset>
13 #include <utility>
14 #include <vector>
15 
16 #include "absl/types/optional.h"
17 #include "api/transport/rtp/dependency_descriptor.h"
18 #include "api/video/video_bitrate_allocation.h"
19 #include "common_video/generic_frame_descriptor/generic_frame_info.h"
20 #include "modules/video_coding/svc/scalable_video_controller.h"
21 #include "rtc_base/checks.h"
22 #include "rtc_base/logging.h"
23 
24 namespace webrtc {
25 
26 constexpr int ScalabilityStructureKeySvc::kMaxNumSpatialLayers;
27 constexpr int ScalabilityStructureKeySvc::kMaxNumTemporalLayers;
28 
ScalabilityStructureKeySvc(int num_spatial_layers,int num_temporal_layers)29 ScalabilityStructureKeySvc::ScalabilityStructureKeySvc(int num_spatial_layers,
30                                                        int num_temporal_layers)
31     : num_spatial_layers_(num_spatial_layers),
32       num_temporal_layers_(num_temporal_layers),
33       active_decode_targets_(
34           (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) {
35   // There is no point to use this structure without spatial scalability.
36   RTC_DCHECK_GT(num_spatial_layers, 1);
37   RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers);
38   RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers);
39 }
40 
41 ScalabilityStructureKeySvc::~ScalabilityStructureKeySvc() = default;
42 
43 ScalableVideoController::StreamLayersConfig
StreamConfig() const44 ScalabilityStructureKeySvc::StreamConfig() const {
45   StreamLayersConfig result;
46   result.num_spatial_layers = num_spatial_layers_;
47   result.num_temporal_layers = num_temporal_layers_;
48   result.scaling_factor_num[num_spatial_layers_ - 1] = 1;
49   result.scaling_factor_den[num_spatial_layers_ - 1] = 1;
50   for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) {
51     result.scaling_factor_num[sid - 1] = 1;
52     result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid];
53   }
54   result.uses_reference_scaling = true;
55   return result;
56 }
57 
TemporalLayerIsActive(int tid) const58 bool ScalabilityStructureKeySvc::TemporalLayerIsActive(int tid) const {
59   if (tid >= num_temporal_layers_) {
60     return false;
61   }
62   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
63     if (DecodeTargetIsActive(sid, tid)) {
64       return true;
65     }
66   }
67   return false;
68 }
69 
Dti(int sid,int tid,const LayerFrameConfig & config)70 DecodeTargetIndication ScalabilityStructureKeySvc::Dti(
71     int sid,
72     int tid,
73     const LayerFrameConfig& config) {
74   if (config.IsKeyframe() || config.Id() == kKey) {
75     RTC_DCHECK_EQ(config.TemporalId(), 0);
76     return sid < config.SpatialId() ? DecodeTargetIndication::kNotPresent
77                                     : DecodeTargetIndication::kSwitch;
78   }
79 
80   if (sid != config.SpatialId() || tid < config.TemporalId()) {
81     return DecodeTargetIndication::kNotPresent;
82   }
83   if (tid == config.TemporalId() && tid > 0) {
84     return DecodeTargetIndication::kDiscardable;
85   }
86   return DecodeTargetIndication::kSwitch;
87 }
88 
89 std::vector<ScalableVideoController::LayerFrameConfig>
KeyframeConfig()90 ScalabilityStructureKeySvc::KeyframeConfig() {
91   std::vector<LayerFrameConfig> configs;
92   configs.reserve(num_spatial_layers_);
93   absl::optional<int> spatial_dependency_buffer_id;
94   spatial_id_is_enabled_.reset();
95   // Disallow temporal references cross T0 on higher temporal layers.
96   can_reference_t1_frame_for_spatial_id_.reset();
97   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
98     if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
99       continue;
100     }
101     configs.emplace_back();
102     ScalableVideoController::LayerFrameConfig& config = configs.back();
103     config.Id(kKey).S(sid).T(0);
104 
105     if (spatial_dependency_buffer_id) {
106       config.Reference(*spatial_dependency_buffer_id);
107     } else {
108       config.Keyframe();
109     }
110     config.Update(BufferIndex(sid, /*tid=*/0));
111 
112     spatial_id_is_enabled_.set(sid);
113     spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0);
114   }
115   return configs;
116 }
117 
118 std::vector<ScalableVideoController::LayerFrameConfig>
T0Config()119 ScalabilityStructureKeySvc::T0Config() {
120   std::vector<LayerFrameConfig> configs;
121   configs.reserve(num_spatial_layers_);
122   // Disallow temporal references cross T0 on higher temporal layers.
123   can_reference_t1_frame_for_spatial_id_.reset();
124   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
125     if (!DecodeTargetIsActive(sid, /*tid=*/0)) {
126       spatial_id_is_enabled_.reset(sid);
127       continue;
128     }
129     configs.emplace_back();
130     configs.back().Id(kDeltaT0).S(sid).T(0).ReferenceAndUpdate(
131         BufferIndex(sid, /*tid=*/0));
132   }
133   return configs;
134 }
135 
136 std::vector<ScalableVideoController::LayerFrameConfig>
T1Config()137 ScalabilityStructureKeySvc::T1Config() {
138   std::vector<LayerFrameConfig> configs;
139   configs.reserve(num_spatial_layers_);
140   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
141     if (!DecodeTargetIsActive(sid, /*tid=*/1)) {
142       continue;
143     }
144     configs.emplace_back();
145     ScalableVideoController::LayerFrameConfig& config = configs.back();
146     config.Id(kDeltaT1).S(sid).T(1).Reference(BufferIndex(sid, /*tid=*/0));
147     if (num_temporal_layers_ > 2) {
148       config.Update(BufferIndex(sid, /*tid=*/1));
149     }
150   }
151   return configs;
152 }
153 
154 std::vector<ScalableVideoController::LayerFrameConfig>
T2Config(FramePattern pattern)155 ScalabilityStructureKeySvc::T2Config(FramePattern pattern) {
156   std::vector<LayerFrameConfig> configs;
157   configs.reserve(num_spatial_layers_);
158   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
159     if (!DecodeTargetIsActive(sid, /*tid=*/2)) {
160       continue;
161     }
162     configs.emplace_back();
163     ScalableVideoController::LayerFrameConfig& config = configs.back();
164     config.Id(pattern).S(sid).T(2);
165     if (can_reference_t1_frame_for_spatial_id_[sid]) {
166       config.Reference(BufferIndex(sid, /*tid=*/1));
167     } else {
168       config.Reference(BufferIndex(sid, /*tid=*/0));
169     }
170   }
171   return configs;
172 }
173 
174 ScalabilityStructureKeySvc::FramePattern
NextPattern(FramePattern last_pattern) const175 ScalabilityStructureKeySvc::NextPattern(FramePattern last_pattern) const {
176   switch (last_pattern) {
177     case kNone:
178       return kKey;
179     case kDeltaT2B:
180       return kDeltaT0;
181     case kDeltaT2A:
182       if (TemporalLayerIsActive(1)) {
183         return kDeltaT1;
184       }
185       return kDeltaT0;
186     case kDeltaT1:
187       if (TemporalLayerIsActive(2)) {
188         return kDeltaT2B;
189       }
190       return kDeltaT0;
191     case kDeltaT0:
192     case kKey:
193       if (TemporalLayerIsActive(2)) {
194         return kDeltaT2A;
195       }
196       if (TemporalLayerIsActive(1)) {
197         return kDeltaT1;
198       }
199       return kDeltaT0;
200   }
201   RTC_DCHECK_NOTREACHED();
202   return kNone;
203 }
204 
205 std::vector<ScalableVideoController::LayerFrameConfig>
NextFrameConfig(bool restart)206 ScalabilityStructureKeySvc::NextFrameConfig(bool restart) {
207   if (active_decode_targets_.none()) {
208     last_pattern_ = kNone;
209     return {};
210   }
211 
212   if (restart) {
213     last_pattern_ = kNone;
214   }
215 
216   FramePattern current_pattern = NextPattern(last_pattern_);
217   switch (current_pattern) {
218     case kKey:
219       return KeyframeConfig();
220     case kDeltaT0:
221       return T0Config();
222     case kDeltaT1:
223       return T1Config();
224     case kDeltaT2A:
225     case kDeltaT2B:
226       return T2Config(current_pattern);
227     case kNone:
228       break;
229   }
230   RTC_DCHECK_NOTREACHED();
231   return {};
232 }
233 
OnEncodeDone(const LayerFrameConfig & config)234 GenericFrameInfo ScalabilityStructureKeySvc::OnEncodeDone(
235     const LayerFrameConfig& config) {
236   // When encoder drops all frames for a temporal unit, it is better to reuse
237   // old temporal pattern rather than switch to next one, thus switch to next
238   // pattern defered here from the `NextFrameConfig`.
239   // In particular creating VP9 references rely on this behavior.
240   last_pattern_ = static_cast<FramePattern>(config.Id());
241   if (config.TemporalId() == 1) {
242     can_reference_t1_frame_for_spatial_id_.set(config.SpatialId());
243   }
244 
245   GenericFrameInfo frame_info;
246   frame_info.spatial_id = config.SpatialId();
247   frame_info.temporal_id = config.TemporalId();
248   frame_info.encoder_buffers = config.Buffers();
249   frame_info.decode_target_indications.reserve(num_spatial_layers_ *
250                                                num_temporal_layers_);
251   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
252     for (int tid = 0; tid < num_temporal_layers_; ++tid) {
253       frame_info.decode_target_indications.push_back(Dti(sid, tid, config));
254     }
255   }
256   frame_info.part_of_chain.assign(num_spatial_layers_, false);
257   if (config.IsKeyframe() || config.Id() == kKey) {
258     RTC_DCHECK_EQ(config.TemporalId(), 0);
259     for (int sid = config.SpatialId(); sid < num_spatial_layers_; ++sid) {
260       frame_info.part_of_chain[sid] = true;
261     }
262   } else if (config.TemporalId() == 0) {
263     frame_info.part_of_chain[config.SpatialId()] = true;
264   }
265   frame_info.active_decode_targets = active_decode_targets_;
266   return frame_info;
267 }
268 
OnRatesUpdated(const VideoBitrateAllocation & bitrates)269 void ScalabilityStructureKeySvc::OnRatesUpdated(
270     const VideoBitrateAllocation& bitrates) {
271   for (int sid = 0; sid < num_spatial_layers_; ++sid) {
272     // Enable/disable spatial layers independetely.
273     bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0;
274     SetDecodeTargetIsActive(sid, /*tid=*/0, active);
275     if (!spatial_id_is_enabled_[sid] && active) {
276       // Key frame is required to reenable any spatial layer.
277       last_pattern_ = kNone;
278     }
279 
280     for (int tid = 1; tid < num_temporal_layers_; ++tid) {
281       // To enable temporal layer, require bitrates for lower temporal layers.
282       active = active && bitrates.GetBitrate(sid, tid) > 0;
283       SetDecodeTargetIsActive(sid, tid, active);
284     }
285   }
286 }
287 
288 ScalabilityStructureL2T1Key::~ScalabilityStructureL2T1Key() = default;
289 
DependencyStructure() const290 FrameDependencyStructure ScalabilityStructureL2T1Key::DependencyStructure()
291     const {
292   FrameDependencyStructure structure;
293   structure.num_decode_targets = 2;
294   structure.num_chains = 2;
295   structure.decode_target_protected_by_chain = {0, 1};
296   structure.templates.resize(4);
297   structure.templates[0].S(0).Dtis("S-").ChainDiffs({2, 1}).FrameDiffs({2});
298   structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0});
299   structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 2}).FrameDiffs({2});
300   structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1});
301   return structure;
302 }
303 
304 ScalabilityStructureL2T2Key::~ScalabilityStructureL2T2Key() = default;
305 
DependencyStructure() const306 FrameDependencyStructure ScalabilityStructureL2T2Key::DependencyStructure()
307     const {
308   FrameDependencyStructure structure;
309   structure.num_decode_targets = 4;
310   structure.num_chains = 2;
311   structure.decode_target_protected_by_chain = {0, 0, 1, 1};
312   structure.templates.resize(6);
313   auto& templates = structure.templates;
314   templates[0].S(0).T(0).Dtis("SSSS").ChainDiffs({0, 0});
315   templates[1].S(0).T(0).Dtis("SS--").ChainDiffs({4, 3}).FrameDiffs({4});
316   templates[2].S(0).T(1).Dtis("-D--").ChainDiffs({2, 1}).FrameDiffs({2});
317   templates[3].S(1).T(0).Dtis("--SS").ChainDiffs({1, 1}).FrameDiffs({1});
318   templates[4].S(1).T(0).Dtis("--SS").ChainDiffs({1, 4}).FrameDiffs({4});
319   templates[5].S(1).T(1).Dtis("---D").ChainDiffs({3, 2}).FrameDiffs({2});
320   return structure;
321 }
322 
323 ScalabilityStructureL2T3Key::~ScalabilityStructureL2T3Key() = default;
324 
DependencyStructure() const325 FrameDependencyStructure ScalabilityStructureL2T3Key::DependencyStructure()
326     const {
327   FrameDependencyStructure structure;
328   structure.num_decode_targets = 6;
329   structure.num_chains = 2;
330   structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1};
331   auto& templates = structure.templates;
332   templates.resize(10);
333   templates[0].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0});
334   templates[1].S(0).T(0).Dtis("SSS---").ChainDiffs({8, 7}).FrameDiffs({8});
335   templates[2].S(0).T(1).Dtis("-DS---").ChainDiffs({4, 3}).FrameDiffs({4});
336   templates[3].S(0).T(2).Dtis("--D---").ChainDiffs({2, 1}).FrameDiffs({2});
337   templates[4].S(0).T(2).Dtis("--D---").ChainDiffs({6, 5}).FrameDiffs({2});
338   templates[5].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 1}).FrameDiffs({1});
339   templates[6].S(1).T(0).Dtis("---SSS").ChainDiffs({1, 8}).FrameDiffs({8});
340   templates[7].S(1).T(1).Dtis("----DS").ChainDiffs({5, 4}).FrameDiffs({4});
341   templates[8].S(1).T(2).Dtis("-----D").ChainDiffs({3, 2}).FrameDiffs({2});
342   templates[9].S(1).T(2).Dtis("-----D").ChainDiffs({7, 6}).FrameDiffs({2});
343   return structure;
344 }
345 
346 ScalabilityStructureL3T1Key::~ScalabilityStructureL3T1Key() = default;
347 
DependencyStructure() const348 FrameDependencyStructure ScalabilityStructureL3T1Key::DependencyStructure()
349     const {
350   FrameDependencyStructure structure;
351   structure.num_decode_targets = 3;
352   structure.num_chains = 3;
353   structure.decode_target_protected_by_chain = {0, 1, 2};
354   auto& t = structure.templates;
355   t.resize(6);
356   // Templates are shown in the order frames following them appear in the
357   // stream, but in `structure.templates` array templates are sorted by
358   // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
359   // requirement.
360   t[1].S(0).Dtis("SSS").ChainDiffs({0, 0, 0});
361   t[3].S(1).Dtis("-SS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
362   t[5].S(2).Dtis("--S").ChainDiffs({2, 1, 1}).FrameDiffs({1});
363   t[0].S(0).Dtis("S--").ChainDiffs({3, 2, 1}).FrameDiffs({3});
364   t[2].S(1).Dtis("-S-").ChainDiffs({1, 3, 2}).FrameDiffs({3});
365   t[4].S(2).Dtis("--S").ChainDiffs({2, 1, 3}).FrameDiffs({3});
366   return structure;
367 }
368 
369 ScalabilityStructureL3T2Key::~ScalabilityStructureL3T2Key() = default;
370 
DependencyStructure() const371 FrameDependencyStructure ScalabilityStructureL3T2Key::DependencyStructure()
372     const {
373   FrameDependencyStructure structure;
374   structure.num_decode_targets = 6;
375   structure.num_chains = 3;
376   structure.decode_target_protected_by_chain = {0, 0, 1, 1, 2, 2};
377   auto& t = structure.templates;
378   t.resize(9);
379   // Templates are shown in the order frames following them appear in the
380   // stream, but in `structure.templates` array templates are sorted by
381   // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
382   // requirement.
383   t[1].S(0).T(0).Dtis("SSSSSS").ChainDiffs({0, 0, 0});
384   t[4].S(1).T(0).Dtis("--SSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
385   t[7].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
386   t[2].S(0).T(1).Dtis("-D----").ChainDiffs({3, 2, 1}).FrameDiffs({3});
387   t[5].S(1).T(1).Dtis("---D--").ChainDiffs({4, 3, 2}).FrameDiffs({3});
388   t[8].S(2).T(1).Dtis("-----D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
389   t[0].S(0).T(0).Dtis("SS----").ChainDiffs({6, 5, 4}).FrameDiffs({6});
390   t[3].S(1).T(0).Dtis("--SS--").ChainDiffs({1, 6, 5}).FrameDiffs({6});
391   t[6].S(2).T(0).Dtis("----SS").ChainDiffs({2, 1, 6}).FrameDiffs({6});
392   return structure;
393 }
394 
395 ScalabilityStructureL3T3Key::~ScalabilityStructureL3T3Key() = default;
396 
DependencyStructure() const397 FrameDependencyStructure ScalabilityStructureL3T3Key::DependencyStructure()
398     const {
399   FrameDependencyStructure structure;
400   structure.num_decode_targets = 9;
401   structure.num_chains = 3;
402   structure.decode_target_protected_by_chain = {0, 0, 0, 1, 1, 1, 2, 2, 2};
403   auto& t = structure.templates;
404   t.resize(15);
405   // Templates are shown in the order frames following them appear in the
406   // stream, but in `structure.templates` array templates are sorted by
407   // (`spatial_id`, `temporal_id`) since that is a dependency descriptor
408   // requirement. Indexes are written in hex for nicer alignment.
409   t[0x0].S(0).T(0).Dtis("SSSSSSSSS").ChainDiffs({0, 0, 0});
410   t[0x5].S(1).T(0).Dtis("---SSSSSS").ChainDiffs({1, 1, 1}).FrameDiffs({1});
411   t[0xA].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 1}).FrameDiffs({1});
412   t[0x3].S(0).T(2).Dtis("--D------").ChainDiffs({3, 2, 1}).FrameDiffs({3});
413   t[0x8].S(1).T(2).Dtis("-----D---").ChainDiffs({4, 3, 2}).FrameDiffs({3});
414   t[0xD].S(2).T(2).Dtis("--------D").ChainDiffs({5, 4, 3}).FrameDiffs({3});
415   t[0x2].S(0).T(1).Dtis("-DS------").ChainDiffs({6, 5, 4}).FrameDiffs({6});
416   t[0x7].S(1).T(1).Dtis("----DS---").ChainDiffs({7, 6, 5}).FrameDiffs({6});
417   t[0xC].S(2).T(1).Dtis("-------DS").ChainDiffs({8, 7, 6}).FrameDiffs({6});
418   t[0x4].S(0).T(2).Dtis("--D------").ChainDiffs({9, 8, 7}).FrameDiffs({3});
419   t[0x9].S(1).T(2).Dtis("-----D---").ChainDiffs({10, 9, 8}).FrameDiffs({3});
420   t[0xE].S(2).T(2).Dtis("--------D").ChainDiffs({11, 10, 9}).FrameDiffs({3});
421   t[0x1].S(0).T(0).Dtis("SSS------").ChainDiffs({12, 11, 10}).FrameDiffs({12});
422   t[0x6].S(1).T(0).Dtis("---SSS---").ChainDiffs({1, 12, 11}).FrameDiffs({12});
423   t[0xB].S(2).T(0).Dtis("------SSS").ChainDiffs({2, 1, 12}).FrameDiffs({12});
424   return structure;
425 }
426 
427 }  // namespace webrtc
428