xref: /aosp_15_r20/external/webrtc/modules/video_coding/video_codec_initializer.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/include/video_codec_initializer.h"
12 
13 #include <stdint.h>
14 #include <string.h>
15 
16 #include <algorithm>
17 
18 #include "absl/types/optional.h"
19 #include "api/scoped_refptr.h"
20 #include "api/units/data_rate.h"
21 #include "api/video/video_bitrate_allocation.h"
22 #include "api/video_codecs/video_encoder.h"
23 #include "modules/video_coding/codecs/av1/av1_svc_config.h"
24 #include "modules/video_coding/codecs/vp8/vp8_scalability.h"
25 #include "modules/video_coding/codecs/vp9/svc_config.h"
26 #include "modules/video_coding/include/video_coding_defines.h"
27 #include "modules/video_coding/svc/scalability_mode_util.h"
28 #include "rtc_base/checks.h"
29 #include "rtc_base/experiments/min_video_bitrate_experiment.h"
30 #include "rtc_base/logging.h"
31 #include "rtc_base/numerics/safe_conversions.h"
32 
33 namespace webrtc {
34 
SetupCodec(const VideoEncoderConfig & config,const std::vector<VideoStream> & streams,VideoCodec * codec)35 bool VideoCodecInitializer::SetupCodec(const VideoEncoderConfig& config,
36                                        const std::vector<VideoStream>& streams,
37                                        VideoCodec* codec) {
38   if (config.codec_type == kVideoCodecMultiplex) {
39     VideoEncoderConfig associated_config = config.Copy();
40     associated_config.codec_type = kVideoCodecVP9;
41     if (!SetupCodec(associated_config, streams, codec)) {
42       RTC_LOG(LS_ERROR) << "Failed to create stereo encoder configuration.";
43       return false;
44     }
45     codec->codecType = kVideoCodecMultiplex;
46     return true;
47   }
48 
49   *codec = VideoEncoderConfigToVideoCodec(config, streams);
50   return true;
51 }
52 
53 // TODO(sprang): Split this up and separate the codec specific parts.
VideoEncoderConfigToVideoCodec(const VideoEncoderConfig & config,const std::vector<VideoStream> & streams)54 VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec(
55     const VideoEncoderConfig& config,
56     const std::vector<VideoStream>& streams) {
57   static const int kEncoderMinBitrateKbps = 30;
58   RTC_DCHECK(!streams.empty());
59   RTC_DCHECK_GE(config.min_transmit_bitrate_bps, 0);
60 
61   VideoCodec video_codec;
62   video_codec.codecType = config.codec_type;
63 
64   switch (config.content_type) {
65     case VideoEncoderConfig::ContentType::kRealtimeVideo:
66       video_codec.mode = VideoCodecMode::kRealtimeVideo;
67       break;
68     case VideoEncoderConfig::ContentType::kScreen:
69       video_codec.mode = VideoCodecMode::kScreensharing;
70       break;
71   }
72 
73   video_codec.legacy_conference_mode =
74       config.content_type == VideoEncoderConfig::ContentType::kScreen &&
75       config.legacy_conference_mode;
76 
77   video_codec.SetFrameDropEnabled(config.frame_drop_enabled);
78   video_codec.numberOfSimulcastStreams =
79       static_cast<unsigned char>(streams.size());
80   video_codec.minBitrate = streams[0].min_bitrate_bps / 1000;
81   bool codec_active = false;
82   // Active configuration might not be fully copied to `streams` for SVC yet.
83   // Therefore the `config` is checked here.
84   for (const VideoStream& stream : config.simulcast_layers) {
85     if (stream.active) {
86       codec_active = true;
87       break;
88     }
89   }
90   // Set active for the entire video codec for the non simulcast case.
91   video_codec.active = codec_active;
92   if (video_codec.minBitrate < kEncoderMinBitrateKbps)
93     video_codec.minBitrate = kEncoderMinBitrateKbps;
94   video_codec.timing_frame_thresholds = {kDefaultTimingFramesDelayMs,
95                                          kDefaultOutlierFrameSizePercent};
96   RTC_DCHECK_LE(streams.size(), kMaxSimulcastStreams);
97 
98   int max_framerate = 0;
99 
100   absl::optional<ScalabilityMode> scalability_mode =
101       streams[0].scalability_mode;
102   for (size_t i = 0; i < streams.size(); ++i) {
103     SimulcastStream* sim_stream = &video_codec.simulcastStream[i];
104     RTC_DCHECK_GT(streams[i].width, 0);
105     RTC_DCHECK_GT(streams[i].height, 0);
106     RTC_DCHECK_GT(streams[i].max_framerate, 0);
107     RTC_DCHECK_GE(streams[i].min_bitrate_bps, 0);
108     RTC_DCHECK_GE(streams[i].target_bitrate_bps, streams[i].min_bitrate_bps);
109     RTC_DCHECK_GE(streams[i].max_bitrate_bps, streams[i].target_bitrate_bps);
110     RTC_DCHECK_GE(streams[i].max_qp, 0);
111 
112     sim_stream->width = static_cast<uint16_t>(streams[i].width);
113     sim_stream->height = static_cast<uint16_t>(streams[i].height);
114     sim_stream->maxFramerate = streams[i].max_framerate;
115     sim_stream->minBitrate = streams[i].min_bitrate_bps / 1000;
116     sim_stream->targetBitrate = streams[i].target_bitrate_bps / 1000;
117     sim_stream->maxBitrate = streams[i].max_bitrate_bps / 1000;
118     sim_stream->qpMax = streams[i].max_qp;
119 
120     int num_temporal_layers =
121         streams[i].scalability_mode.has_value()
122             ? ScalabilityModeToNumTemporalLayers(*streams[i].scalability_mode)
123             : streams[i].num_temporal_layers.value_or(1);
124 
125     sim_stream->numberOfTemporalLayers =
126         static_cast<unsigned char>(num_temporal_layers);
127     sim_stream->active = streams[i].active;
128 
129     video_codec.width =
130         std::max(video_codec.width, static_cast<uint16_t>(streams[i].width));
131     video_codec.height =
132         std::max(video_codec.height, static_cast<uint16_t>(streams[i].height));
133     video_codec.minBitrate =
134         std::min(static_cast<uint16_t>(video_codec.minBitrate),
135                  static_cast<uint16_t>(streams[i].min_bitrate_bps / 1000));
136     video_codec.maxBitrate += streams[i].max_bitrate_bps / 1000;
137     video_codec.qpMax = std::max(video_codec.qpMax,
138                                  static_cast<unsigned int>(streams[i].max_qp));
139     max_framerate = std::max(max_framerate, streams[i].max_framerate);
140 
141     // TODO(bugs.webrtc.org/11607): Since scalability mode is a top-level
142     // setting on VideoCodec, setting it makes sense only if it is the same for
143     // all simulcast streams.
144     if (streams[0].scalability_mode != streams[i].scalability_mode) {
145       scalability_mode.reset();
146       // For VP8, top-level scalability mode doesn't matter, since configuration
147       // is based on the per-simulcast stream configuration of temporal layers.
148       if (video_codec.codecType != kVideoCodecVP8) {
149         RTC_LOG(LS_WARNING) << "Inconsistent scalability modes configured.";
150       }
151     }
152   }
153 
154   if (scalability_mode.has_value()) {
155     video_codec.SetScalabilityMode(*scalability_mode);
156   }
157 
158   if (video_codec.maxBitrate == 0) {
159     // Unset max bitrate -> cap to one bit per pixel.
160     video_codec.maxBitrate =
161         (video_codec.width * video_codec.height * video_codec.maxFramerate) /
162         1000;
163   }
164   if (video_codec.maxBitrate < kEncoderMinBitrateKbps)
165     video_codec.maxBitrate = kEncoderMinBitrateKbps;
166 
167   video_codec.maxFramerate = max_framerate;
168   video_codec.spatialLayers[0] = {0};
169   video_codec.spatialLayers[0].width = video_codec.width;
170   video_codec.spatialLayers[0].height = video_codec.height;
171   video_codec.spatialLayers[0].maxFramerate = max_framerate;
172   video_codec.spatialLayers[0].numberOfTemporalLayers =
173       streams[0].scalability_mode.has_value()
174           ? ScalabilityModeToNumTemporalLayers(*streams[0].scalability_mode)
175           : streams[0].num_temporal_layers.value_or(1);
176 
177   // Set codec specific options
178   if (config.encoder_specific_settings)
179     config.encoder_specific_settings->FillEncoderSpecificSettings(&video_codec);
180 
181   switch (video_codec.codecType) {
182     case kVideoCodecVP8: {
183       if (!config.encoder_specific_settings) {
184         *video_codec.VP8() = VideoEncoder::GetDefaultVp8Settings();
185       }
186 
187       // Validate specified scalability modes. If some layer has an unsupported
188       // mode, store it as the top-level scalability mode, which will make
189       // InitEncode fail with an appropriate error.
190       for (const auto& stream : streams) {
191         if (stream.scalability_mode.has_value() &&
192             !VP8SupportsScalabilityMode(*stream.scalability_mode)) {
193           RTC_LOG(LS_WARNING)
194               << "Invalid scalability mode for VP8: "
195               << ScalabilityModeToString(*stream.scalability_mode);
196           video_codec.SetScalabilityMode(*stream.scalability_mode);
197           break;
198         }
199       }
200       video_codec.VP8()->numberOfTemporalLayers =
201           streams.back().scalability_mode.has_value()
202               ? ScalabilityModeToNumTemporalLayers(
203                     *streams.back().scalability_mode)
204               : streams.back().num_temporal_layers.value_or(
205                     video_codec.VP8()->numberOfTemporalLayers);
206 
207       RTC_DCHECK_GE(video_codec.VP8()->numberOfTemporalLayers, 1);
208       RTC_DCHECK_LE(video_codec.VP8()->numberOfTemporalLayers,
209                     kMaxTemporalStreams);
210 
211       break;
212     }
213     case kVideoCodecVP9: {
214       // Force the first stream to always be active.
215       video_codec.simulcastStream[0].active = codec_active;
216 
217       if (!config.encoder_specific_settings) {
218         *video_codec.VP9() = VideoEncoder::GetDefaultVp9Settings();
219       }
220 
221       video_codec.VP9()->numberOfTemporalLayers = static_cast<unsigned char>(
222           streams.back().num_temporal_layers.value_or(
223               video_codec.VP9()->numberOfTemporalLayers));
224       RTC_DCHECK_GE(video_codec.VP9()->numberOfTemporalLayers, 1);
225       RTC_DCHECK_LE(video_codec.VP9()->numberOfTemporalLayers,
226                     kMaxTemporalStreams);
227 
228       RTC_DCHECK(config.spatial_layers.empty() ||
229                  config.spatial_layers.size() ==
230                      video_codec.VP9()->numberOfSpatialLayers);
231 
232       std::vector<SpatialLayer> spatial_layers;
233       if (!config.spatial_layers.empty()) {
234         // Layering is set explicitly.
235         spatial_layers = config.spatial_layers;
236       } else if (scalability_mode.has_value()) {
237         // Layering is set via scalability mode.
238         spatial_layers = GetVp9SvcConfig(video_codec);
239         if (spatial_layers.empty())
240           break;
241       } else {
242         size_t first_active_layer = 0;
243         for (size_t spatial_idx = 0;
244              spatial_idx < config.simulcast_layers.size(); ++spatial_idx) {
245           if (config.simulcast_layers[spatial_idx].active) {
246             first_active_layer = spatial_idx;
247             break;
248           }
249         }
250 
251         spatial_layers = GetSvcConfig(
252             video_codec.width, video_codec.height, video_codec.maxFramerate,
253             first_active_layer, video_codec.VP9()->numberOfSpatialLayers,
254             video_codec.VP9()->numberOfTemporalLayers,
255             video_codec.mode == VideoCodecMode::kScreensharing);
256 
257         // If there was no request for spatial layering, don't limit bitrate
258         // of single spatial layer.
259         const bool no_spatial_layering =
260             video_codec.VP9()->numberOfSpatialLayers <= 1;
261         if (no_spatial_layering) {
262           // Use codec's bitrate limits.
263           spatial_layers.back().minBitrate = video_codec.minBitrate;
264           spatial_layers.back().targetBitrate = video_codec.maxBitrate;
265           spatial_layers.back().maxBitrate = video_codec.maxBitrate;
266         }
267 
268         for (size_t spatial_idx = first_active_layer;
269              spatial_idx < config.simulcast_layers.size() &&
270              spatial_idx < spatial_layers.size() + first_active_layer;
271              ++spatial_idx) {
272           spatial_layers[spatial_idx - first_active_layer].active =
273               config.simulcast_layers[spatial_idx].active;
274         }
275       }
276 
277       RTC_DCHECK(!spatial_layers.empty());
278       for (size_t i = 0; i < spatial_layers.size(); ++i) {
279         video_codec.spatialLayers[i] = spatial_layers[i];
280       }
281 
282       // The top spatial layer dimensions may not be equal to the input
283       // resolution because of the rounding or explicit configuration.
284       // This difference must be propagated to the stream configuration.
285       video_codec.width = spatial_layers.back().width;
286       video_codec.height = spatial_layers.back().height;
287       video_codec.simulcastStream[0].width = spatial_layers.back().width;
288       video_codec.simulcastStream[0].height = spatial_layers.back().height;
289 
290       // Update layering settings.
291       video_codec.VP9()->numberOfSpatialLayers =
292           static_cast<unsigned char>(spatial_layers.size());
293       RTC_DCHECK_GE(video_codec.VP9()->numberOfSpatialLayers, 1);
294       RTC_DCHECK_LE(video_codec.VP9()->numberOfSpatialLayers,
295                     kMaxSpatialLayers);
296 
297       video_codec.VP9()->numberOfTemporalLayers = static_cast<unsigned char>(
298           spatial_layers.back().numberOfTemporalLayers);
299       RTC_DCHECK_GE(video_codec.VP9()->numberOfTemporalLayers, 1);
300       RTC_DCHECK_LE(video_codec.VP9()->numberOfTemporalLayers,
301                     kMaxTemporalStreams);
302 
303       break;
304     }
305     case kVideoCodecAV1:
306       if (SetAv1SvcConfig(video_codec,
307                           /*num_temporal_layers=*/
308                           streams.back().num_temporal_layers.value_or(1),
309                           /*num_spatial_layers=*/
310                           std::max<int>(config.spatial_layers.size(), 1))) {
311         for (size_t i = 0; i < config.spatial_layers.size(); ++i) {
312           video_codec.spatialLayers[i].active = config.spatial_layers[i].active;
313         }
314       } else {
315         RTC_LOG(LS_WARNING) << "Failed to configure svc bitrates for av1.";
316       }
317       break;
318     case kVideoCodecH264: {
319       RTC_CHECK(!config.encoder_specific_settings);
320 
321       *video_codec.H264() = VideoEncoder::GetDefaultH264Settings();
322       video_codec.H264()->numberOfTemporalLayers = static_cast<unsigned char>(
323           streams.back().num_temporal_layers.value_or(
324               video_codec.H264()->numberOfTemporalLayers));
325       RTC_DCHECK_GE(video_codec.H264()->numberOfTemporalLayers, 1);
326       RTC_DCHECK_LE(video_codec.H264()->numberOfTemporalLayers,
327                     kMaxTemporalStreams);
328       break;
329     }
330     default:
331       // TODO(pbos): Support encoder_settings codec-agnostically.
332       RTC_DCHECK(!config.encoder_specific_settings)
333           << "Encoder-specific settings for codec type not wired up.";
334       break;
335   }
336 
337   const absl::optional<DataRate> experimental_min_bitrate =
338       GetExperimentalMinVideoBitrate(video_codec.codecType);
339   if (experimental_min_bitrate) {
340     const int experimental_min_bitrate_kbps =
341         rtc::saturated_cast<int>(experimental_min_bitrate->kbps());
342     video_codec.minBitrate = experimental_min_bitrate_kbps;
343     video_codec.simulcastStream[0].minBitrate = experimental_min_bitrate_kbps;
344     if (video_codec.codecType == kVideoCodecVP9) {
345       video_codec.spatialLayers[0].minBitrate = experimental_min_bitrate_kbps;
346     }
347   }
348 
349   return video_codec;
350 }
351 
352 }  // namespace webrtc
353