xref: /aosp_15_r20/external/webrtc/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  *
10  */
11 
12 #ifdef RTC_ENABLE_VP9
13 
14 #include "modules/video_coding/codecs/vp9/libvpx_vp9_decoder.h"
15 
16 #include <algorithm>
17 
18 #include "absl/strings/match.h"
19 #include "api/transport/field_trial_based_config.h"
20 #include "api/video/color_space.h"
21 #include "api/video/i010_buffer.h"
22 #include "common_video/include/video_frame_buffer.h"
23 #include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
24 #include "rtc_base/checks.h"
25 #include "rtc_base/logging.h"
26 #include "third_party/libyuv/include/libyuv/convert.h"
27 #include "vpx/vp8dx.h"
28 #include "vpx/vpx_decoder.h"
29 
30 namespace webrtc {
31 namespace {
32 
33 // Helper class for extracting VP9 colorspace.
ExtractVP9ColorSpace(vpx_color_space_t space_t,vpx_color_range_t range_t,unsigned int bit_depth)34 ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t,
35                                 vpx_color_range_t range_t,
36                                 unsigned int bit_depth) {
37   ColorSpace::PrimaryID primaries = ColorSpace::PrimaryID::kUnspecified;
38   ColorSpace::TransferID transfer = ColorSpace::TransferID::kUnspecified;
39   ColorSpace::MatrixID matrix = ColorSpace::MatrixID::kUnspecified;
40   switch (space_t) {
41     case VPX_CS_BT_601:
42     case VPX_CS_SMPTE_170:
43       primaries = ColorSpace::PrimaryID::kSMPTE170M;
44       transfer = ColorSpace::TransferID::kSMPTE170M;
45       matrix = ColorSpace::MatrixID::kSMPTE170M;
46       break;
47     case VPX_CS_SMPTE_240:
48       primaries = ColorSpace::PrimaryID::kSMPTE240M;
49       transfer = ColorSpace::TransferID::kSMPTE240M;
50       matrix = ColorSpace::MatrixID::kSMPTE240M;
51       break;
52     case VPX_CS_BT_709:
53       primaries = ColorSpace::PrimaryID::kBT709;
54       transfer = ColorSpace::TransferID::kBT709;
55       matrix = ColorSpace::MatrixID::kBT709;
56       break;
57     case VPX_CS_BT_2020:
58       primaries = ColorSpace::PrimaryID::kBT2020;
59       switch (bit_depth) {
60         case 8:
61           transfer = ColorSpace::TransferID::kBT709;
62           break;
63         case 10:
64           transfer = ColorSpace::TransferID::kBT2020_10;
65           break;
66         default:
67           RTC_DCHECK_NOTREACHED();
68           break;
69       }
70       matrix = ColorSpace::MatrixID::kBT2020_NCL;
71       break;
72     case VPX_CS_SRGB:
73       primaries = ColorSpace::PrimaryID::kBT709;
74       transfer = ColorSpace::TransferID::kIEC61966_2_1;
75       matrix = ColorSpace::MatrixID::kBT709;
76       break;
77     default:
78       break;
79   }
80 
81   ColorSpace::RangeID range = ColorSpace::RangeID::kInvalid;
82   switch (range_t) {
83     case VPX_CR_STUDIO_RANGE:
84       range = ColorSpace::RangeID::kLimited;
85       break;
86     case VPX_CR_FULL_RANGE:
87       range = ColorSpace::RangeID::kFull;
88       break;
89     default:
90       break;
91   }
92   return ColorSpace(primaries, transfer, matrix, range);
93 }
94 
95 }  // namespace
96 
// Constructs a released decoder: the libvpx context is created lazily by
// Configure(), and the first frame handed to Decode() must be a key frame.
LibvpxVp9Decoder::LibvpxVp9Decoder()
    : decode_complete_callback_(nullptr),
      inited_(false),
      decoder_(nullptr),
      key_frame_required_(true) {}
102 
~LibvpxVp9Decoder()103 LibvpxVp9Decoder::~LibvpxVp9Decoder() {
104   inited_ = true;  // in order to do the actual release
105   Release();
106   int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
107   if (num_buffers_in_use > 0) {
108     // The frame buffers are reference counted and frames are exposed after
109     // decoding. There may be valid usage cases where previous frames are still
110     // referenced after ~LibvpxVp9Decoder that is not a leak.
111     RTC_LOG(LS_INFO) << num_buffers_in_use
112                      << " Vp9FrameBuffers are still "
113                         "referenced during ~LibvpxVp9Decoder.";
114   }
115 }
116 
// (Re)creates and configures the underlying libvpx VP9 decoder context.
// Returns false if releasing the previous context, initializing libvpx,
// attaching the frame buffer pool, or enabling loop-filter optimization
// fails. Safe to call repeatedly; Decode() also calls it on key-frame
// resolution changes.
bool LibvpxVp9Decoder::Configure(const Settings& settings) {
  // Tear down any existing decoder state first.
  if (Release() < 0) {
    return false;
  }

  // Release() deletes and nulls `decoder_`, so allocate a zeroed context.
  if (decoder_ == nullptr) {
    decoder_ = new vpx_codec_ctx_t;
    memset(decoder_, 0, sizeof(*decoder_));
  }
  vpx_codec_dec_cfg_t cfg;
  memset(&cfg, 0, sizeof(cfg));

#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  // We focus on webrtc fuzzing here, not libvpx itself. Use single thread for
  // fuzzing, because:
  //  - libvpx's VP9 single thread decoder is more fuzzer friendly. It detects
  //    errors earlier than the multi-threads version.
  //  - Make peak CPU usage under control (not depending on input)
  cfg.threads = 1;
#else
  const RenderResolution& resolution = settings.max_render_resolution();
  if (!resolution.Valid()) {
    // Postpone configuring number of threads until resolution is known.
    cfg.threads = 1;
  } else {
    // We want to use multithreading when decoding high resolution videos. But
    // not too many in order to avoid overhead when many stream are decoded
    // concurrently.
    // Set 2 thread as target for 1280x720 pixel count, and then scale up
    // linearly from there - but cap at physical core count.
    // For common resolutions this results in:
    // 1 for 360p
    // 2 for 720p
    // 4 for 1080p
    // 8 for 1440p
    // 18 for 4K
    int num_threads = std::max(
        1, 2 * resolution.Width() * resolution.Height() / (1280 * 720));
    cfg.threads = std::min(settings.number_of_cores(), num_threads);
  }
#endif

  // Remember the settings so Decode() can re-Configure() with only the
  // resolution changed.
  current_settings_ = settings;

  vpx_codec_flags_t flags = 0;
  if (vpx_codec_dec_init(decoder_, vpx_codec_vp9_dx(), &cfg, flags)) {
    return false;
  }

  // Route libvpx's output frame allocations through the ref-counted pool so
  // decoded frames can be exposed to callers without copying.
  if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
    return false;
  }

  inited_ = true;
  // Always start with a complete key frame.
  key_frame_required_ = true;
  if (absl::optional<int> buffer_pool_size = settings.buffer_pool_size()) {
    if (!libvpx_buffer_pool_.Resize(*buffer_pool_size)) {
      return false;
    }
  }

  vpx_codec_err_t status =
      vpx_codec_control(decoder_, VP9D_SET_LOOP_FILTER_OPT, 1);
  if (status != VPX_CODEC_OK) {
    RTC_LOG(LS_ERROR) << "Failed to enable VP9D_SET_LOOP_FILTER_OPT. "
                      << vpx_codec_error(decoder_);
    return false;
  }

  return true;
}
189 
// Decodes a single encoded VP9 frame and delivers the result through the
// registered DecodedImageCallback. An empty `input_image` triggers full frame
// concealment inside libvpx. Returns a WEBRTC_VIDEO_CODEC_* status code.
int LibvpxVp9Decoder::Decode(const EncodedImage& input_image,
                             bool missing_frames,
                             int64_t /*render_time_ms*/) {
  if (!inited_) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }
  if (decode_complete_callback_ == nullptr) {
    return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
  }

  // On key frames, parse the uncompressed header to detect resolution
  // changes, which require a full decoder re-initialization.
  if (input_image._frameType == VideoFrameType::kVideoFrameKey) {
    absl::optional<Vp9UncompressedHeader> frame_info =
        ParseUncompressedVp9Header(
            rtc::MakeArrayView(input_image.data(), input_image.size()));
    if (frame_info) {
      RenderResolution frame_resolution(frame_info->frame_width,
                                        frame_info->frame_height);
      if (frame_resolution != current_settings_.max_render_resolution()) {
        // Resolution has changed, tear down and re-init a new decoder in
        // order to get correct sizing.
        Release();
        current_settings_.set_max_render_resolution(frame_resolution);
        if (!Configure(current_settings_)) {
          RTC_LOG(LS_WARNING) << "Failed to re-init decoder.";
          return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
        }
      }
    } else {
      // Non-fatal: decode proceeds with the current configuration.
      RTC_LOG(LS_WARNING) << "Failed to parse VP9 header from key-frame.";
    }
  }

  // Always start with a complete key frame.
  if (key_frame_required_) {
    if (input_image._frameType != VideoFrameType::kVideoFrameKey)
      return WEBRTC_VIDEO_CODEC_ERROR;
    key_frame_required_ = false;
  }
  vpx_codec_iter_t iter = nullptr;
  vpx_image_t* img;
  const uint8_t* buffer = input_image.data();
  if (input_image.size() == 0) {
    buffer = nullptr;  // Triggers full frame concealment.
  }
  // During decode libvpx may get and release buffers from
  // `libvpx_buffer_pool_`. In practice libvpx keeps a few (~3-4) buffers alive
  // at a time.
  if (vpx_codec_decode(decoder_, buffer,
                       static_cast<unsigned int>(input_image.size()), 0,
                       VPX_DL_REALTIME)) {
    return WEBRTC_VIDEO_CODEC_ERROR;
  }
  // `img->fb_priv` contains the image data, a reference counted Vp9FrameBuffer.
  // It may be released by libvpx during future vpx_codec_decode or
  // vpx_codec_destroy calls.
  img = vpx_codec_get_frame(decoder_, &iter);
  int qp;
  vpx_codec_err_t vpx_ret =
      vpx_codec_control(decoder_, VPXD_GET_LAST_QUANTIZER, &qp);
  RTC_DCHECK_EQ(vpx_ret, VPX_CODEC_OK);
  // `img` may be nullptr here (no show frame); ReturnFrame() handles that by
  // returning WEBRTC_VIDEO_CODEC_NO_OUTPUT.
  int ret =
      ReturnFrame(img, input_image.Timestamp(), qp, input_image.ColorSpace());
  if (ret != 0) {
    return ret;
  }
  return WEBRTC_VIDEO_CODEC_OK;
}
257 
// Wraps the decoded libvpx image in a VideoFrame (zero-copy) and hands it to
// the decode-complete callback. `img` may be nullptr (decoder produced no
// show frame). `explicit_color_space` from the RTP stream, when present,
// takes precedence over the color metadata libvpx reports on `img`.
// Returns WEBRTC_VIDEO_CODEC_OK, or WEBRTC_VIDEO_CODEC_NO_OUTPUT when no
// frame is delivered.
int LibvpxVp9Decoder::ReturnFrame(
    const vpx_image_t* img,
    uint32_t timestamp,
    int qp,
    const webrtc::ColorSpace* explicit_color_space) {
  if (img == nullptr) {
    // Decoder OK and nullptr image => No show frame.
    return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  // This buffer contains all of `img`'s image data, a reference counted
  // Vp9FrameBuffer. (libvpx is done with the buffers after a few
  // vpx_codec_decode calls or vpx_codec_destroy).
  rtc::scoped_refptr<Vp9FrameBufferPool::Vp9FrameBuffer> img_buffer(
      static_cast<Vp9FrameBufferPool::Vp9FrameBuffer*>(img->fb_priv));

  // The buffer can be used directly by the VideoFrame (without copy) by
  // using a Wrapped*Buffer. Each wrap call below captures `img_buffer` in its
  // release callback, keeping the underlying data alive for as long as the
  // wrapped buffer is referenced.
  rtc::scoped_refptr<VideoFrameBuffer> img_wrapped_buffer;
  switch (img->fmt) {
    case VPX_IMG_FMT_I420:
      img_wrapped_buffer = WrapI420Buffer(
          img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
          img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
          img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
          img->stride[VPX_PLANE_V],
          // The wrapped buffer releases its underlying frame data through
          // this callback; capturing `img_buffer` here is what keeps the
          // Vp9FrameBuffer alive.
          [img_buffer] {});
      break;
    case VPX_IMG_FMT_I422:
      img_wrapped_buffer = WrapI422Buffer(
          img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
          img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
          img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
          img->stride[VPX_PLANE_V],
          // Same release mechanism as the I420 case above: `img_buffer` is
          // released when the wrapped buffer runs this callback.
          [img_buffer] {});
      break;
    case VPX_IMG_FMT_I444:
      img_wrapped_buffer = WrapI444Buffer(
          img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
          img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
          img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
          img->stride[VPX_PLANE_V],
          // Same release mechanism as the I420 case above: `img_buffer` is
          // released when the wrapped buffer runs this callback.
          [img_buffer] {});
      break;
    case VPX_IMG_FMT_I42016:
      // High-bit-depth planes: libvpx strides are in bytes, the wrap
      // functions expect them in uint16_t units, hence the division by 2.
      img_wrapped_buffer = WrapI010Buffer(
          img->d_w, img->d_h,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
          img->stride[VPX_PLANE_Y] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
          img->stride[VPX_PLANE_U] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
          img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
      break;
    case VPX_IMG_FMT_I42216:
      img_wrapped_buffer = WrapI210Buffer(
          img->d_w, img->d_h,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_Y]),
          img->stride[VPX_PLANE_Y] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_U]),
          img->stride[VPX_PLANE_U] / 2,
          reinterpret_cast<const uint16_t*>(img->planes[VPX_PLANE_V]),
          img->stride[VPX_PLANE_V] / 2, [img_buffer] {});
      break;
    default:
      RTC_LOG(LS_ERROR) << "Unsupported pixel format produced by the decoder: "
                        << static_cast<int>(img->fmt);
      return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
  }

  auto builder = VideoFrame::Builder()
                     .set_video_frame_buffer(img_wrapped_buffer)
                     .set_timestamp_rtp(timestamp);
  if (explicit_color_space) {
    builder.set_color_space(*explicit_color_space);
  } else {
    // Fall back to the color metadata libvpx decoded from the bitstream.
    builder.set_color_space(
        ExtractVP9ColorSpace(img->cs, img->range, img->bit_depth));
  }
  VideoFrame decoded_image = builder.build();

  decode_complete_callback_->Decoded(decoded_image, absl::nullopt, qp);
  return WEBRTC_VIDEO_CODEC_OK;
}
351 
// Stores the sink that ReturnFrame() delivers decoded frames to. Must be set
// before Decode() is called (Decode() fails with UNINITIALIZED otherwise).
int LibvpxVp9Decoder::RegisterDecodeCompleteCallback(
    DecodedImageCallback* callback) {
  decode_complete_callback_ = callback;
  return WEBRTC_VIDEO_CODEC_OK;
}
357 
Release()358 int LibvpxVp9Decoder::Release() {
359   int ret_val = WEBRTC_VIDEO_CODEC_OK;
360 
361   if (decoder_ != nullptr) {
362     if (inited_) {
363       // When a codec is destroyed libvpx will release any buffers of
364       // `libvpx_buffer_pool_` it is currently using.
365       if (vpx_codec_destroy(decoder_)) {
366         ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
367       }
368     }
369     delete decoder_;
370     decoder_ = nullptr;
371   }
372   // Releases buffers from the pool. Any buffers not in use are deleted. Buffers
373   // still referenced externally are deleted once fully released, not returning
374   // to the pool.
375   libvpx_buffer_pool_.ClearPool();
376   inited_ = false;
377   return ret_val;
378 }
379 
GetDecoderInfo() const380 VideoDecoder::DecoderInfo LibvpxVp9Decoder::GetDecoderInfo() const {
381   DecoderInfo info;
382   info.implementation_name = "libvpx";
383   info.is_hardware_accelerated = false;
384   return info;
385 }
386 
// Legacy accessor for the implementation name; matches
// GetDecoderInfo().implementation_name.
const char* LibvpxVp9Decoder::ImplementationName() const {
  return "libvpx";
}
390 
391 }  // namespace webrtc
392 
393 #endif  // RTC_ENABLE_VP9
394