// Copyright 2024 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

mod event_queue;
pub mod ffmpeg;

use std::collections::BTreeMap;
use std::collections::VecDeque;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::os::fd::AsFd;
use std::os::fd::BorrowedFd;

use enumn::N;
use event_queue::EventQueue;
use ffmpeg::avcodec::AvBuffer;
use ffmpeg::avcodec::AvBufferSource;
use ffmpeg::avcodec::AvCodec;
use ffmpeg::avcodec::AvCodecContext;
use ffmpeg::avcodec::AvCodecIterator;
use ffmpeg::avcodec::AvCodecOpenError;
use ffmpeg::avcodec::AvError;
use ffmpeg::avcodec::AvFrame;
use ffmpeg::avcodec::AvFrameError;
use ffmpeg::avcodec::AvPacket;
use ffmpeg::avcodec::AvPixelFormat;
use ffmpeg::avcodec::Dimensions;
use ffmpeg::avcodec::PlaneDescriptor;
use ffmpeg::avcodec::TryReceiveResult;
use ffmpeg::avcodec::AV_PIXEL_FORMAT_NV12;
use ffmpeg::avcodec::AV_PIXEL_FORMAT_YUV420P;
use ffmpeg::swscale::ConversionError;
use ffmpeg::swscale::SwConverter;
use ffmpeg::swscale::SwConverterCreationError;
use ffmpeg::AVERROR_EOF;
use ffmpeg::AVERROR_INVALIDDATA;
use nix::errno::Errno;
use thiserror::Error as ThisError;
use virtio_media::devices::video_decoder::StreamParams;
use virtio_media::devices::video_decoder::VideoDecoderBackend;
use virtio_media::devices::video_decoder::VideoDecoderBackendEvent;
use virtio_media::devices::video_decoder::VideoDecoderBackendSession;
use virtio_media::devices::video_decoder::VideoDecoderBufferBacking;
use virtio_media::devices::video_decoder::VideoDecoderSession;
use virtio_media::ioctl::IoctlResult;
use virtio_media::memfd::MemFdBuffer;
use virtio_media::memfd::MemFdMapping;
use virtio_media::v4l2r;
use virtio_media::v4l2r::bindings;
use virtio_media::v4l2r::ioctl::V4l2MplaneFormat;
use virtio_media::v4l2r::PixelFormat;
use virtio_media::v4l2r::QueueClass;
use virtio_media::v4l2r::QueueDirection;
use virtio_media::v4l2r::QueueType;
use virtio_media::v4l2r::Rect;

use crate::ffmpeg::AV_CODEC_CAP_DR1;

type BufferPlanesFmt = [bindings::v4l2_plane_pix_format; bindings::VIDEO_MAX_PLANES as usize];

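// Implementing `AvBufferSource` for `MemFdMapping` lets `AvBuffer`s be backed directly by the
// CPU mapping of a buffer's memfd, so the converter in `try_output_frame` writes frames straight
// into the client's output buffer memory.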
impl AvBufferSource for MemFdMapping {
    fn as_ptr(&self) -> *const u8 {
        self.as_ref().as_ptr()
    }

    fn as_mut_ptr(&mut self) -> *mut u8 {
        self.as_mut().as_mut_ptr()
    }

    fn len(&self) -> usize {
        self.size()
    }
}

pub struct FfmpegDecoderBuffer {
    // Plane backing memory, for MMAP buffers only.
    fds: Vec<MemFdBuffer>,
}

// TODO: technically this is a Mmap backing? For other buffer types we provide the backing
// externally...
impl VideoDecoderBufferBacking for FfmpegDecoderBuffer {
    fn new(_queue: QueueType, _index: u32, sizes: &[usize]) -> IoctlResult<Self>
    where
        Self: Sized,
    {
        let fds = sizes
            .iter()
            .map(|size| MemFdBuffer::new(*size as u64))
            .collect::<Result<_, _>>()
            .map_err(|_| libc::ENOMEM)?;

        Ok(Self { fds })
    }

    fn fd_for_plane(&self, plane_idx: usize) -> Option<BorrowedFd> {
        self.fds.get(plane_idx).map(|memfd| memfd.as_file().as_fd())
    }
}

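// The `N` derive allows a raw u32 V4L2 pixelformat to be converted back into the enum through
// `OutputFormat::n()`, which `adjust_format` and `apply_format` rely on.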
#[derive(Debug, Default, PartialOrd, Ord, PartialEq, Eq, Clone, Copy, N)]
#[repr(u32)]
pub enum OutputFormat {
    #[default]
    H264 = PixelFormat::from_fourcc(b"H264").to_u32(),
    VP8 = PixelFormat::from_fourcc(b"VP80").to_u32(),
    VP9 = PixelFormat::from_fourcc(b"VP90").to_u32(),
    HEVC = PixelFormat::from_fourcc(b"HEVC").to_u32(),
}

impl OutputFormat {
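    /// Build the V4L2 format for the OUTPUT (bitstream) queue: a single plane with a fixed
    /// 1 MiB `sizeimage`, and `bytesperline` left at 0 since compressed data has no line
    /// stride.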
    fn into_v4l2_pix_format(self, coded_size: (u32, u32)) -> bindings::v4l2_pix_format_mplane {
        // TODO: use `coded_size` to infer a reasonable size?
        const INPUT_SIZEIMAGE: u32 = 1024 * 1024;

        let mut plane_fmt: BufferPlanesFmt = Default::default();
        plane_fmt[0] = bindings::v4l2_plane_pix_format {
            bytesperline: 0,
            sizeimage: INPUT_SIZEIMAGE,
            reserved: Default::default(),
        };

        bindings::v4l2_pix_format_mplane {
            width: coded_size.0,
            height: coded_size.1,
            pixelformat: self as u32,
            plane_fmt,
            num_planes: 1,
            ..format_filler()
        }
    }
}

#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Clone, Copy, N)]
#[repr(u32)]
pub enum CaptureFormat {
    NV12 = PixelFormat::from_fourcc(b"NV12").to_u32(),
}

impl From<CaptureFormat> for AvPixelFormat {
    fn from(format: CaptureFormat) -> Self {
        AvPixelFormat(match format {
            CaptureFormat::NV12 => AV_PIXEL_FORMAT_NV12.into(),
        })
    }
}

impl CaptureFormat {
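    /// Build the V4L2 format for the CAPTURE (decoded frames) queue.
    ///
    /// Worked example (illustrative, not from the original source): for a 640x480 NV12 frame,
    /// `bytesperline` is 640 and `sizeimage` is the Y plane (640 * 480) plus the interleaved
    /// CbCr plane (640 * 240), i.e. 460800 bytes, all carried in a single plane.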
    fn into_v4l2_pix_format(self, coded_size: (u32, u32)) -> bindings::v4l2_pix_format_mplane {
        let mut plane_fmt: BufferPlanesFmt = Default::default();
        let av_format = AvPixelFormat::from(self);

        let num_planes = match self {
            CaptureFormat::NV12 => {
                let plane = &mut plane_fmt[0];
                let line_size = av_format.line_size(coded_size.0, 0) as u32;
                plane.bytesperline = line_size;
                plane.sizeimage = av_format
                    .plane_sizes([line_size, line_size], coded_size.1)
                    .into_iter()
                    .sum::<usize>() as u32;
                1
            }
        };

        bindings::v4l2_pix_format_mplane {
            width: coded_size.0,
            height: coded_size.1,
            pixelformat: self as u32,
            plane_fmt,
            num_planes,
            ..format_filler()
        }
    }
}

enum FfmpegDecoderJob {
    Decode {
        /// Ffmpeg packet containing the input data.
        ///
        /// TODO: we can probably avoid the copy by keeping the input, mapping it, and using the
        /// mapping as the source of the AvPacket?
        packet: AvPacket<'static>,
        /// Index of the input buffer from which the input was received.
        input_index: u32,
    },
    Drain,
}

/// State for a session that is actively decoding.
struct DecodingContext {
    /// FIFO of input buffers waiting to be submitted.
    jobs: VecDeque<FfmpegDecoderJob>,
    /// Decoder context, dependent on the input format.
    av_context: AvCodecContext,
    /// Converter from the current AVCodec output format to the format expected by the client.
    converter: SwConverter,
    /// Whether new CAPTURE buffers are currently accepted. Set tentatively at creation time,
    /// cleared when a stream format change is detected, and set again once the CAPTURE queue
    /// starts streaming.
    accepting_output_buffers: bool,
    /// Latest `AvFrame` received from ffmpeg.
    avframe: Option<AvFrame>,
    /// Current state of the drain operation, if any.
    drain_state: DrainState,
}

#[derive(Debug, ThisError)]
pub enum NewDecodingContextError {
    #[error("cannot create decoder: {0}")]
    DecoderCreation(#[from] AvCodecOpenError),
    #[error("cannot create sw converter: {0}")]
    SwConverter(#[from] SwConverterCreationError),
}

impl DecodingContext {
    /// Build a new decoding context for `codec`.
    fn new(
        codec: AvCodec,
        output_format: CaptureFormat,
        coded_size: (u32, u32),
    ) -> Result<Self, NewDecodingContextError> {
        let av_context = codec.build_decoder().and_then(|b| {
            b.set_initial_format(coded_size, AV_PIXEL_FORMAT_YUV420P);
            b.build()
        })?;

        let converter = Self::create_converter_from_context(&av_context, output_format)?;

        Ok(DecodingContext {
            jobs: Default::default(),
            av_context,
            // We accept CAPTURE buffers tentatively, as the client might know the stream format.
            accepting_output_buffers: true,
            converter,
            avframe: None,
            drain_state: DrainState::None,
        })
    }

    fn create_converter_from_context(
        av_context: &AvCodecContext,
        output_format: CaptureFormat,
    ) -> Result<SwConverter, SwConverterCreationError> {
        let avcontext = av_context.as_ref();
        let dst_pix_fmt: AvPixelFormat = output_format.into();
        log::info!(
            "creating SW converter from {}x{} {} to {:?}",
            avcontext.width,
            avcontext.height,
            avcontext.pix_fmt,
            dst_pix_fmt
        );

        SwConverter::new(
            avcontext.width as usize,
            avcontext.height as usize,
            avcontext.pix_fmt,
            dst_pix_fmt.0,
        )
    }

    /// Recreate the frame converter for this context. This should be called whenever the stream
    /// format changes.
    fn update_converter(
        &mut self,
        output_format: CaptureFormat,
    ) -> Result<(), SwConverterCreationError> {
        self.converter = Self::create_converter_from_context(&self.av_context, output_format)?;

        Ok(())
    }
}

/// An output frame ready to be decoded into.
struct AvailableOutputFrame {
    /// V4L2 buffer index for this frame.
    index: u32,
    /// CPU mappings for all the planes.
    planes: Vec<MemFdMapping>,
}

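/// Progress of a drain request through the decoding pipeline: `try_send_input_job` moves `None`
/// to `Initiated` when it pops a `Drain` job, `try_receive_frame` moves to `AwaitingFinalFrame`
/// once libavcodec reports `FlushCompleted`, and `try_output_frame` returns to `None` after
/// emitting an empty frame with the LAST flag.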
#[derive(Debug, PartialEq, Eq)]
enum DrainState {
    /// No drain at the moment.
    None,
    /// Drain has started, we are waiting for all input to be processed.
    Initiated,
    /// Ffmpeg has been flushed, we are waiting for a frame to signal with the LAST flag.
    AwaitingFinalFrame,
}

pub struct FfmpegDecoderSession {
    /// Input format currently exposed to the client. This can be changed until the first buffer is
    /// queued on the OUTPUT queue.
    input_format: (OutputFormat, AvCodec),
    /// Output format currently exposed to the client.
    output_format: CaptureFormat,
    /// Coded size set for CAPTURE buffers. Can be larger than the one reported in `stream_params`.
    coded_size: (u32, u32),
    /// TODO: actually we should be able to change the stream's coded size by setting the OUTPUT
    /// resolution. This would adjust the CAPTURE resolution too, and trigger a DRC event if the
    /// format is not large enough when the next input buffer is submitted.
    stream_params: StreamParams,

    /// Initialized once the input codec has been determined.
    context: Option<DecodingContext>,

    /// FIFO of output frames we can decode into.
    available_output_frames: VecDeque<AvailableOutputFrame>,

    /// FIFO of decoder events waiting to be dequeued.
    events: EventQueue<VideoDecoderBackendEvent>,
}

#[derive(Debug, ThisError)]
enum TrySendInputError {
    #[error("decoder context has not been created yet")]
    NoContext,
    #[error("error while sending input packet to libavcodec: {0}")]
    AvError(#[from] AvError),
    #[error("error while queueing input buffer done event: {0}")]
    EventQueue(Errno),
}

#[derive(Debug, ThisError)]
enum TryReceiveFrameError {
    #[error("decoder context has not been created yet")]
    // TODO: get the context in a caller method so we can deduplicate? Or better, set the context
    // as part of the state of the decoder?
    NoContext,
    #[error("cannot create AvFrame")]
    CannotCreateAvFrame(#[from] AvFrameError),
    #[error("decoding error: {0}")]
    DecodingError(AvError),
    #[error("error while queueing input completed event: {0}")]
    EventQueue(Errno),
    #[error("error while creating SW converter: {0}")]
    SwConverter(#[from] SwConverterCreationError),
    #[error("drain operation failed")]
    DrainFailed,
}

#[derive(Debug, ThisError)]
enum TryOutputFrameError {
    #[error("decoder context has not been created yet")]
    NoContext,
    #[error("error while creating output AvFrame")]
    AvFrame(#[from] AvFrameError),
    #[error("error while queueing frame decoded event: {0}")]
    EventQueue(Errno),
    #[error("not enough planes in target frame")]
    NotEnoughPlanes,
    #[error("error while building AvFrame: {0}")]
    CannotBuild(AvFrameError),
    #[error("error while converting frame: {0}")]
    ConversionError(ConversionError),
}

#[derive(Debug, ThisError)]
enum TryDecodeError {
    #[error("error while sending input: {0}")]
    SendInput(#[from] TrySendInputError),
    #[error("error while receiving frame: {0}")]
    ReceiveFrame(#[from] TryReceiveFrameError),
    #[error("error while outputting decoded frame: {0}")]
    OutputFrame(#[from] TryOutputFrameError),
}

impl FfmpegDecoderSession {
    /// Try to run the next input job, if any.
    ///
    /// Returns `true` if the next job has been submitted, `false` if it could not be, either
    /// because all pending work has already been queued or because the codec could not accept more
    /// input at the moment.
    fn try_send_input_job(&mut self) -> Result<bool, TrySendInputError> {
        let context = self.context.as_mut().ok_or(TrySendInputError::NoContext)?;

        let next_job = match context.jobs.pop_front() {
            None => return Ok(false),
            Some(job) => job,
        };

        match &next_job {
            FfmpegDecoderJob::Decode {
                packet,
                input_index,
            } => {
                let input_consumed = match context.av_context.try_send_packet(packet) {
                    Ok(res) => Ok(res),
                    // This could happen if we attempt to submit data while flushing.
                    Err(AvError(AVERROR_EOF)) => Ok(false),
                    // If we got invalid data, keep going in the hope that we will catch a valid state later.
                    Err(AvError(AVERROR_INVALIDDATA)) => {
                        log::warn!("try_send_input: invalid data in stream, ignoring...");
                        Ok(true)
                    }
                    Err(e) => Err(TrySendInputError::from(e)),
                }?;

                // If the input job has been rejected, push it back. Otherwise, signal the input buffer can
                // be reused.
                match input_consumed {
                    false => context.jobs.push_front(next_job),
                    true => self
                        .events
                        .queue_event(VideoDecoderBackendEvent::InputBufferDone(*input_index))
                        .map_err(TrySendInputError::EventQueue)?,
                }

                Ok(input_consumed)
            }
            FfmpegDecoderJob::Drain => {
                log::debug!("drain initiated");
                // Just set the state as draining for now. We will send the actual flush command
                // when `try_receive_frame` returns `TryAgain`. This should probably not be
                // necessary but we sometimes miss the last frame if we send the flush command to
                // libavcodec earlier (which looks like a bug with libavcodec but needs to be
                // confirmed).
                context.drain_state = DrainState::Initiated;
                Ok(true)
            }
        }
    }

    /// Try to receive a frame from the context and return it if it worked.
    fn try_receive_frame(&mut self) -> Result<bool, TryReceiveFrameError> {
        let context = self
            .context
            .as_mut()
            .ok_or(TryReceiveFrameError::NoContext)?;
        let mut avframe = match context.avframe {
            // We already have a frame waiting. Wait until it is sent to process the next one.
            Some(_) => return Ok(false),
            None => AvFrame::new()?,
        };

        match context.av_context.try_receive_frame(&mut avframe) {
            Ok(TryReceiveResult::Received) => {
                // Now check whether the resolution of the stream has changed.
                let new_coded_size = (avframe.width as u32, avframe.height as u32);
                // TODO: incorrect! We need to discard these values.
                let new_visible_rect = v4l2r::Rect::new(
                    avframe.crop_left as i32,
                    avframe.crop_top as i32,
                    (avframe.crop_right - avframe.crop_left) as u32,
                    (avframe.crop_bottom - avframe.crop_top) as u32,
                );

                if new_coded_size != self.stream_params.coded_size
                    || new_visible_rect != self.stream_params.visible_rect
                {
                    log::info!(
                        "new resolution detected in stream: {:?} -> {:?}",
                        self.stream_params.coded_size,
                        new_coded_size
                    );
                    self.stream_params.coded_size = new_coded_size;
                    self.stream_params.visible_rect = new_visible_rect;
                    // Reset adjustable coded size if the new stream cannot fit into the current
                    // buffers.
                    if new_coded_size.0 > self.coded_size.0 || new_coded_size.1 > self.coded_size.1
                    {
                        self.coded_size = new_coded_size;
                    }

                    context.update_converter(self.output_format)?;

                    // TODO: change decoding state to awaiting buffers and reject output buffers
                    // until the format has been confirmed somehow? IOW we need the decoder to
                    // confirm it has acknowledged our format change before we can accept new
                    // buffers.
                    //
                    // TODO: 07/23: decoder state, check how the crosvm decoder adapter handles
                    // resolution change?

                    self.available_output_frames.clear();
                    context.accepting_output_buffers = false;

                    self.events
                        .queue_event(VideoDecoderBackendEvent::StreamFormatChanged)
                        .map_err(TryReceiveFrameError::EventQueue)?;
                }

                context.avframe = Some(avframe);

                Ok(true)
            }
            Ok(TryReceiveResult::TryAgain) => {
                // Start flushing. `try_receive_frame` will return `FlushCompleted` when the
                // flush is completed. `TryAgain` will not be returned again until the flush is
                // completed.
                if context.drain_state == DrainState::Initiated {
                    match context.av_context.flush_decoder() {
                        // Call ourselves again so we can process the flush.
                        Ok(()) => self.try_receive_frame(),
                        Err(_) => {
                            context.drain_state = DrainState::None;
                            Err(TryReceiveFrameError::DrainFailed)
                        }
                    }
                } else {
                    Ok(false)
                }
            }
            Ok(TryReceiveResult::FlushCompleted) => {
                if context.drain_state == DrainState::Initiated {
                    log::debug!(
                        "decoder drain completed; waiting for a frame to send with the LAST flag"
                    );
                    context.drain_state = DrainState::AwaitingFinalFrame;
                    Ok(true)
                } else {
                    Ok(false)
                }
            }
            // If we got invalid data, keep going in the hope that we will catch a valid state later.
            Err(AvError(AVERROR_INVALIDDATA)) => {
                log::warn!("try_receive_frame: invalid data in stream, ignoring...");
                Ok(true)
            }
            Err(av_err) => Err(TryReceiveFrameError::DecodingError(av_err)),
        }
    }

    /// Try to output the currently decoded frame in [`DecodingContext::avframe`] into a client's output
    /// buffer.
    fn try_output_frame(&mut self) -> Result<bool, TryOutputFrameError> {
        let context = self
            .context
            .as_mut()
            .ok_or(TryOutputFrameError::NoContext)?;
        let mut output_frame = match self.available_output_frames.pop_front() {
            Some(output_frame) => output_frame,
            None => return Ok(false),
        };

        // Special case: if we are at the end of draining, send an empty frame with the LAST flag
        // set.
        if context.drain_state == DrainState::AwaitingFinalFrame {
            // ... but only if all the pending frames have been outputted.
            if context.avframe.is_some() {
                self.available_output_frames.push_front(output_frame);
                return Ok(false);
            }

            log::debug!("sending frame with LAST flag to signal end of drain");
            context.drain_state = DrainState::None;

            self.events
                .queue_event(VideoDecoderBackendEvent::FrameCompleted {
                    buffer_id: output_frame.index,
                    timestamp: bindings::timeval {
                        tv_sec: 0,
                        tv_usec: 0,
                    },
                    bytes_used: vec![],
                    is_last: true,
                })
                .map_err(TryOutputFrameError::EventQueue)?;

            return Ok(true);
        }

        let avframe = match context.avframe.take() {
            Some(avframe) => avframe,
            None => {
                self.available_output_frames.push_front(output_frame);
                return Ok(false);
            }
        };

        let av_format: AvPixelFormat = self.output_format.into();
        let bytes_used = av_format.plane_sizes(
            // TODO: this works for NV12, but not for other formats...
            [self.coded_size.0, self.coded_size.0],
            self.coded_size.1,
        );
        // Build an AvFrame for the output frame.
        // TODO: we need to handle stride, and more complex output frame formats.
        let mut dst_avframe = {
            let mut builder = AvFrame::builder()?;
            builder.set_dimensions(Dimensions {
                width: self.coded_size.0,
                height: self.coded_size.1,
            })?;
            builder.set_format(av_format)?;

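            // Both NV12 planes share a single backing buffer: the interleaved CbCr plane starts
            // right after the Y plane, hence the `bytes_used[0]` offset below.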
            let planes = [
                PlaneDescriptor {
                    buffer_index: 0,
                    offset: 0,
                    stride: av_format.line_size(self.coded_size.0, 0),
                },
                PlaneDescriptor {
                    buffer_index: 0,
                    offset: bytes_used[0],
                    stride: av_format.line_size(self.coded_size.0, 1),
                },
            ];

            let av_buffer = AvBuffer::new(output_frame.planes.remove(0))
                .ok_or(TryOutputFrameError::NotEnoughPlanes)?;
            builder
                .build_owned([av_buffer], planes)
                .map_err(TryOutputFrameError::CannotBuild)?
        };

        context
            .converter
            .convert(&avframe, &mut dst_avframe)
            .map_err(TryOutputFrameError::ConversionError)?;

        let timestamp = bindings::timeval {
            tv_sec: avframe.pts / 1_000_000,
            tv_usec: avframe.pts % 1_000_000,
        };

        self.events
            .queue_event(VideoDecoderBackendEvent::FrameCompleted {
                buffer_id: output_frame.index,
                timestamp,
                bytes_used: vec![bytes_used.iter().sum::<usize>() as u32],
                is_last: false,
            })
            .map_err(TryOutputFrameError::EventQueue)?;

        Ok(true)
    }

    /// Try to make progress with decoding.
    fn try_decode(&mut self) -> Result<(), TryDecodeError> {
        if self.context.is_none() {
            return Ok(());
        }

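        // Drive the pipeline downstream-first: output any pending decoded frame (freeing the
        // `avframe` slot), then try to receive the next frame from libavcodec, then submit more
        // input. Repeat until no step can make progress.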
        while self.try_output_frame()? || self.try_receive_frame()? || self.try_send_input_job()? {}

        Ok(())
    }
}

impl VideoDecoderBackendSession for FfmpegDecoderSession {
    type BufferStorage = FfmpegDecoderBuffer;

    fn decode(
        &mut self,
        input: &Self::BufferStorage,
        index: u32,
        timestamp: bindings::timeval,
        bytes_used: u32,
    ) -> IoctlResult<()> {
        // The input format is decided at the time the first input buffer is queued, so this is
        // when we create our context.
        // Ensure we are in decoding state, and switch to it if we aren't.
        let context = match &mut self.context {
            Some(context) => context,
            None => {
                let codec = self.input_format.1;

                let context =
                    DecodingContext::new(codec, self.output_format, self.stream_params.coded_size)
                        .map_err(|_| libc::ENODEV)?;

                let avcontext = context.av_context.as_ref();
                log::info!(
                    "starting decoding {} at resolution {}x{} (AVContext pix_fmt {}) for output format {:?}",
                    codec.name(),
                    avcontext.width,
                    avcontext.height,
                    avcontext.pix_fmt,
                    self.output_format
                );

                self.context.get_or_insert(context)
            }
        };

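        // Fold the v4l2 timeval into a single microsecond value; `try_output_frame` performs the
        // reverse conversion when returning the frame.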
        #[allow(clippy::unnecessary_cast)]
        let timestamp =
            (timestamp.tv_sec as i64).wrapping_mul(1_000_000) + (timestamp.tv_usec as i64);

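        // Copy the bitstream out of the input memfd into an owned buffer. See the TODO on
        // `FfmpegDecoderJob::Decode` about mapping the input instead to avoid this copy.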
        let mut input_data = vec![0u8; bytes_used as usize];
        let mut f = input.fds.first().ok_or(libc::EINVAL)?.as_file();
        f.seek(SeekFrom::Start(0)).map_err(|_| libc::EIO)?;
        f.read_exact(&mut input_data).map_err(|_| libc::EIO)?;

        let avbuffer = AvBuffer::new(input_data).ok_or(libc::ENOMEM)?;
        let avpacket = AvPacket::new_owned(timestamp, avbuffer);

        context.jobs.push_back(FfmpegDecoderJob::Decode {
            packet: avpacket,
            input_index: index,
        });

        self.try_decode().map_err(|e| {
            log::warn!("while decoding: {:#}", e);
            libc::EINVAL
        })
    }

    fn use_as_output(&mut self, index: u32, backing: &mut Self::BufferStorage) -> IoctlResult<()> {
        // Silently ignore buffers if we are not ready to accept them yet.
        if !self
            .context
            .as_ref()
            .map(|c| c.accepting_output_buffers)
            .unwrap_or(true)
        {
            return Ok(());
        }

        let planes = backing
            .fds
            .iter()
            .map(|fd| fd.mmap())
            .collect::<Result<_, _>>()
            .map_err(|_| libc::ENOMEM)?;

        self.available_output_frames
            .push_back(AvailableOutputFrame { index, planes });

        Ok(())
    }

    fn drain(&mut self) -> IoctlResult<()> {
        let context = match &mut self.context {
            Some(context) => context,
            // If the decoder is not ready, the drain command should succeed but no action shall be
            // taken.
            None => return Ok(()),
        };

        log::debug!("enqueuing drain request");
        context.jobs.push_back(FfmpegDecoderJob::Drain);
        self.try_decode().map_err(|e| {
            log::warn!("while draining: {:#}", e);
            libc::EINVAL
        })
    }

    fn clear_output_buffers(&mut self) -> IoctlResult<()> {
        self.available_output_frames.clear();
        self.events
            .retain(|event| !matches!(event, VideoDecoderBackendEvent::FrameCompleted { .. }));
        // We keep `self.context.avframe` as it is likely a DRC frame waiting for its new buffers.

        Ok(())
    }

    fn next_event(&mut self) -> Option<VideoDecoderBackendEvent> {
        self.events.dequeue_event()
    }

    fn poll_fd(&self) -> Option<BorrowedFd> {
        Some(self.events.as_fd())
    }

    fn current_format(&self, direction: QueueDirection) -> V4l2MplaneFormat {
        match direction {
            QueueDirection::Output => {
                let pix_mp = self
                    .input_format
                    .0
                    .into_v4l2_pix_format(self.stream_params.coded_size);

                V4l2MplaneFormat::from((direction, pix_mp))
            }
            QueueDirection::Capture => {
                let pix_mp = self.output_format.into_v4l2_pix_format(self.coded_size);

                V4l2MplaneFormat::from((direction, pix_mp))
            }
        }
    }

    fn stream_params(&self) -> StreamParams {
        self.stream_params.clone()
    }

    fn streaming_state(&mut self, direction: QueueDirection, streaming: bool) {
        if direction == QueueDirection::Capture && streaming {
            if let Some(context) = &mut self.context {
                context.accepting_output_buffers = true;
            }
        }
    }
}

pub struct FfmpegDecoder {
    codecs: BTreeMap<OutputFormat, AvCodec>,
}

impl FfmpegDecoder {
    /// Create a new ffmpeg decoder backend instance.
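    ///
    /// A minimal usage sketch (hypothetical, not part of the original code; `new_session` comes
    /// from the `VideoDecoderBackend` trait):
    ///
    /// ```ignore
    /// use virtio_media::devices::video_decoder::VideoDecoderBackend;
    ///
    /// let mut backend = FfmpegDecoder::new();
    /// // Fails with ENODEV if libav exposes none of the supported codecs.
    /// let session = backend.new_session(0).expect("no supported codec found");
    /// ```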
    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        // Find all the decoders supported by libav and store them.
        let codecs = AvCodecIterator::new()
            .filter_map(|codec| {
                if !codec.is_decoder() {
                    return None;
                }

                let codec_name = codec.name();

                // Only keep processing the decoders we are interested in.
                let format = match codec_name {
                    "h264" => OutputFormat::H264,
                    "hevc" => OutputFormat::HEVC,
                    "vp8" => OutputFormat::VP8,
                    "vp9" => OutputFormat::VP9,
                    _ => return None,
                };

                // We require custom buffer allocators, so ignore codecs that are not capable of
                // using them.
                if codec.capabilities() & AV_CODEC_CAP_DR1 == 0 {
                    log::info!(
                        "Skipping codec {} due to lack of DR1 capability.",
                        codec_name
                    );
                    return None;
                }

                Some((format, codec))
            })
            .collect();

        Self { codecs }
    }
}

const SUPPORTED_OUTPUT_FORMATS: [CaptureFormat; 1] = [CaptureFormat::NV12];

/// Returns a format with its invariant fields filled as expected.
fn format_filler() -> bindings::v4l2_pix_format_mplane {
    bindings::v4l2_pix_format_mplane {
        field: bindings::v4l2_field_V4L2_FIELD_NONE,
        flags: 0,
        colorspace: bindings::v4l2_colorspace_V4L2_COLORSPACE_DEFAULT,
        __bindgen_anon_1: bindings::v4l2_pix_format_mplane__bindgen_ty_1 {
            ycbcr_enc: bindings::v4l2_ycbcr_encoding_V4L2_YCBCR_ENC_DEFAULT as u8,
        },
        quantization: bindings::v4l2_quantization_V4L2_QUANTIZATION_DEFAULT as u8,
        xfer_func: bindings::v4l2_xfer_func_V4L2_XFER_FUNC_DEFAULT as u8,
        ..Default::default()
    }
}

impl VideoDecoderBackend for FfmpegDecoder {
    type Session = FfmpegDecoderSession;

    fn new_session(&mut self, _id: u32) -> IoctlResult<Self::Session> {
        const DEFAULT_CODED_SIZE: (u32, u32) = (320, 240);

        let input_format = self
            .codecs
            .iter()
            .map(|(k, v)| (*k, *v))
            .next()
            .ok_or(libc::ENODEV)?;

        Ok(FfmpegDecoderSession {
            input_format,
            output_format: SUPPORTED_OUTPUT_FORMATS
                .iter()
                .copied()
                .next()
                .unwrap_or(CaptureFormat::NV12),
            context: None,
            coded_size: DEFAULT_CODED_SIZE,
            stream_params: StreamParams {
                min_output_buffers: 4,
                coded_size: DEFAULT_CODED_SIZE,
                visible_rect: Rect {
                    left: 0,
                    top: 0,
                    width: DEFAULT_CODED_SIZE.0,
                    height: DEFAULT_CODED_SIZE.1,
                },
            },
            available_output_frames: Default::default(),
            events: EventQueue::new().map_err(|_| libc::EIO)?,
        })
    }

    fn close_session(&mut self, _session: Self::Session) {}

    fn enum_formats(
        &self,
        _session: &VideoDecoderSession<Self::Session>,
        direction: QueueDirection,
        index: u32,
    ) -> Option<bindings::v4l2_fmtdesc> {
        let pixelformat = match direction {
            QueueDirection::Output => self.codecs.iter().map(|f| *f.0).nth(index as usize)? as u32,
            QueueDirection::Capture => SUPPORTED_OUTPUT_FORMATS
                .iter()
                .copied()
                .nth(index as usize)? as u32,
        };

        Some(bindings::v4l2_fmtdesc {
            index,
            type_: QueueType::from_dir_and_class(direction, QueueClass::VideoMplane) as u32,
            pixelformat,
            ..Default::default()
        })
    }

    fn frame_sizes(&self, pixel_format: u32) -> Option<bindings::v4l2_frmsize_stepwise> {
        // Only return a value for valid formats.
        let _ = CaptureFormat::n(pixel_format)?;

        Some(bindings::v4l2_frmsize_stepwise {
            min_width: 32,
            max_width: 4096,
            step_width: 1,
            min_height: 32,
            max_height: 4096,
            step_height: 1,
        })
    }

    fn adjust_format(
        &self,
        session: &Self::Session,
        direction: QueueDirection,
        format: V4l2MplaneFormat,
    ) -> V4l2MplaneFormat {
        // Apply the requested pixel format or fall back to the current one.
        let pix_mp = match direction {
            QueueDirection::Output => {
                let pixelformat = OutputFormat::n(format.pixelformat().to_u32())
                    .unwrap_or(session.input_format.0);

                pixelformat.into_v4l2_pix_format(session.stream_params.coded_size)
            }
            QueueDirection::Capture => {
                let pixelformat = CaptureFormat::n(format.pixelformat().to_u32())
                    .unwrap_or(session.output_format);

                pixelformat.into_v4l2_pix_format(session.coded_size)
            }
        };

        V4l2MplaneFormat::from((direction, pix_mp))
    }

    fn apply_format(
        &self,
        session: &mut Self::Session,
        direction: QueueDirection,
        format: &V4l2MplaneFormat,
    ) {
        match direction {
            QueueDirection::Output => {
                let format = match OutputFormat::n(format.pixelformat().to_u32()) {
                    Some(format) => format,
                    None => return,
                };
                let avcodec = match self.codecs.get(&format).copied() {
                    Some(codec) => codec,
                    None => return,
                };

                session.input_format = (format, avcodec);
            }
            QueueDirection::Capture => {
                session.output_format = match CaptureFormat::n(format.pixelformat().to_u32()) {
                    Some(format) => format,
                    None => return,
                }
            }
        }
    }
}