xref: /aosp_15_r20/external/crosvm/devices/src/virtio/video/decoder/backend/mod.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2020 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 //! This module implements the interface that actual decoder devices need to
6 //! implement in order to provide video decoding capability to the guest.
7 
8 use base::AsRawDescriptor;
9 
10 use crate::virtio::video::decoder::Capability;
11 use crate::virtio::video::error::VideoError;
12 use crate::virtio::video::error::VideoResult;
13 use crate::virtio::video::format::Format;
14 use crate::virtio::video::format::Rect;
15 use crate::virtio::video::resource::GuestResource;
16 use crate::virtio::video::resource::GuestResourceHandle;
17 
18 #[cfg(feature = "ffmpeg")]
19 pub mod ffmpeg;
20 
21 #[cfg(feature = "vaapi")]
22 pub mod vaapi;
23 #[cfg(feature = "libvda")]
24 pub mod vda;
25 
26 /// Contains the device's state for one playback session, i.e. one stream.
27 pub trait DecoderSession {
28     /// Tell how many output buffers will be used for this session and which format they will carry.
29     /// This method must be called after a `ProvidePictureBuffers` event is emitted, and before the
30     /// first call to `use_output_buffer()`.
set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()>31     fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()>;
32 
33     /// Decode the compressed stream contained in [`offset`..`offset`+`bytes_used`] of the shared
34     /// memory in the input `resource`.
35     ///
36     /// `resource_id` is the ID of the input resource. It will be signaled using the
37     /// `NotifyEndOfBitstreamBuffer` once the input resource is not used anymore.
38     ///
39     /// `timestamp` is a timestamp that will be copied into the frames decoded from that input
40     /// stream. Units are effectively free and provided by the input stream.
41     ///
42     /// The device takes ownership of `resource` and is responsible for closing it once it is not
43     /// used anymore.
44     ///
45     /// The device will emit a `NotifyEndOfBitstreamBuffer` event with the `resource_id` value after
46     /// the input buffer has been entirely processed.
47     ///
48     /// The device will emit a `PictureReady` event with the `timestamp` value for each picture
49     /// produced from that input buffer.
decode( &mut self, resource_id: u32, timestamp: u64, resource: GuestResourceHandle, offset: u32, bytes_used: u32, ) -> VideoResult<()>50     fn decode(
51         &mut self,
52         resource_id: u32,
53         timestamp: u64,
54         resource: GuestResourceHandle,
55         offset: u32,
56         bytes_used: u32,
57     ) -> VideoResult<()>;
58 
59     /// Flush the decoder device, i.e. finish processing all queued decode requests and emit frames
60     /// for them.
61     ///
62     /// The device will emit a `FlushCompleted` event once the flush is done.
flush(&mut self) -> VideoResult<()>63     fn flush(&mut self) -> VideoResult<()>;
64 
65     /// Reset the decoder device, i.e. cancel all pending decoding requests.
66     ///
67     /// The device will emit a `ResetCompleted` event once the reset is done.
reset(&mut self) -> VideoResult<()>68     fn reset(&mut self) -> VideoResult<()>;
69 
70     /// Immediately release all buffers passed using `use_output_buffer()` and
71     /// `reuse_output_buffer()`.
clear_output_buffers(&mut self) -> VideoResult<()>72     fn clear_output_buffers(&mut self) -> VideoResult<()>;
73 
74     /// Returns the event pipe on which the availability of events will be signaled. Note that the
75     /// returned value is borrowed and only valid as long as the session is alive.
event_pipe(&self) -> &dyn AsRawDescriptor76     fn event_pipe(&self) -> &dyn AsRawDescriptor;
77 
78     /// Ask the device to use `resource` to store decoded frames according to its layout.
79     /// `picture_buffer_id` is the ID of the picture that will be reproduced in `PictureReady`
80     /// events using this buffer.
81     ///
82     /// The device takes ownership of `resource` and is responsible for closing it once the buffer
83     /// is not used anymore (either when the session is closed, or a new set of buffers is provided
84     /// for the session).
85     ///
86     /// The device will emit a `PictureReady` event with the `picture_buffer_id` field set to the
87     /// same value as the argument of the same name when a frame has been decoded into that buffer.
use_output_buffer( &mut self, picture_buffer_id: i32, resource: GuestResource, ) -> VideoResult<()>88     fn use_output_buffer(
89         &mut self,
90         picture_buffer_id: i32,
91         resource: GuestResource,
92     ) -> VideoResult<()>;
93 
94     /// Ask the device to reuse an output buffer previously passed to
95     /// `use_output_buffer` and that has previously been returned to the decoder
96     /// in a `PictureReady` event.
97     ///
98     /// The device will emit a `PictureReady` event with the `picture_buffer_id`
99     /// field set to the same value as the argument of the same name when a
100     /// frame has been decoded into that buffer.
reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()>101     fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()>;
102 
103     /// Blocking call to read a single event from the event pipe.
read_event(&mut self) -> VideoResult<DecoderEvent>104     fn read_event(&mut self) -> VideoResult<DecoderEvent>;
105 }
106 
107 impl<S: AsMut<dyn DecoderSession> + AsRef<dyn DecoderSession> + ?Sized> DecoderSession for S {
set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()>108     fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()> {
109         self.as_mut().set_output_parameters(buffer_count, format)
110     }
111 
decode( &mut self, resource_id: u32, timestamp: u64, resource: GuestResourceHandle, offset: u32, bytes_used: u32, ) -> VideoResult<()>112     fn decode(
113         &mut self,
114         resource_id: u32,
115         timestamp: u64,
116         resource: GuestResourceHandle,
117         offset: u32,
118         bytes_used: u32,
119     ) -> VideoResult<()> {
120         self.as_mut()
121             .decode(resource_id, timestamp, resource, offset, bytes_used)
122     }
123 
flush(&mut self) -> VideoResult<()>124     fn flush(&mut self) -> VideoResult<()> {
125         self.as_mut().flush()
126     }
127 
reset(&mut self) -> VideoResult<()>128     fn reset(&mut self) -> VideoResult<()> {
129         self.as_mut().reset()
130     }
131 
clear_output_buffers(&mut self) -> VideoResult<()>132     fn clear_output_buffers(&mut self) -> VideoResult<()> {
133         self.as_mut().clear_output_buffers()
134     }
135 
event_pipe(&self) -> &dyn AsRawDescriptor136     fn event_pipe(&self) -> &dyn AsRawDescriptor {
137         self.as_ref().event_pipe()
138     }
139 
use_output_buffer( &mut self, picture_buffer_id: i32, resource: GuestResource, ) -> VideoResult<()>140     fn use_output_buffer(
141         &mut self,
142         picture_buffer_id: i32,
143         resource: GuestResource,
144     ) -> VideoResult<()> {
145         self.as_mut().use_output_buffer(picture_buffer_id, resource)
146     }
147 
reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()>148     fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()> {
149         self.as_mut().reuse_output_buffer(picture_buffer_id)
150     }
151 
read_event(&mut self) -> VideoResult<DecoderEvent>152     fn read_event(&mut self) -> VideoResult<DecoderEvent> {
153         self.as_mut().read_event()
154     }
155 }
156 
157 pub trait DecoderBackend: Send {
158     type Session: DecoderSession;
159 
160     /// Return the decoding capabilities for this backend instance.
get_capabilities(&self) -> Capability161     fn get_capabilities(&self) -> Capability;
162 
163     /// Create a new decoding session for the passed `format`.
new_session(&mut self, format: Format) -> VideoResult<Self::Session>164     fn new_session(&mut self, format: Format) -> VideoResult<Self::Session>;
165 
166     /// Turn this backend into a trait object, allowing the same decoder to operate on a set of
167     /// different backends.
into_trait_object(self) -> Box<dyn DecoderBackend<Session = Box<dyn DecoderSession>>> where Self: Sized + 'static,168     fn into_trait_object(self) -> Box<dyn DecoderBackend<Session = Box<dyn DecoderSession>>>
169     where
170         Self: Sized + 'static,
171     {
172         Box::new(GenericDecoderBackend(self)) as Box<dyn DecoderBackend<Session = _>>
173     }
174 }
175 
176 /// Type that changes the `Session` associated type to `Box<dyn DecoderSession>`, allowing us to
177 /// use trait objects for backends.
178 struct GenericDecoderBackend<S: DecoderBackend>(pub S);
179 
180 impl<S> DecoderBackend for GenericDecoderBackend<S>
181 where
182     S: DecoderBackend,
183     <S as DecoderBackend>::Session: 'static,
184 {
185     type Session = Box<dyn DecoderSession>;
186 
get_capabilities(&self) -> Capability187     fn get_capabilities(&self) -> Capability {
188         self.0.get_capabilities()
189     }
190 
new_session(&mut self, format: Format) -> VideoResult<Self::Session>191     fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
192         self.0
193             .new_session(format)
194             .map(|s| Box::new(s) as Box<dyn DecoderSession>)
195     }
196 }
197 
198 impl<S> DecoderBackend for Box<S>
199 where
200     S: ?Sized,
201     S: DecoderBackend,
202 {
203     type Session = S::Session;
204 
get_capabilities(&self) -> Capability205     fn get_capabilities(&self) -> Capability {
206         self.as_ref().get_capabilities()
207     }
208 
new_session(&mut self, format: Format) -> VideoResult<Self::Session>209     fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
210         self.as_mut().new_session(format)
211     }
212 }
213 
214 #[derive(Debug)]
215 pub enum DecoderEvent {
216     /// Emitted when the device knows the buffer format it will need to decode frames, and how many
217     /// buffers it will need. The decoder is supposed to call `set_output_parameters()` to confirm
218     /// the pixel format and actual number of buffers used, and provide buffers of the requested
219     /// dimensions using `use_output_buffer()`.
220     ProvidePictureBuffers {
221         min_num_buffers: u32,
222         width: i32,
223         height: i32,
224         visible_rect: Rect,
225     },
226     /// Emitted when the decoder is done decoding a picture. `picture_buffer_id`
227     /// corresponds to the argument of the same name passed to `use_output_buffer()`
228     /// or `reuse_output_buffer()`. `bitstream_id` corresponds to the argument of
229     /// the same name passed to `decode()` and can be used to match decoded frames
230     /// to the input buffer they were produced from.
231     PictureReady {
232         picture_buffer_id: i32,
233         timestamp: u64,
234     },
235     /// Emitted when an input buffer passed to `decode()` is not used by the
236     /// device anymore and can be reused by the decoder. The parameter corresponds
237     /// to the `timestamp` argument passed to `decode()`.
238     NotifyEndOfBitstreamBuffer(u32),
239     /// Emitted when a decoding error has occured.
240     NotifyError(VideoError),
241     /// Emitted after `flush()` has been called to signal that the flush is completed.
242     FlushCompleted(VideoResult<()>),
243     /// Emitted after `reset()` has been called to signal that the reset is completed.
244     ResetCompleted(VideoResult<()>),
245 }
246 
#[cfg(test)]
/// Shared functions that can be used to test individual backends.
mod tests {
    use std::time::Duration;

    use base::MappedRegion;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;
    use base::WaitContext;

    use super::*;
    use crate::virtio::video::format::FramePlane;
    use crate::virtio::video::resource::GuestMemArea;
    use crate::virtio::video::resource::GuestMemHandle;
    use crate::virtio::video::resource::VirtioObjectHandle;

    // Test video stream and its properties.
    const H264_STREAM: &[u8] = include_bytes!("test-25fps.h264");
    const H264_STREAM_WIDTH: i32 = 320;
    const H264_STREAM_HEIGHT: i32 = 240;
    const H264_STREAM_NUM_FRAMES: usize = 250;
    const H264_STREAM_CRCS: &str = include_str!("test-25fps.crc");

    /// Splits a H.264 annex B stream into chunks that are all guaranteed to contain a full frame
    /// worth of data.
    ///
    /// This is a pretty naive implementation that is only guaranteed to work with our test stream.
    /// We are not using `AVCodecParser` because it seems to modify the decoding context, which
    /// would result in testing conditions that diverge more from our real use case where parsing
    /// has already been done.
    struct H264NalIterator<'a> {
        stream: &'a [u8],
        pos: usize,
    }

    impl<'a> H264NalIterator<'a> {
        fn new(stream: &'a [u8]) -> Self {
            Self { stream, pos: 0 }
        }

        /// Returns the position of the start of the next frame in the stream.
        fn next_frame_pos(&self) -> Option<usize> {
            const H264_START_CODE: [u8; 4] = [0x0, 0x0, 0x0, 0x1];
            self.stream[self.pos + 1..]
                .windows(H264_START_CODE.len())
                .position(|window| window == H264_START_CODE)
                .map(|pos| self.pos + pos + 1)
        }

        /// Returns whether `slice` contains frame data, i.e. a header where the NAL unit type is
        /// 0x1 or 0x5.
        fn contains_frame(slice: &[u8]) -> bool {
            slice[4..].windows(4).any(|window| {
                window[0..3] == [0x0, 0x0, 0x1]
                    && (window[3] & 0x1f == 0x5 || window[3] & 0x1f == 0x1)
            })
        }
    }

    impl<'a> Iterator for H264NalIterator<'a> {
        type Item = &'a [u8];

        fn next(&mut self) -> Option<Self::Item> {
            match self.pos {
                cur_pos if cur_pos == self.stream.len() => None,
                cur_pos => loop {
                    self.pos = self.next_frame_pos().unwrap_or(self.stream.len());
                    let slice = &self.stream[cur_pos..self.pos];

                    // Keep advancing as long as we don't have frame data in our slice.
                    if Self::contains_frame(slice) || self.pos == self.stream.len() {
                        return Some(slice);
                    }
                },
            }
        }
    }

    // Build a virtio object handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into virtio objects.
    #[allow(dead_code)]
    pub fn build_object_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::VirtioObject(VirtioObjectHandle {
            desc: base::clone_descriptor(mem).unwrap(),
            modifier: 0,
        })
    }

    // Build a guest memory handle from a linear memory area. This is useful to emulate the
    // scenario where we are decoding from or into guest memory.
    #[allow(dead_code)]
    pub fn build_guest_mem_handle(mem: &SharedMemory) -> GuestResourceHandle {
        GuestResourceHandle::GuestPages(GuestMemHandle {
            desc: base::clone_descriptor(mem).unwrap(),
            mem_areas: vec![GuestMemArea {
                offset: 0,
                length: mem.size() as usize,
            }],
        })
    }

    /// Full decoding test of a H.264 video, checking that the flow of events is happening as
    /// expected.
    pub fn decode_h264_generic<D, I, O>(
        decoder: &mut D,
        input_resource_builder: I,
        output_resource_builder: O,
    ) where
        D: DecoderBackend,
        I: Fn(&SharedMemory) -> GuestResourceHandle,
        O: Fn(&SharedMemory) -> GuestResourceHandle,
    {
        const NUM_OUTPUT_BUFFERS: usize = 4;
        const INPUT_BUF_SIZE: usize = 0x4000;
        const OUTPUT_BUFFER_SIZE: usize =
            (H264_STREAM_WIDTH * (H264_STREAM_HEIGHT + H264_STREAM_HEIGHT / 2)) as usize;
        let mut session = decoder
            .new_session(Format::H264)
            .expect("failed to create H264 decoding session.");
        let wait_ctx = WaitContext::new().expect("Failed to create wait context");
        wait_ctx
            .add(session.event_pipe(), 0u8)
            .expect("Failed to add event pipe to wait context");
        // Output buffers suitable for receiving NV12 frames for our stream.
        let output_buffers = (0..NUM_OUTPUT_BUFFERS)
            .map(|i| {
                SharedMemory::new(
                    format!("video-output-buffer-{}", i),
                    OUTPUT_BUFFER_SIZE as u64,
                )
                .unwrap()
            })
            .collect::<Vec<_>>();
        let input_shm = SharedMemory::new("video-input-buffer", INPUT_BUF_SIZE as u64).unwrap();
        let input_mapping = MemoryMappingBuilder::new(input_shm.size() as usize)
            .from_shared_memory(&input_shm)
            .build()
            .unwrap();

        let mut decoded_frames_count = 0usize;
        let mut expected_frames_crcs = H264_STREAM_CRCS.lines();

        let mut on_frame_decoded = |session: &mut D::Session, picture_buffer_id: i32| {
            // Verify that the CRC of the decoded frame matches the expected one.
            let mapping = MemoryMappingBuilder::new(OUTPUT_BUFFER_SIZE)
                .from_shared_memory(&output_buffers[picture_buffer_id as usize])
                .build()
                .unwrap();
            let mut frame_data = vec![0u8; mapping.size()];
            assert_eq!(
                mapping.read_slice(&mut frame_data, 0).unwrap(),
                mapping.size()
            );

            let mut hasher = crc32fast::Hasher::new();
            hasher.update(&frame_data);
            let frame_crc = hasher.finalize();
            assert_eq!(
                format!("{:08x}", frame_crc),
                expected_frames_crcs
                    .next()
                    .expect("No CRC for decoded frame")
            );

            // We can recycle the frame now.
            session.reuse_output_buffer(picture_buffer_id).unwrap();
            decoded_frames_count += 1;
        };

        // Simple value by which we will multiply the frame number to obtain a fake timestamp.
        const TIMESTAMP_FOR_INPUT_ID_FACTOR: u64 = 1_000_000;
        for (input_id, slice) in H264NalIterator::new(H264_STREAM).enumerate() {
            let buffer_handle = input_resource_builder(&input_shm);
            input_mapping
                .write_slice(slice, 0)
                .expect("Failed to write stream data into input buffer.");
            session
                .decode(
                    input_id as u32,
                    input_id as u64 * TIMESTAMP_FOR_INPUT_ID_FACTOR,
                    buffer_handle,
                    0,
                    slice.len() as u32,
                )
                .expect("Call to decode() failed.");

            // Get all the events resulting from this submission.
            let mut events = Vec::new();
            while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
                events.push(session.read_event().unwrap());
            }

            // Our bitstream buffer should have been returned.
            let event_idx = events
                .iter()
                .position(|event| {
                    let input_id = input_id as u32;
                    matches!(event, DecoderEvent::NotifyEndOfBitstreamBuffer(index) if *index == input_id)
                })
                .unwrap();
            events.remove(event_idx);

            // After sending the first buffer we should get the initial resolution change event and
            // can provide the frames to decode into.
            if input_id == 0 {
                let event_idx = events
                    .iter()
                    .position(|event| {
                        matches!(
                            event,
                            DecoderEvent::ProvidePictureBuffers {
                                width: H264_STREAM_WIDTH,
                                height: H264_STREAM_HEIGHT,
                                visible_rect: Rect {
                                    left: 0,
                                    top: 0,
                                    right: H264_STREAM_WIDTH,
                                    bottom: H264_STREAM_HEIGHT,
                                },
                                ..
                            }
                        )
                    })
                    .unwrap();
                events.remove(event_idx);

                let out_format = Format::NV12;

                session
                    .set_output_parameters(NUM_OUTPUT_BUFFERS, out_format)
                    .unwrap();

                // Pass the buffers we will decode into.
                for (picture_buffer_id, buffer) in output_buffers.iter().enumerate() {
                    session
                        .use_output_buffer(
                            picture_buffer_id as i32,
                            GuestResource {
                                handle: output_resource_builder(buffer),
                                planes: vec![
                                    FramePlane {
                                        offset: 0,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                    FramePlane {
                                        offset: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                        stride: H264_STREAM_WIDTH as usize,
                                        size: (H264_STREAM_WIDTH * H264_STREAM_HEIGHT) as usize,
                                    },
                                ],
                                width: H264_STREAM_WIDTH as _,
                                height: H264_STREAM_HEIGHT as _,
                                format: out_format,
                                guest_cpu_mappable: false,
                            },
                        )
                        .unwrap();
                }
            }

            // If we have remaining events, they must be decoded frames. Get them and recycle them.
            for event in events {
                match event {
                    DecoderEvent::PictureReady {
                        picture_buffer_id, ..
                    } => on_frame_decoded(&mut session, picture_buffer_id),
                    e => panic!("Unexpected event: {:?}", e),
                }
            }
        }

        session.flush().unwrap();

        // Keep getting frames until the final event, which should be `FlushCompleted`.
        let mut received_flush_completed = false;
        while !wait_ctx.wait_timeout(Duration::ZERO).unwrap().is_empty() {
            match session.read_event().unwrap() {
                DecoderEvent::PictureReady {
                    picture_buffer_id, ..
                } => on_frame_decoded(&mut session, picture_buffer_id),
                DecoderEvent::FlushCompleted(Ok(())) => {
                    received_flush_completed = true;
                    break;
                }
                e => panic!("Unexpected event: {:?}", e),
            }
        }

        // Confirm that we got the FlushCompleted event.
        assert!(received_flush_completed);

        // We should have read all the events for that session.
        assert_eq!(wait_ctx.wait_timeout(Duration::ZERO).unwrap().len(), 0);

        // We should not be expecting any more frame
        assert_eq!(expected_frames_crcs.next(), None);

        // Check that we decoded the expected number of frames.
        assert_eq!(decoded_frames_count, H264_STREAM_NUM_FRAMES);
    }
}
549