// Copyright 2020 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::convert::TryFrom;

use anyhow::anyhow;
use base::error;
use base::warn;
use base::AsRawDescriptor;
use base::IntoRawDescriptor;
use libvda::decode::Event as LibvdaEvent;

use crate::virtio::video::decoder::backend::*;
use crate::virtio::video::decoder::Capability;
use crate::virtio::video::error::VideoError;
use crate::virtio::video::error::VideoResult;
use crate::virtio::video::format::*;

/// Since libvda only accepts 32-bit timestamps, we truncate the frame's 64-bit timestamp (of
/// nanosecond granularity) to keep only second granularity. This loses information for a regular
/// client, but the Android C2 decoder only sends timestamps with second granularity, so this
/// approach works there. However, it also means that this backend is very unlikely to work with
/// any other guest software. We accept this because the backend cannot be used outside of
/// ChromeOS anyway.
const TIMESTAMP_TRUNCATE_FACTOR: u64 = 1_000_000_000;
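// A minimal sketch of the round trip this constant enables (illustrative values only):
//
//     let timestamp: u64 = 42 * TIMESTAMP_TRUNCATE_FACTOR; // 42 seconds, in nanoseconds.
//     let bitstream_id = (timestamp / TIMESTAMP_TRUNCATE_FACTOR) as u32; // 42
//     let restored = TIMESTAMP_TRUNCATE_FACTOR.wrapping_mul(bitstream_id as u64);
//     assert_eq!(restored, timestamp); // Lossless only for whole-second timestamps.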

impl TryFrom<Format> for libvda::Profile {
    type Error = VideoError;

    fn try_from(format: Format) -> Result<Self, Self::Error> {
        Ok(match format {
            Format::VP8 => libvda::Profile::VP8,
            Format::VP9 => libvda::Profile::VP9Profile0,
            Format::H264 => libvda::Profile::H264ProfileBaseline,
            Format::Hevc => libvda::Profile::HevcProfileMain,
            _ => {
                error!("specified format {} is not supported by VDA", format);
                return Err(VideoError::InvalidParameter);
            }
        })
    }
}

impl TryFrom<Format> for libvda::PixelFormat {
    type Error = VideoError;

    fn try_from(format: Format) -> Result<Self, Self::Error> {
        Ok(match format {
            Format::NV12 => libvda::PixelFormat::NV12,
            _ => {
                error!("specified format {} is not supported by VDA", format);
                return Err(VideoError::InvalidParameter);
            }
        })
    }
}

impl From<&FramePlane> for libvda::FramePlane {
    fn from(plane: &FramePlane) -> Self {
        libvda::FramePlane {
            offset: plane.offset as i32,
            stride: plane.stride as i32,
        }
    }
}

impl From<libvda::decode::Event> for DecoderEvent {
    fn from(event: libvda::decode::Event) -> Self {
        // We cannot use the From trait here since neither libvda::decode::Response
        // nor std::result::Result is defined in the current crate.
        fn vda_response_to_result(resp: libvda::decode::Response) -> VideoResult<()> {
            match resp {
                libvda::decode::Response::Success => Ok(()),
                resp => Err(VideoError::BackendFailure(anyhow!("VDA failure: {}", resp))),
            }
        }

        match event {
            LibvdaEvent::ProvidePictureBuffers {
                min_num_buffers,
                width,
                height,
                visible_rect_left,
                visible_rect_top,
                visible_rect_right,
                visible_rect_bottom,
            } => DecoderEvent::ProvidePictureBuffers {
                min_num_buffers,
                width,
                height,
                visible_rect: Rect {
                    left: visible_rect_left,
                    top: visible_rect_top,
                    right: visible_rect_right,
                    bottom: visible_rect_bottom,
                },
            },
            LibvdaEvent::PictureReady {
                buffer_id,
                bitstream_id,
                ..
            } => DecoderEvent::PictureReady {
                picture_buffer_id: buffer_id,
                // Restore the truncated timestamp to its original value (hopefully).
                timestamp: TIMESTAMP_TRUNCATE_FACTOR.wrapping_mul(bitstream_id as u64),
            },
            LibvdaEvent::NotifyEndOfBitstreamBuffer { bitstream_id } => {
                // We will patch the timestamp to the actual bitstream ID in `read_event`.
                DecoderEvent::NotifyEndOfBitstreamBuffer(bitstream_id as u32)
            }
            LibvdaEvent::NotifyError(resp) => DecoderEvent::NotifyError(
                VideoError::BackendFailure(anyhow!("VDA failure: {}", resp)),
            ),
            LibvdaEvent::ResetResponse(resp) => {
                DecoderEvent::ResetCompleted(vda_response_to_result(resp))
            }
            LibvdaEvent::FlushResponse(resp) => {
                DecoderEvent::FlushCompleted(vda_response_to_result(resp))
            }
        }
    }
}

// Used by DecoderSession::get_capabilities().
fn from_pixel_format(
    fmt: &libvda::PixelFormat,
    mask: u64,
    width_range: FormatRange,
    height_range: FormatRange,
) -> FormatDesc {
    let format = match fmt {
        libvda::PixelFormat::NV12 => Format::NV12,
        libvda::PixelFormat::YV12 => Format::YUV420,
    };

    let frame_formats = vec![FrameFormat {
        width: width_range,
        height: height_range,
        bitrates: Vec::new(),
    }];

    FormatDesc {
        mask,
        format,
        frame_formats,
        plane_align: 1,
    }
}

pub struct VdaDecoderSession {
    vda_session: libvda::decode::Session,
    format: Option<libvda::PixelFormat>,
    /// libvda can only handle 32-bit timestamps, so we pass it the truncated timestamp as the
    /// bitstream ID and use this table to map it back to the resource ID when the bitstream
    /// buffer is returned.
    timestamp_to_resource_id: BTreeMap<u32, u32>,
}

impl DecoderSession for VdaDecoderSession {
    fn set_output_parameters(&mut self, buffer_count: usize, format: Format) -> VideoResult<()> {
        self.format = Some(libvda::PixelFormat::try_from(format)?);
        Ok(self.vda_session.set_output_buffer_count(buffer_count)?)
    }

    fn decode(
        &mut self,
        resource_id: u32,
        timestamp: u64,
        resource: GuestResourceHandle,
        offset: u32,
        bytes_used: u32,
    ) -> VideoResult<()> {
        let handle = match resource {
            GuestResourceHandle::VirtioObject(handle) => handle,
            _ => {
                return Err(VideoError::BackendFailure(anyhow!(
                    "VDA backend only supports virtio object resources"
                )))
            }
        };

        // While the virtio-video driver handles timestamps in nanoseconds, Chrome expects
        // second-granularity timestamps. So we need to convert from nsec to sec. Note that this
        // value is expected to be not a Unix timestamp but a frame number that the Android V4L2
        // C2 decoder passes to the driver as a 32-bit integer in our implementation, so this
        // conversion must not overflow.
        let truncated_timestamp = (timestamp / TIMESTAMP_TRUNCATE_FACTOR) as u32;
        self.timestamp_to_resource_id
            .insert(truncated_timestamp, resource_id);

        if truncated_timestamp as u64 * TIMESTAMP_TRUNCATE_FACTOR != timestamp {
            warn!("truncation of timestamp {} resulted in precision loss. Only send timestamps with second granularity to this backend.", timestamp);
        }

        Ok(self.vda_session.decode(
            truncated_timestamp as i32, // bitstream_id
            // Steal the descriptor of the resource, as libvda will close it.
            handle.desc.into_raw_descriptor(),
            offset,
            bytes_used,
        )?)
    }

    fn flush(&mut self) -> VideoResult<()> {
        Ok(self.vda_session.flush()?)
    }

    fn reset(&mut self) -> VideoResult<()> {
        Ok(self.vda_session.reset()?)
    }

    fn clear_output_buffers(&mut self) -> VideoResult<()> {
        Ok(())
    }

    fn event_pipe(&self) -> &dyn AsRawDescriptor {
        self.vda_session.pipe()
    }

    fn use_output_buffer(
        &mut self,
        picture_buffer_id: i32,
        resource: GuestResource,
    ) -> VideoResult<()> {
        let handle = match resource.handle {
            GuestResourceHandle::VirtioObject(handle) => handle,
            _ => {
                return Err(VideoError::BackendFailure(anyhow!(
                    "VDA backend only supports virtio object resources"
                )))
            }
        };
        let vda_planes: Vec<libvda::FramePlane> = resource.planes.iter().map(Into::into).collect();

        Ok(self.vda_session.use_output_buffer(
            picture_buffer_id,
            self.format.ok_or(VideoError::BackendFailure(anyhow!(
                "set_output_parameters() must be called before use_output_buffer()"
            )))?,
            // Steal the descriptor of the resource, as libvda will close it.
            handle.desc.into_raw_descriptor(),
            &vda_planes,
            handle.modifier,
        )?)
    }

    fn reuse_output_buffer(&mut self, picture_buffer_id: i32) -> VideoResult<()> {
        Ok(self.vda_session.reuse_output_buffer(picture_buffer_id)?)
    }

    fn read_event(&mut self) -> VideoResult<DecoderEvent> {
        self.vda_session
            .read_event()
            .map(Into::into)
            // Libvda returned the truncated timestamp that we gave it as the timestamp of this
            // buffer. Replace it with the bitstream ID that was passed to `decode` for this
            // resource.
            .map(|mut e| {
                if let DecoderEvent::NotifyEndOfBitstreamBuffer(timestamp) = &mut e {
                    let bitstream_id = self
                        .timestamp_to_resource_id
                        .remove(timestamp)
                        .unwrap_or_else(|| {
                            error!("timestamp {} not registered!", *timestamp);
                            0
                        });
                    *timestamp = bitstream_id;
                }
                e
            })
            .map_err(Into::into)
    }
}

/// A VDA decoder backend that can be passed to `Decoder::new` in order to create a working decoder.
pub struct LibvdaDecoder(libvda::decode::VdaInstance);

/// SAFETY: safe because the `Rc`s in `VdaInstance` are always used from the same thread.
unsafe impl Send for LibvdaDecoder {}

impl LibvdaDecoder {
    /// Create a decoder backend instance that can be used to instantiate a decoder.
    pub fn new(backend_type: libvda::decode::VdaImplType) -> VideoResult<Self> {
        Ok(Self(libvda::decode::VdaInstance::new(backend_type)?))
    }
}

impl DecoderBackend for LibvdaDecoder {
    type Session = VdaDecoderSession;

    fn new_session(&mut self, format: Format) -> VideoResult<Self::Session> {
        let profile = libvda::Profile::try_from(format)?;

        Ok(VdaDecoderSession {
            vda_session: self.0.open_session(profile).map_err(|e| {
                error!("failed to open a session for {:?}: {}", format, e);
                VideoError::InvalidOperation
            })?,
            format: None,
            timestamp_to_resource_id: Default::default(),
        })
    }

    fn get_capabilities(&self) -> Capability {
        let caps = libvda::decode::VdaInstance::get_capabilities(&self.0);

        // Set the lowest |# of supported raw formats| bits because we can assume that any
        // combination of (a coded format, a raw format) is valid in Chrome.
        let mask = !(u64::MAX << caps.output_formats.len());
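        // e.g. with 3 output formats (illustrative count), mask == !(u64::MAX << 3) == 0b111.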

        let mut in_fmts = vec![];
        let mut profiles: BTreeMap<Format, Vec<Profile>> = Default::default();
        for fmt in caps.input_formats.iter() {
            match Profile::from_libvda_profile(fmt.profile) {
                Some(profile) => {
                    let format = profile.to_format();
                    in_fmts.push(FormatDesc {
                        mask,
                        format,
                        frame_formats: vec![FrameFormat {
                            width: FormatRange {
                                min: fmt.min_width,
                                max: fmt.max_width,
                                step: 1,
                            },
                            height: FormatRange {
                                min: fmt.min_height,
                                max: fmt.max_height,
                                step: 1,
                            },
                            bitrates: Vec::new(),
                        }],
                        plane_align: 1,
                    });
                    match profiles.entry(format) {
                        Entry::Occupied(mut e) => e.get_mut().push(profile),
                        Entry::Vacant(e) => {
                            e.insert(vec![profile]);
                        }
                    }
                }
                None => {
                    warn!(
                        "No virtio-video equivalent for libvda profile, skipping: {:?}",
                        fmt.profile
                    );
                }
            }
        }

        let levels: BTreeMap<Format, Vec<Level>> = if profiles.contains_key(&Format::H264) {
            // We only support Level 1.0 for H.264.
            vec![(Format::H264, vec![Level::H264_1_0])]
                .into_iter()
                .collect()
        } else {
            Default::default()
        };

        // Prepare the {min, max} of {width, height}.
        // While these values are associated with each input format in libvda,
        // they are associated with each output format in the virtio-video protocol.
        // Thus, we compute the max of the min values and the min of the max values here.
        let min_width = caps.input_formats.iter().map(|fmt| fmt.min_width).max();
        let max_width = caps.input_formats.iter().map(|fmt| fmt.max_width).min();
        let min_height = caps.input_formats.iter().map(|fmt| fmt.min_height).max();
        let max_height = caps.input_formats.iter().map(|fmt| fmt.max_height).min();
        let width_range = FormatRange {
            min: min_width.unwrap_or(0),
            max: max_width.unwrap_or(0),
            step: 1,
        };
        let height_range = FormatRange {
            min: min_height.unwrap_or(0),
            max: max_height.unwrap_or(0),
            step: 1,
        };

        // Set the lowest |# of supported coded formats| bits because we can assume that any
        // combination of (a coded format, a raw format) is valid in Chrome.
        let mask = !(u64::MAX << caps.input_formats.len());
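        // e.g. with 2 coded formats (illustrative count), mask == !(u64::MAX << 2) == 0b11.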
        let out_fmts = caps
            .output_formats
            .iter()
            .map(|fmt| from_pixel_format(fmt, mask, width_range, height_range))
            .collect();

        Capability::new(in_fmts, out_fmts, profiles, levels)
    }
}