1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::collections::HashMap;
6 use std::ffi::CString;
7 use std::fs::File;
8 use std::fs::OpenOptions;
9 use std::io;
10 use std::mem;
11 use std::os::raw::c_ulong;
12 use std::os::unix::prelude::FileExt;
13 use std::path::Path;
14 use std::path::PathBuf;
15 #[cfg(all(target_os = "android", target_arch = "aarch64"))]
16 use std::ptr::addr_of_mut;
17 use std::slice;
18 use std::sync::Arc;
19
20 use base::error;
21 use base::ioctl;
22 use base::ioctl_with_mut_ptr;
23 use base::ioctl_with_mut_ref;
24 use base::ioctl_with_ptr;
25 use base::ioctl_with_ref;
26 use base::ioctl_with_val;
27 use base::warn;
28 use base::AsRawDescriptor;
29 use base::Error;
30 use base::Event;
31 use base::FromRawDescriptor;
32 use base::RawDescriptor;
33 use base::SafeDescriptor;
34 use cfg_if::cfg_if;
35 use data_model::vec_with_array_field;
36 use hypervisor::DeviceKind;
37 use hypervisor::Vm;
38 use once_cell::sync::OnceCell;
39 use rand::seq::index::sample;
40 use rand::thread_rng;
41 use remain::sorted;
42 use resources::address_allocator::AddressAllocator;
43 use resources::AddressRange;
44 use resources::Alloc;
45 use resources::Error as ResourcesError;
46 use sync::Mutex;
47 use thiserror::Error;
48 use vfio_sys::vfio::vfio_acpi_dsm;
49 use vfio_sys::vfio::VFIO_IRQ_SET_DATA_BOOL;
50 use vfio_sys::*;
51 use zerocopy::AsBytes;
52 use zerocopy::FromBytes;
53
54 use crate::IommuDevType;
55
56 #[sorted]
57 #[derive(Error, Debug)]
58 pub enum VfioError {
59 #[error("failed to duplicate VfioContainer")]
60 ContainerDupError,
61 #[error("failed to set container's IOMMU driver type as {0:?}: {1}")]
62 ContainerSetIOMMU(IommuType, Error),
63 #[error("failed to create KVM vfio device: {0}")]
64 CreateVfioKvmDevice(Error),
65 #[error("failed to get Group Status: {0}")]
66 GetGroupStatus(Error),
67 #[error("failed to get vfio device fd: {0}")]
68 GroupGetDeviceFD(Error),
69 #[error("failed to add vfio group into vfio container: {0}")]
70 GroupSetContainer(Error),
71 #[error("group is inviable")]
72 GroupViable,
73 #[error("invalid region index: {0}")]
74 InvalidIndex(usize),
75 #[error("invalid operation")]
76 InvalidOperation,
77 #[error("invalid file path")]
78 InvalidPath,
79 #[error("failed to add guest memory map into iommu table: {0}")]
80 IommuDmaMap(Error),
81 #[error("failed to remove guest memory map from iommu table: {0}")]
82 IommuDmaUnmap(Error),
83 #[error("failed to get IOMMU cap info from host")]
84 IommuGetCapInfo,
85 #[error("failed to get IOMMU info from host: {0}")]
86 IommuGetInfo(Error),
87 #[error("failed to attach device to pKVM pvIOMMU: {0}")]
88 KvmPviommuSetConfig(Error),
89 #[error("failed to set KVM vfio device's attribute: {0}")]
90 KvmSetDeviceAttr(Error),
91 #[error("AddressAllocator is unavailable")]
92 NoRescAlloc,
93 #[error("failed to open /dev/vfio/vfio container: {0}")]
94 OpenContainer(io::Error),
95 #[error("failed to open {1} group: {0}")]
96 OpenGroup(io::Error, String),
97 #[error("failed to read {1} link: {0}")]
98 ReadLink(io::Error, PathBuf),
99 #[error("resources error: {0}")]
100 Resources(ResourcesError),
101 #[error("unknown vfio device type (flags: {0:#x})")]
102 UnknownDeviceType(u32),
103 #[error("failed to call vfio device's ACPI _DSM: {0}")]
104 VfioAcpiDsm(Error),
105 #[error("failed to disable vfio deviece's acpi notification: {0}")]
106 VfioAcpiNotificationDisable(Error),
107 #[error("failed to enable vfio deviece's acpi notification: {0}")]
108 VfioAcpiNotificationEnable(Error),
109 #[error("failed to test vfio deviece's acpi notification: {0}")]
110 VfioAcpiNotificationTest(Error),
111 #[error(
112 "vfio API version doesn't match with VFIO_API_VERSION defined in vfio_sys/src/vfio.rs"
113 )]
114 VfioApiVersion,
115 #[error("failed to get vfio device's info or info doesn't match: {0}")]
116 VfioDeviceGetInfo(Error),
117 #[error("failed to get vfio device's region info: {0}")]
118 VfioDeviceGetRegionInfo(Error),
119 #[error("container doesn't support IOMMU driver type {0:?}")]
120 VfioIommuSupport(IommuType),
121 #[error("failed to disable vfio deviece's irq: {0}")]
122 VfioIrqDisable(Error),
123 #[error("failed to enable vfio deviece's irq: {0}")]
124 VfioIrqEnable(Error),
125 #[error("failed to mask vfio deviece's irq: {0}")]
126 VfioIrqMask(Error),
127 #[error("failed to unmask vfio deviece's irq: {0}")]
128 VfioIrqUnmask(Error),
129 #[error("failed to enter vfio deviece's low power state: {0}")]
130 VfioPmLowPowerEnter(Error),
131 #[error("failed to exit vfio deviece's low power state: {0}")]
132 VfioPmLowPowerExit(Error),
133 }
134
/// Crate-local result alias: all fallible VFIO operations return [`VfioError`].
type Result<T> = std::result::Result<T, VfioError>;
136
/// Returns the errno-derived error for the most recent failing syscall;
/// call immediately after a failed ioctl, before any other libc call.
fn get_error() -> Error {
    Error::last()
}
140
// Process-wide KVM VFIO pseudo-device descriptor, created lazily on first use
// and then shared by every container/group in this process.
static KVM_VFIO_FILE: OnceCell<SafeDescriptor> = OnceCell::new();
142
/// The kind of VFIO device bound to the host driver (PCI or platform bus).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum VfioDeviceType {
    Pci,
    Platform,
}
148
// Which operation to perform on the KVM VFIO device when (de)registering a
// group (maps to KVM_DEV_VFIO_GROUP_ADD / KVM_DEV_VFIO_GROUP_DEL).
enum KvmVfioGroupOps {
    Add,
    Delete,
}
153
/// Handle to a pKVM paravirtualized IOMMU (pvIOMMU) instance obtained from the
/// KVM VFIO device. Only functional on Android/aarch64 builds.
#[derive(Debug)]
pub struct KvmVfioPviommu {
    // File descriptor returned by the KVM_DEV_VFIO_PVIOMMU_ATTACH attribute.
    file: File,
}
158
impl KvmVfioPviommu {
    /// Creates a pvIOMMU instance by attaching one to the VM's KVM VFIO device.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics with
    /// `unimplemented!()` on other targets.
    pub fn new(vm: &impl Vm) -> Result<Self> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let file = Self::ioctl_kvm_dev_vfio_pviommu_attach(vm)?;

                Ok(Self { file })
            } else {
                let _ = vm;
                unimplemented!()
            }
        }
    }

    /// Configures this pvIOMMU to translate the device's stream ID at index
    /// `sid_idx` as the virtual stream ID `vsid`.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics with
    /// `unimplemented!()` on other targets.
    pub fn attach<T: AsRawDescriptor>(&self, device: &T, sid_idx: u32, vsid: u32) -> Result<()> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                self.ioctl_kvm_pviommu_set_config(device, sid_idx, vsid)
            } else {
                let _ = device;
                let _ = sid_idx;
                let _ = vsid;
                unimplemented!()
            }
        }
    }

    /// Returns the identifier the guest uses to name this pvIOMMU.
    ///
    /// Panics if the raw descriptor does not fit in a `u32` (negative fds are
    /// impossible for an open file, so this is an invariant, not input error).
    pub fn id(&self) -> u32 {
        let fd = self.as_raw_descriptor();
        // Guests identify pvIOMMUs to the hypervisor using the corresponding VMM FDs.
        fd.try_into().unwrap()
    }

    /// Queries how many stream IDs (sids) `device` exposes.
    ///
    /// Only implemented for Android/aarch64 (pKVM); panics with
    /// `unimplemented!()` on other targets.
    pub fn get_sid_count<T: AsRawDescriptor>(vm: &impl Vm, device: &T) -> Result<u32> {
        cfg_if! {
            if #[cfg(all(target_os = "android", target_arch = "aarch64"))] {
                let info = Self::ioctl_kvm_dev_vfio_pviommu_get_info(vm, device)?;

                Ok(info.nr_sids)
            } else {
                let _ = vm;
                let _ = device;
                unimplemented!()
            }
        }
    }

    // Issues KVM_SET_DEVICE_ATTR with KVM_DEV_VFIO_PVIOMMU_ATTACH; on success
    // the ioctl's non-negative return value is a new pvIOMMU file descriptor.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_attach(vm: &impl Vm) -> Result<File> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_ATTACH as u64,
            addr: 0,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            // SAFETY: Safe as we verify the return value.
            Ok(unsafe { File::from_raw_descriptor(ret) })
        }
    }

    // Issues KVM_PVIOMMU_SET_CONFIG on this pvIOMMU's fd to map the device's
    // sid at `sid_idx` to `vsid`.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_pviommu_set_config<T: AsRawDescriptor>(
        &self,
        device: &T,
        sid_idx: u32,
        vsid: u32,
    ) -> Result<()> {
        let config = kvm_sys::kvm_vfio_iommu_config {
            device_fd: device.as_raw_descriptor(),
            sid_idx,
            vsid,
        };

        // SAFETY:
        // Safe as we are the owner of device and config which are valid, and we verify the return
        // value.
        let ret = unsafe { ioctl_with_ref(self, kvm_sys::KVM_PVIOMMU_SET_CONFIG, &config) };

        if ret < 0 {
            Err(VfioError::KvmPviommuSetConfig(get_error()))
        } else {
            Ok(())
        }
    }

    // Issues KVM_SET_DEVICE_ATTR with KVM_DEV_VFIO_PVIOMMU_GET_INFO; the kernel
    // fills `info` (notably `nr_sids`) through the `addr` pointer.
    #[cfg(all(target_os = "android", target_arch = "aarch64"))]
    fn ioctl_kvm_dev_vfio_pviommu_get_info<T: AsRawDescriptor>(
        vm: &impl Vm,
        device: &T,
    ) -> Result<kvm_sys::kvm_vfio_iommu_info> {
        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;

        let mut info = kvm_sys::kvm_vfio_iommu_info {
            device_fd: device.as_raw_descriptor(),
            nr_sids: 0,
        };

        let vfio_dev_attr = kvm_sys::kvm_device_attr {
            flags: 0,
            group: kvm_sys::KVM_DEV_VFIO_PVIOMMU,
            attr: kvm_sys::KVM_DEV_VFIO_PVIOMMU_GET_INFO as u64,
            addr: addr_of_mut!(info) as usize as u64,
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_attr, which is valid.
        let ret =
            unsafe { ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr) };

        if ret < 0 {
            Err(VfioError::KvmSetDeviceAttr(get_error()))
        } else {
            Ok(info)
        }
    }
}
290
impl AsRawDescriptor for KvmVfioPviommu {
    // Exposes the underlying pvIOMMU fd; does not transfer ownership.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.file.as_raw_descriptor()
    }
}
296
/// IOMMU backend driver types accepted by VFIO_SET_IOMMU, with their kernel
/// ABI values as discriminants.
#[repr(u32)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum IommuType {
    Type1V2 = VFIO_TYPE1v2_IOMMU,
    PkvmPviommu = VFIO_PKVM_PVIOMMU,
    // ChromeOS specific vfio_iommu_type1 implementation that is optimized for
    // small, dynamic mappings. For clients which create large, relatively
    // static mappings, Type1V2 is still preferred.
    //
    // See crrev.com/c/3593528 for the implementation.
    Type1ChromeOS = 100001,
}
309
/// A VFIO container: holds multiple [`VfioGroup`]s and delegates one shared
/// IOMMU domain table to them.
pub struct VfioContainer {
    // Open handle to /dev/vfio/vfio.
    container: File,
    // Groups added to this container, keyed by group id.
    groups: HashMap<u32, Arc<Mutex<VfioGroup>>>,
    // IOMMU driver selected via VFIO_SET_IOMMU; `None` until the first group
    // is added and the driver is configured.
    iommu_type: Option<IommuType>,
}
316
extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T> where T: FromBytes,317 fn extract_vfio_struct<T>(bytes: &[u8], offset: usize) -> Option<T>
318 where
319 T: FromBytes,
320 {
321 bytes.get(offset..).and_then(T::read_from_prefix)
322 }
323
// VFIO API version this code was written against; verified at container
// creation via VFIO_GET_API_VERSION.
const VFIO_API_VERSION: u8 = 0;
impl VfioContainer {
    /// Opens `/dev/vfio/vfio` and wraps it in a new container.
    pub fn new() -> Result<Self> {
        let container = OpenOptions::new()
            .read(true)
            .write(true)
            .open("/dev/vfio/vfio")
            .map_err(VfioError::OpenContainer)?;

        Self::new_from_container(container)
    }

    // Construct a VfioContainer from an existing container file, validating
    // that the kernel's VFIO API version matches the one we were built for.
    pub fn new_from_container(container: File) -> Result<Self> {
        // SAFETY:
        // Safe as file is vfio container descriptor and ioctl is defined by kernel.
        let version = unsafe { ioctl(&container, VFIO_GET_API_VERSION) };
        if version as u8 != VFIO_API_VERSION {
            return Err(VfioError::VfioApiVersion);
        }

        Ok(VfioContainer {
            container,
            groups: HashMap::new(),
            iommu_type: None,
        })
    }

    /// Returns true if the group identified by `group_id` was already added to
    /// this container.
    fn is_group_set(&self, group_id: u32) -> bool {
        self.groups.contains_key(&group_id)
    }

    /// Asks the kernel whether this container supports IOMMU driver `val`.
    fn check_extension(&self, val: IommuType) -> bool {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        let ret = unsafe { ioctl_with_val(self, VFIO_CHECK_EXTENSION, val as c_ulong) };
        ret != 0
    }

    /// Selects IOMMU driver `val` for this container; returns the raw ioctl
    /// result (0 on success).
    fn set_iommu(&mut self, val: IommuType) -> i32 {
        // SAFETY:
        // Safe as file is vfio container and make sure val is valid.
        unsafe { ioctl_with_val(self, VFIO_SET_IOMMU, val as c_ulong) }
    }

    /// Verifies support for IOMMU driver `val`, selects it, and records it in
    /// `self.iommu_type` on success.
    fn set_iommu_checked(&mut self, val: IommuType) -> Result<()> {
        if !self.check_extension(val) {
            Err(VfioError::VfioIommuSupport(val))
        } else if self.set_iommu(val) != 0 {
            Err(VfioError::ContainerSetIOMMU(val, get_error()))
        } else {
            self.iommu_type = Some(val);
            Ok(())
        }
    }

    /// Maps `[user_addr, user_addr + size)` into the IOMMU at `iova`,
    /// optionally writable. Panics if called before an IOMMU driver was
    /// configured; returns `InvalidOperation` on the pKVM backend, which does
    /// not support userspace-driven DMA mapping.
    ///
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    pub unsafe fn vfio_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_map called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_map(iova, size, user_addr, write_en)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 implementation of vfio_dma_map, issuing VFIO_IOMMU_MAP_DMA.
    //
    /// # Safety
    ///
    /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
    unsafe fn vfio_iommu_type1_dma_map(
        &self,
        iova: u64,
        size: u64,
        user_addr: u64,
        write_en: bool,
    ) -> Result<()> {
        let mut dma_map = vfio_iommu_type1_dma_map {
            argsz: mem::size_of::<vfio_iommu_type1_dma_map>() as u32,
            flags: VFIO_DMA_MAP_FLAG_READ,
            vaddr: user_addr,
            iova,
            size,
        };

        // Mappings are always readable; writability is opt-in.
        if write_en {
            dma_map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
        }

        let ret = ioctl_with_ref(self, VFIO_IOMMU_MAP_DMA, &dma_map);
        if ret != 0 {
            return Err(VfioError::IommuDmaMap(get_error()));
        }

        Ok(())
    }

    /// Removes the IOMMU mapping covering `[iova, iova + size)`. Panics if
    /// called before an IOMMU driver was configured; `InvalidOperation` on the
    /// pKVM backend.
    pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        match self
            .iommu_type
            .expect("vfio_dma_unmap called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_dma_unmap(iova, size)
            }
            IommuType::PkvmPviommu => Err(VfioError::InvalidOperation),
        }
    }

    // Type1 implementation of vfio_dma_unmap, issuing VFIO_IOMMU_UNMAP_DMA.
    // Treats a partial unmap (kernel reports fewer bytes than requested) as an
    // error as well.
    fn vfio_iommu_type1_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
        let mut dma_unmap = vfio_iommu_type1_dma_unmap {
            argsz: mem::size_of::<vfio_iommu_type1_dma_unmap>() as u32,
            flags: 0,
            iova,
            size,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, dma_unmap is constructed by us, and
        // we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_UNMAP_DMA, &mut dma_unmap) };
        if ret != 0 || dma_unmap.size != size {
            return Err(VfioError::IommuDmaUnmap(get_error()));
        }

        Ok(())
    }

    /// Returns the bitmask of IOMMU page sizes supported by this container.
    /// Panics if called before an IOMMU driver was configured; the pKVM
    /// backend reports 0 (no page-size info available).
    pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
        match self
            .iommu_type
            .expect("vfio_get_iommu_page_size_mask called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iommu_page_size_mask()
            }
            IommuType::PkvmPviommu => Ok(0),
        }
    }

    // Type1 implementation: VFIO_IOMMU_GET_INFO must report the PGSIZES flag
    // for `iova_pgsizes` to be meaningful.
    fn vfio_iommu_type1_get_iommu_page_size_mask(&self) -> Result<u64> {
        let mut iommu_info = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info) };
        if ret != 0 || (iommu_info.flags & VFIO_IOMMU_INFO_PGSIZES) == 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        Ok(iommu_info.iova_pgsizes)
    }

    /// Returns the usable IOVA ranges of this container. Panics if called
    /// before an IOMMU driver was configured; the pKVM backend reports an
    /// empty list.
    pub fn vfio_iommu_iova_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        match self
            .iommu_type
            .expect("vfio_iommu_iova_get_iova_ranges called before configuring IOMMU")
        {
            IommuType::Type1V2 | IommuType::Type1ChromeOS => {
                self.vfio_iommu_type1_get_iova_ranges()
            }
            IommuType::PkvmPviommu => Ok(Vec::new()),
        }
    }

    // Type1 implementation: performs the two-call VFIO_IOMMU_GET_INFO dance
    // (size query, then full fetch), then walks the capability chain in the
    // returned buffer looking for the IOVA-range capability.
    fn vfio_iommu_type1_get_iova_ranges(&self) -> Result<Vec<AddressRange>> {
        // Query the buffer size needed to fetch the capabilities.
        let mut iommu_info_argsz = vfio_iommu_type1_info {
            argsz: mem::size_of::<vfio_iommu_type1_info>() as u32,
            flags: 0,
            iova_pgsizes: 0,
            ..Default::default()
        };

        // SAFETY:
        // Safe as file is vfio container, iommu_info_argsz has valid values,
        // and we check the return value
        let ret = unsafe { ioctl_with_mut_ref(self, VFIO_IOMMU_GET_INFO, &mut iommu_info_argsz) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        if (iommu_info_argsz.flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Allocate a buffer big enough for the header plus the kernel-reported
        // capability payload, then repeat the ioctl to fill it.
        let mut iommu_info = vec_with_array_field::<vfio_iommu_type1_info, u8>(
            iommu_info_argsz.argsz as usize - mem::size_of::<vfio_iommu_type1_info>(),
        );
        iommu_info[0].argsz = iommu_info_argsz.argsz;
        let ret =
            // SAFETY:
            // Safe as file is vfio container, iommu_info has valid values,
            // and we check the return value
            unsafe { ioctl_with_mut_ptr(self, VFIO_IOMMU_GET_INFO, iommu_info.as_mut_ptr()) };
        if ret != 0 {
            return Err(VfioError::IommuGetInfo(get_error()));
        }

        // SAFETY:
        // Safe because we initialized iommu_info with enough space, u8 has less strict
        // alignment, and since it will no longer be mutated.
        let info_bytes = unsafe {
            std::slice::from_raw_parts(
                iommu_info.as_ptr() as *const u8,
                iommu_info_argsz.argsz as usize,
            )
        };

        if (iommu_info[0].flags & VFIO_IOMMU_INFO_CAPS) == 0 {
            return Err(VfioError::IommuGetCapInfo);
        }

        // Walk the capability chain; each header's `next` is the byte offset
        // of the following capability (0 terminates the chain).
        let mut offset = iommu_info[0].cap_offset as usize;
        while offset != 0 {
            let header = extract_vfio_struct::<vfio_info_cap_header>(info_bytes, offset)
                .ok_or(VfioError::IommuGetCapInfo)?;

            if header.id == VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE as u16 && header.version == 1 {
                let iova_header =
                    extract_vfio_struct::<vfio_iommu_type1_info_cap_iova_range_header>(
                        info_bytes, offset,
                    )
                    .ok_or(VfioError::IommuGetCapInfo)?;
                let range_offset = offset + mem::size_of::<vfio_iommu_type1_info_cap_iova_range>();
                let mut ret = Vec::new();
                for i in 0..iova_header.nr_iovas {
                    ret.push(
                        extract_vfio_struct::<vfio_iova_range>(
                            info_bytes,
                            range_offset + i as usize * mem::size_of::<vfio_iova_range>(),
                        )
                        .ok_or(VfioError::IommuGetCapInfo)?,
                    );
                }
                return Ok(ret
                    .iter()
                    .map(|range| AddressRange {
                        start: range.start,
                        end: range.end,
                    })
                    .collect());
            }
            offset = header.next as usize;
        }

        // Capability chain exhausted without finding the IOVA-range capability.
        Err(VfioError::IommuGetCapInfo)
    }

    // Picks and configures the IOMMU driver appropriate for `iommu_dev`.
    fn set_iommu_from(&mut self, iommu_dev: IommuDevType) -> Result<()> {
        match iommu_dev {
            IommuDevType::CoIommu | IommuDevType::VirtioIommu => {
                // If we expect granular, dynamic mappings, try the ChromeOS Type1ChromeOS first,
                // then fall back to upstream versions.
                self.set_iommu_checked(IommuType::Type1ChromeOS)
                    .or_else(|_| self.set_iommu_checked(IommuType::Type1V2))
            }
            IommuDevType::NoIommu => self.set_iommu_checked(IommuType::Type1V2),
            IommuDevType::PkvmPviommu => self.set_iommu_checked(IommuType::PkvmPviommu),
        }
    }

    // Returns the group `id`, creating it and performing first-group container
    // initialization (IOMMU selection; identity-mapping guest memory when no
    // IOMMU is in use) as needed, and registering it with the KVM VFIO device.
    fn get_group_with_vm(
        &mut self,
        id: u32,
        vm: &impl Vm,
        iommu_dev: IommuDevType,
    ) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));
        if self.groups.is_empty() {
            self.set_iommu_from(iommu_dev)?;
            // Before the first group is added into container, do once per container
            // initialization. Both coiommu and virtio-iommu rely on small, dynamic
            // mappings. However, if an iommu is not enabled, then we map the entirety
            // of guest memory as a small number of large, static mappings.
            match iommu_dev {
                IommuDevType::CoIommu | IommuDevType::PkvmPviommu | IommuDevType::VirtioIommu => {}
                IommuDevType::NoIommu => {
                    for region in vm.get_memory().regions() {
                        // SAFETY:
                        // Safe because the guest regions are guaranteed not to overlap
                        unsafe {
                            self.vfio_dma_map(
                                region.guest_addr.0,
                                region.size as u64,
                                region.host_addr as u64,
                                true,
                            )
                        }?;
                    }
                }
            }
        }

        let kvm_vfio_file = KVM_VFIO_FILE
            .get_or_try_init(|| vm.create_device(DeviceKind::Vfio))
            .map_err(VfioError::CreateVfioKvmDevice)?;
        group
            .lock()
            .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Add)?;

        self.groups.insert(id, group.clone());

        Ok(group)
    }

    // Returns the group `id`, creating it if needed. Unlike
    // `get_group_with_vm`, always configures Type1V2 and performs no KVM
    // registration or guest-memory mapping.
    fn get_group(&mut self, id: u32) -> Result<Arc<Mutex<VfioGroup>>> {
        if let Some(group) = self.groups.get(&id) {
            return Ok(group.clone());
        }

        let group = Arc::new(Mutex::new(VfioGroup::new(self, id)?));

        if self.groups.is_empty() {
            // Before the first group is added into container, do once per
            // container initialization.
            self.set_iommu_checked(IommuType::Type1V2)?;
        }

        self.groups.insert(id, group.clone());
        Ok(group)
    }

    // Drops group `id` once its device count reaches zero, unregistering it
    // from the KVM VFIO device first. `reduce` decrements the device count
    // before checking.
    fn remove_group(&mut self, id: u32, reduce: bool) {
        let mut remove = false;

        if let Some(group) = self.groups.get(&id) {
            if reduce {
                group.lock().reduce_device_num();
            }
            if group.lock().device_num() == 0 {
                let kvm_vfio_file = KVM_VFIO_FILE.get().expect("kvm vfio file isn't created");
                if group
                    .lock()
                    .kvm_device_set_group(kvm_vfio_file, KvmVfioGroupOps::Delete)
                    .is_err()
                {
                    warn!("failing in remove vfio group from kvm device");
                }
                remove = true;
            }
        }

        if remove {
            self.groups.remove(&id);
        }
    }

    /// Duplicates the container fd; the caller takes ownership of the returned
    /// raw descriptor.
    pub fn clone_as_raw_descriptor(&self) -> Result<RawDescriptor> {
        // SAFETY: this call is safe because it doesn't modify any memory and we
        // check the return value.
        let raw_descriptor = unsafe { libc::dup(self.container.as_raw_descriptor()) };
        if raw_descriptor < 0 {
            Err(VfioError::ContainerDupError)
        } else {
            Ok(raw_descriptor)
        }
    }

    // Gets group ids for all groups in the container.
    pub fn group_ids(&self) -> Vec<&u32> {
        self.groups.keys().collect()
    }
}
709
impl AsRawDescriptor for VfioContainer {
    // Exposes the /dev/vfio/vfio fd; does not transfer ownership.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.container.as_raw_descriptor()
    }
}
715
// An open VFIO group (/dev/vfio/<id>) plus a count of the devices that have
// been handed out from it (used to decide when the group can be removed).
struct VfioGroup {
    group: File,
    device_num: u32,
}
720
impl VfioGroup {
    /// Opens `/dev/vfio/<id>`, verifies the group is viable, and attaches it
    /// to `container` via VFIO_GROUP_SET_CONTAINER.
    fn new(container: &VfioContainer, id: u32) -> Result<Self> {
        let group_path = format!("/dev/vfio/{}", id);
        let group_file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(Path::new(&group_path))
            .map_err(|e| VfioError::OpenGroup(e, group_path))?;

        let mut group_status = vfio_group_status {
            argsz: mem::size_of::<vfio_group_status>() as u32,
            flags: 0,
        };
        let mut ret =
            // SAFETY:
            // Safe as we are the owner of group_file and group_status which are valid value.
            unsafe { ioctl_with_mut_ref(&group_file, VFIO_GROUP_GET_STATUS, &mut group_status) };
        if ret < 0 {
            return Err(VfioError::GetGroupStatus(get_error()));
        }

        // A group is only usable once all of its devices are bound to vfio.
        if group_status.flags != VFIO_GROUP_FLAGS_VIABLE {
            return Err(VfioError::GroupViable);
        }

        let container_raw_descriptor = container.as_raw_descriptor();
        // SAFETY:
        // Safe as we are the owner of group_file and container_raw_descriptor which are valid
        // value, and we verify the ret value
        ret = unsafe {
            ioctl_with_ref(
                &group_file,
                VFIO_GROUP_SET_CONTAINER,
                &container_raw_descriptor,
            )
        };
        if ret < 0 {
            return Err(VfioError::GroupSetContainer(get_error()));
        }

        Ok(VfioGroup {
            group: group_file,
            device_num: 0,
        })
    }

    /// Resolves the numeric IOMMU group id of the device at `sysfspath` by
    /// reading its `iommu_group` symlink (the link target's file name is the
    /// group id).
    fn get_group_id<P: AsRef<Path>>(sysfspath: P) -> Result<u32> {
        let mut uuid_path = PathBuf::new();
        uuid_path.push(sysfspath);
        uuid_path.push("iommu_group");
        let group_path = uuid_path
            .read_link()
            .map_err(|e| VfioError::ReadLink(e, uuid_path))?;
        let group_osstr = group_path.file_name().ok_or(VfioError::InvalidPath)?;
        let group_str = group_osstr.to_str().ok_or(VfioError::InvalidPath)?;
        let group_id = group_str
            .parse::<u32>()
            .map_err(|_| VfioError::InvalidPath)?;

        Ok(group_id)
    }

    // Registers (Add) or unregisters (Delete) this group with the KVM VFIO
    // pseudo-device via KVM_SET_DEVICE_ATTR; the kernel reads the group fd
    // through the attribute's `addr` pointer.
    fn kvm_device_set_group(
        &self,
        kvm_vfio_file: &SafeDescriptor,
        ops: KvmVfioGroupOps,
    ) -> Result<()> {
        let group_descriptor = self.as_raw_descriptor();
        let group_descriptor_ptr = &group_descriptor as *const i32;
        let vfio_dev_attr = match ops {
            KvmVfioGroupOps::Add => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_ADD as u64,
                addr: group_descriptor_ptr as u64,
            },
            KvmVfioGroupOps::Delete => kvm_sys::kvm_device_attr {
                flags: 0,
                group: kvm_sys::KVM_DEV_VFIO_GROUP,
                attr: kvm_sys::KVM_DEV_VFIO_GROUP_DEL as u64,
                addr: group_descriptor_ptr as u64,
            },
        };

        // SAFETY:
        // Safe as we are the owner of vfio_dev_descriptor and vfio_dev_attr which are valid value,
        // and we verify the return value.
        if 0 != unsafe {
            ioctl_with_ref(kvm_vfio_file, kvm_sys::KVM_SET_DEVICE_ATTR, &vfio_dev_attr)
        } {
            return Err(VfioError::KvmSetDeviceAttr(get_error()));
        }

        Ok(())
    }

    // Opens the device named `name` (e.g. "0000:02:00.0") within this group
    // via VFIO_GROUP_GET_DEVICE_FD.
    fn get_device(&self, name: &str) -> Result<File> {
        let path: CString = CString::new(name.as_bytes()).expect("CString::new() failed");
        let path_ptr = path.as_ptr();

        // SAFETY:
        // Safe as we are the owner of self and path_ptr which are valid value.
        let ret = unsafe { ioctl_with_ptr(self, VFIO_GROUP_GET_DEVICE_FD, path_ptr) };
        if ret < 0 {
            return Err(VfioError::GroupGetDeviceFD(get_error()));
        }

        // SAFETY:
        // Safe as ret is valid descriptor
        Ok(unsafe { File::from_raw_descriptor(ret) })
    }

    // Increments the count of devices handed out from this group.
    fn add_device_num(&mut self) {
        self.device_num += 1;
    }

    // Decrements the count of devices handed out from this group.
    fn reduce_device_num(&mut self) {
        self.device_num -= 1;
    }

    // Returns the number of devices currently handed out from this group.
    fn device_num(&self) -> u32 {
        self.device_num
    }
}
845
impl AsRawDescriptor for VfioGroup {
    // Exposes the /dev/vfio/<id> fd; does not transfer ownership.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.group.as_raw_descriptor()
    }
}
851
/// A helper struct for managing VFIO containers, caching them per sharing
/// policy so devices that must share an IOVA space share a container.
#[derive(Default)]
pub struct VfioContainerManager {
    /// One VFIO container shared by all VFIO devices that don't attach to any IOMMU device.
    no_iommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// For IOMMU enabled devices, all VFIO groups that share the same IOVA space are managed by
    /// one VFIO container.
    iommu_containers: Vec<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to the CoIOMMU device.
    coiommu_container: Option<Arc<Mutex<VfioContainer>>>,

    /// One VFIO container shared by all VFIO devices that attach to pKVM.
    pkvm_iommu_container: Option<Arc<Mutex<VfioContainer>>>,
}
868
869 impl VfioContainerManager {
new() -> Self870 pub fn new() -> Self {
871 Self::default()
872 }
873
874 /// The single place to create a VFIO container for a PCI endpoint.
875 ///
876 /// The policy to determine whether an individual or a shared VFIO container
877 /// will be created for this device is governed by the physical PCI topology,
878 /// and the argument iommu_type.
879 ///
880 /// # Arguments
881 ///
882 /// * `sysfspath` - the path to the PCI device, e.g. /sys/bus/pci/devices/0000:02:00.0
883 /// * `iommu_type` - which type of IOMMU is enabled on this device
get_container<P: AsRef<Path>>( &mut self, iommu_type: IommuDevType, sysfspath: Option<P>, ) -> Result<Arc<Mutex<VfioContainer>>>884 pub fn get_container<P: AsRef<Path>>(
885 &mut self,
886 iommu_type: IommuDevType,
887 sysfspath: Option<P>,
888 ) -> Result<Arc<Mutex<VfioContainer>>> {
889 match iommu_type {
890 IommuDevType::NoIommu => {
891 // One VFIO container is used for all IOMMU disabled groups.
892 if let Some(container) = &self.no_iommu_container {
893 Ok(container.clone())
894 } else {
895 let container = Arc::new(Mutex::new(VfioContainer::new()?));
896 self.no_iommu_container = Some(container.clone());
897 Ok(container)
898 }
899 }
900 IommuDevType::VirtioIommu => {
901 let path = sysfspath.ok_or(VfioError::InvalidPath)?;
902 let group_id = VfioGroup::get_group_id(path)?;
903
904 // One VFIO container is used for all devices that belong to one VFIO group.
905 // NOTE: vfio_wrapper relies on each container containing exactly one group.
906 if let Some(container) = self
907 .iommu_containers
908 .iter()
909 .find(|container| container.lock().is_group_set(group_id))
910 {
911 Ok(container.clone())
912 } else {
913 let container = Arc::new(Mutex::new(VfioContainer::new()?));
914 self.iommu_containers.push(container.clone());
915 Ok(container)
916 }
917 }
918 IommuDevType::CoIommu => {
919 // One VFIO container is used for devices attached to CoIommu
920 if let Some(container) = &self.coiommu_container {
921 Ok(container.clone())
922 } else {
923 let container = Arc::new(Mutex::new(VfioContainer::new()?));
924 self.coiommu_container = Some(container.clone());
925 Ok(container)
926 }
927 }
928 IommuDevType::PkvmPviommu => {
929 // One VFIO container is used for devices attached to pKVM
930 if let Some(container) = &self.pkvm_iommu_container {
931 Ok(container.clone())
932 } else {
933 let container = Arc::new(Mutex::new(VfioContainer::new()?));
934 self.pkvm_iommu_container = Some(container.clone());
935 Ok(container)
936 }
937 }
938 }
939 }
940 }
941
/// Vfio Irq type used to enable/disable/mask/unmask vfio irq
pub enum VfioIrqType {
    // Legacy level-triggered INTx interrupt.
    Intx,
    // Message Signaled Interrupts.
    Msi,
    // Extended Message Signaled Interrupts.
    Msix,
}
948
/// Vfio Irq information used to assign and enable/disable/mask/unmask vfio irq
pub struct VfioIrq {
    // IRQ flags — presumably the VFIO_IRQ_INFO_* bits reported by the kernel;
    // confirm against the code that populates this struct.
    pub flags: u32,
    // Index of this IRQ within the device's IRQ list.
    pub index: u32,
}
954
/// Address on VFIO memory region.
#[derive(Debug, Default, Clone)]
pub struct VfioRegionAddr {
    /// region number.
    pub index: usize,
    /// offset in the region.
    pub addr: u64,
}
963
/// Metadata describing one region of a VFIO device.
#[derive(Debug)]
pub struct VfioRegion {
    // flags for this region: read/write/mmap
    flags: u32,
    // Size of the region in bytes.
    size: u64,
    // region offset used to read/write with vfio device descriptor
    offset: u64,
    // vectors for mmap offset and size
    mmaps: Vec<vfio_region_sparse_mmap_area>,
    // type and subtype for cap type
    cap_info: Option<(u32, u32)>,
}
976
/// Vfio device for exposing regions which could be read/write to kernel vfio device.
pub struct VfioDevice {
    // Device fd obtained via VFIO_GROUP_GET_DEVICE_FD.
    dev: File,
    // Device name, e.g. the PCI address "0000:02:00.0".
    name: String,
    // Container this device's group belongs to.
    container: Arc<Mutex<VfioContainer>>,
    // Whether this is a PCI or platform device.
    dev_type: VfioDeviceType,
    // Raw fd of the owning group (ownership stays with the group).
    group_descriptor: RawDescriptor,
    // IOMMU group id of this device.
    group_id: u32,
    // vec for vfio device's regions
    regions: Vec<VfioRegion>,
    // Number of IRQs the device reports.
    num_irqs: u32,

    // Allocator over the container's usable IOVA ranges.
    iova_alloc: Arc<Mutex<AddressAllocator>>,
    // Device-tree symbol, if any — presumably used for platform devices; confirm
    // against the code that consumes it.
    dt_symbol: Option<String>,
    // For pKVM: the pvIOMMU this device is attached to plus its vsids.
    pviommu: Option<(Arc<Mutex<KvmVfioPviommu>>, Vec<u32>)>,
}
993
994 impl VfioDevice {
/// Create a new vfio device, then guest read/write on this device could be
/// transfered into kernel vfio.
/// sysfspath specify the vfio device path in sys file system.
///
/// Registers the device's IOMMU group with `vm` (via `get_group_with_vm`) and,
/// for `IommuDevType::PkvmPviommu`, creates a pvIOMMU and attaches the device
/// with randomly chosen vSIDs.
pub fn new_passthrough<P: AsRef<Path>>(
    sysfspath: &P,
    vm: &impl Vm,
    container: Arc<Mutex<VfioContainer>>,
    iommu_dev: IommuDevType,
    dt_symbol: Option<String>,
) -> Result<Self> {
    let group_id = VfioGroup::get_group_id(sysfspath)?;

    let group = container
        .lock()
        .get_group_with_vm(group_id, vm, iommu_dev)?;
    // The device name is the final component of the sysfs path.
    let name_osstr = sysfspath
        .as_ref()
        .file_name()
        .ok_or(VfioError::InvalidPath)?;
    let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
    let name = String::from(name_str);
    let dev = group.lock().get_device(&name)?;
    let (dev_info, dev_type) = Self::get_device_info(&dev)?;
    let regions = Self::get_regions(&dev, dev_info.num_regions)?;
    group.lock().add_device_num();
    let group_descriptor = group.lock().as_raw_descriptor();

    // Build an allocator covering the IOVA ranges accepted by the container's IOMMU.
    let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
    let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
        .map_err(VfioError::Resources)?;

    let pviommu = if matches!(iommu_dev, IommuDevType::PkvmPviommu) {
        // We currently have a 1-to-1 mapping between pvIOMMUs and VFIO devices.
        let pviommu = KvmVfioPviommu::new(vm)?;

        // Draw `vsids_len` distinct random vSIDs and attach one per endpoint index.
        let vsids_len = KvmVfioPviommu::get_sid_count(vm, &dev)?.try_into().unwrap();
        let max_vsid = u32::MAX.try_into().unwrap();
        let random_vsids = sample(&mut thread_rng(), max_vsid, vsids_len).into_iter();
        let vsids = Vec::from_iter(random_vsids.map(|v| u32::try_from(v).unwrap()));
        for (i, vsid) in vsids.iter().enumerate() {
            pviommu.attach(&dev, i.try_into().unwrap(), *vsid)?;
        }

        Some((Arc::new(Mutex::new(pviommu)), vsids))
    } else {
        None
    };

    Ok(VfioDevice {
        dev,
        name,
        container,
        dev_type,
        group_descriptor,
        group_id,
        regions,
        num_irqs: dev_info.num_irqs,
        iova_alloc: Arc::new(Mutex::new(iova_alloc)),
        dt_symbol,
        pviommu,
    })
}
1057
new<P: AsRef<Path>>( sysfspath: &P, container: Arc<Mutex<VfioContainer>>, ) -> Result<Self>1058 pub fn new<P: AsRef<Path>>(
1059 sysfspath: &P,
1060 container: Arc<Mutex<VfioContainer>>,
1061 ) -> Result<Self> {
1062 let group_id = VfioGroup::get_group_id(sysfspath)?;
1063 let group = container.lock().get_group(group_id)?;
1064 let name_osstr = sysfspath
1065 .as_ref()
1066 .file_name()
1067 .ok_or(VfioError::InvalidPath)?;
1068 let name_str = name_osstr.to_str().ok_or(VfioError::InvalidPath)?;
1069 let name = String::from(name_str);
1070
1071 let dev = match group.lock().get_device(&name) {
1072 Ok(dev) => dev,
1073 Err(e) => {
1074 container.lock().remove_group(group_id, false);
1075 return Err(e);
1076 }
1077 };
1078 let (dev_info, dev_type) = match Self::get_device_info(&dev) {
1079 Ok(dev_info) => dev_info,
1080 Err(e) => {
1081 container.lock().remove_group(group_id, false);
1082 return Err(e);
1083 }
1084 };
1085 let regions = match Self::get_regions(&dev, dev_info.num_regions) {
1086 Ok(regions) => regions,
1087 Err(e) => {
1088 container.lock().remove_group(group_id, false);
1089 return Err(e);
1090 }
1091 };
1092 group.lock().add_device_num();
1093 let group_descriptor = group.lock().as_raw_descriptor();
1094
1095 let iova_ranges = container.lock().vfio_iommu_iova_get_iova_ranges()?;
1096 let iova_alloc = AddressAllocator::new_from_list(iova_ranges, None, None)
1097 .map_err(VfioError::Resources)?;
1098
1099 Ok(VfioDevice {
1100 dev,
1101 name,
1102 container,
1103 dev_type,
1104 group_descriptor,
1105 group_id,
1106 regions,
1107 num_irqs: dev_info.num_irqs,
1108 iova_alloc: Arc::new(Mutex::new(iova_alloc)),
1109 dt_symbol: None,
1110 pviommu: None,
1111 })
1112 }
1113
/// Returns the file for this device.
///
/// This is the descriptor obtained from the VFIO group; reads and writes at
/// region offsets go through this file.
pub fn dev_file(&self) -> &File {
    &self.dev
}
1118
/// Returns PCI device name, formatted as BUS:DEVICE.FUNCTION string.
// NOTE(review): returning `&String` instead of `&str` is unidiomatic but kept
// for caller compatibility.
pub fn device_name(&self) -> &String {
    &self.name
}
1123
/// Returns the type of this VFIO device (PCI or platform), as determined from
/// the flags reported by VFIO_DEVICE_GET_INFO.
pub fn device_type(&self) -> VfioDeviceType {
    self.dev_type
}
1128
/// Returns the DT symbol (node label) of this VFIO device, if one was supplied
/// at construction time (platform passthrough only).
pub fn dt_symbol(&self) -> Option<&str> {
    self.dt_symbol.as_deref()
}
1133
1134 /// Returns the type and indentifier (if applicable) of the IOMMU used by this VFIO device and
1135 /// its master IDs.
iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])>1136 pub fn iommu(&self) -> Option<(IommuDevType, Option<u32>, &[u32])> {
1137 // We currently only report IommuDevType::PkvmPviommu.
1138 if let Some((ref pviommu, ref ids)) = self.pviommu {
1139 Some((
1140 IommuDevType::PkvmPviommu,
1141 Some(pviommu.lock().id()),
1142 ids.as_ref(),
1143 ))
1144 } else {
1145 None
1146 }
1147 }
1148
/// enter the device's low power state
///
/// Issues VFIO_DEVICE_FEATURE with the LOW_POWER_ENTRY feature set and no
/// extra payload.
pub fn pm_low_power_enter(&self) -> Result<()> {
    // Header only; this feature carries no payload bytes.
    let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
    device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
    device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY;
    // SAFETY:
    // Safe as we are the owner of self and power_management which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
    if ret < 0 {
        Err(VfioError::VfioPmLowPowerEnter(get_error()))
    } else {
        Ok(())
    }
}
1163
/// enter the device's low power state with wakeup notification
///
/// `wakeup_evt` is signaled by the kernel when the device exits low power due
/// to a wakeup; the eventfd is passed by value in the feature payload, so the
/// kernel holds its own reference.
pub fn pm_low_power_enter_with_wakeup(&self, wakeup_evt: Event) -> Result<()> {
    let payload = vfio_device_low_power_entry_with_wakeup {
        wakeup_eventfd: wakeup_evt.as_raw_descriptor(),
        reserved: 0,
    };
    let payload_size = mem::size_of::<vfio_device_low_power_entry_with_wakeup>();
    // Header followed by the payload struct in the trailing flexible array.
    let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(payload_size);
    device_feature[0].argsz = (mem::size_of::<vfio_device_feature>() + payload_size) as u32;
    device_feature[0].flags =
        VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP;
    // SAFETY:
    // Safe as we know vfio_device_low_power_entry_with_wakeup has two 32-bit int fields
    unsafe {
        device_feature[0]
            .data
            .as_mut_slice(payload_size)
            .copy_from_slice(
                mem::transmute::<vfio_device_low_power_entry_with_wakeup, [u8; 8]>(payload)
                    .as_slice(),
            );
    }
    // SAFETY:
    // Safe as we are the owner of self and power_management which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
    if ret < 0 {
        Err(VfioError::VfioPmLowPowerEnter(get_error()))
    } else {
        Ok(())
    }
}
1195
/// exit the device's low power state
///
/// Issues VFIO_DEVICE_FEATURE with the LOW_POWER_EXIT feature set and no
/// extra payload.
pub fn pm_low_power_exit(&self) -> Result<()> {
    // Header only; this feature carries no payload bytes.
    let mut device_feature = vec_with_array_field::<vfio_device_feature, u8>(0);
    device_feature[0].argsz = mem::size_of::<vfio_device_feature>() as u32;
    device_feature[0].flags = VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_LOW_POWER_EXIT;
    // SAFETY:
    // Safe as we are the owner of self and power_management which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_FEATURE, &device_feature[0]) };
    if ret < 0 {
        Err(VfioError::VfioPmLowPowerExit(get_error()))
    } else {
        Ok(())
    }
}
1210
1211 /// call _DSM from the device's ACPI table
acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>>1212 pub fn acpi_dsm(&self, args: &[u8]) -> Result<Vec<u8>> {
1213 let count = args.len();
1214 let mut dsm = vec_with_array_field::<vfio_acpi_dsm, u8>(count);
1215 dsm[0].argsz = (mem::size_of::<vfio_acpi_dsm>() + mem::size_of_val(args)) as u32;
1216 dsm[0].padding = 0;
1217 // SAFETY:
1218 // Safe as we allocated enough space to hold args
1219 unsafe {
1220 dsm[0].args.as_mut_slice(count).clone_from_slice(args);
1221 }
1222 // SAFETY:
1223 // Safe as we are the owner of self and dsm which are valid value
1224 let ret = unsafe { ioctl_with_mut_ref(&self.dev, VFIO_DEVICE_ACPI_DSM, &mut dsm[0]) };
1225 if ret < 0 {
1226 Err(VfioError::VfioAcpiDsm(get_error()))
1227 } else {
1228 // SAFETY:
1229 // Safe as we allocated enough space to hold args
1230 let res = unsafe { dsm[0].args.as_slice(count) };
1231 Ok(res.to_vec())
1232 }
1233 }
1234
/// Enable vfio device's ACPI notifications and associate EventFD with device.
///
/// `index` selects the interrupt index used for ACPI notifications; the
/// kernel signals `acpi_notification_eventfd` on each notification.
pub fn acpi_notification_evt_enable(
    &self,
    acpi_notification_eventfd: &Event,
    index: u32,
) -> Result<()> {
    let u32_size = mem::size_of::<u32>();
    // A single eventfd slot follows the header.
    let count = 1;

    let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
    irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
    irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set[0].index = index;
    irq_set[0].start = 0;
    irq_set[0].count = count as u32;

    // SAFETY:
    // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
    let data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
    // The payload is the raw eventfd number in native byte order.
    data.copy_from_slice(&acpi_notification_eventfd.as_raw_descriptor().to_ne_bytes()[..]);

    // SAFETY:
    // Safe as we are the owner of self and irq_set which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
    if ret < 0 {
        Err(VfioError::VfioAcpiNotificationEnable(get_error()))
    } else {
        Ok(())
    }
}
1265
/// Disable vfio device's ACPI notification and disconnect EventFd with device.
pub fn acpi_notification_disable(&self, index: u32) -> Result<()> {
    // count == 0 with DATA_NONE/ACTION_TRIGGER tears down the trigger.
    let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
    irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
    irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set[0].index = index;
    irq_set[0].start = 0;
    irq_set[0].count = 0;

    // SAFETY:
    // Safe as we are the owner of self and irq_set which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
    if ret < 0 {
        Err(VfioError::VfioAcpiNotificationDisable(get_error()))
    } else {
        Ok(())
    }
}
1284
/// Test vfio device's ACPI notification by simulating hardware triggering.
/// When the signaling mechanism is set, the VFIO_IRQ_SET_DATA_BOOL can be used with
/// VFIO_IRQ_SET_ACTION_TRIGGER to perform kernel level interrupt loopback testing.
pub fn acpi_notification_test(&self, index: u32, val: u32) -> Result<()> {
    let u32_size = mem::size_of::<u32>();
    let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
    irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + u32_size) as u32;
    irq_set[0].flags = VFIO_IRQ_SET_DATA_BOOL | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set[0].index = index;
    irq_set[0].start = 0;
    irq_set[0].count = 1;

    // SAFETY:
    // It is safe as enough space is reserved through vec_with_array_field(u32)<count>.
    let data = unsafe { irq_set[0].data.as_mut_slice(u32_size) };
    // The boolean payload (`val`) selects whether to fire the loopback trigger.
    data.copy_from_slice(&val.to_ne_bytes()[..]);

    // SAFETY:
    // Safe as we are the owner of self and irq_set which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
    if ret < 0 {
        Err(VfioError::VfioAcpiNotificationTest(get_error()))
    } else {
        Ok(())
    }
}
1311
/// Enable vfio device's irq and associate Irqfd Event with device.
/// When MSIx is enabled, multi vectors will be supported, and vectors starting from subindex to
/// subindex + descriptors length will be assigned with irqfd in the descriptors array.
/// when index = VFIO_PCI_REQ_IRQ_INDEX, kernel vfio will trigger this event when physical
/// device is removed.
/// If descriptor is None, -1 is assigned to the irq. A value of -1 is used to either de-assign
/// interrupts if already assigned or skip un-assigned interrupts.
pub fn irq_enable(
    &self,
    descriptors: &[Option<&Event>],
    index: u32,
    subindex: u32,
) -> Result<()> {
    let count = descriptors.len();
    let u32_size = mem::size_of::<u32>();
    // Header plus one u32 eventfd slot per vector.
    let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(count);
    irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + count * u32_size) as u32;
    irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set[0].index = index;
    irq_set[0].start = subindex;
    irq_set[0].count = count as u32;

    // SAFETY:
    // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data
    // is u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
    // together as u32. It is safe as enough space is reserved through
    // vec_with_array_field(u32)<count>.
    let mut data = unsafe { irq_set[0].data.as_mut_slice(count * u32_size) };
    // Write each eventfd (or -1 for "leave unassigned") into consecutive u32 slots.
    for descriptor in descriptors.iter().take(count) {
        let (left, right) = data.split_at_mut(u32_size);
        match descriptor {
            Some(fd) => left.copy_from_slice(&fd.as_raw_descriptor().to_ne_bytes()[..]),
            None => left.copy_from_slice(&(-1i32).to_ne_bytes()[..]),
        }
        data = right;
    }

    // SAFETY:
    // Safe as we are the owner of self and irq_set which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
    if ret < 0 {
        Err(VfioError::VfioIrqEnable(get_error()))
    } else {
        Ok(())
    }
}
1358
1359 /// When intx is enabled, irqfd is used to trigger a level interrupt into guest, resample irqfd
1360 /// is used to get guest EOI notification.
1361 /// When host hw generates interrupt, vfio irq handler in host kernel receive and handle it,
1362 /// this handler disable hw irq first, then trigger irqfd to inject interrupt into guest. When
1363 /// resample irqfd is triggered by guest EOI, vfio kernel could enable hw irq, so hw could
1364 /// generate another interrupts.
1365 /// This function enable resample irqfd and let vfio kernel could get EOI notification.
1366 ///
1367 /// descriptor: should be resample IrqFd.
resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()>1368 pub fn resample_virq_enable(&self, descriptor: &Event, index: u32) -> Result<()> {
1369 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(1);
1370 irq_set[0].argsz = (mem::size_of::<vfio_irq_set>() + mem::size_of::<u32>()) as u32;
1371 irq_set[0].flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_UNMASK;
1372 irq_set[0].index = index;
1373 irq_set[0].start = 0;
1374 irq_set[0].count = 1;
1375
1376 {
1377 // SAFETY:
1378 // irq_set.data could be none, bool or descriptor according to flags, so irq_set.data is
1379 // u8 default, here irq_set.data is descriptor as u32, so 4 default u8 are combined
1380 // together as u32. It is safe as enough space is reserved through
1381 // vec_with_array_field(u32)<1>.
1382 let descriptors = unsafe { irq_set[0].data.as_mut_slice(4) };
1383 descriptors.copy_from_slice(&descriptor.as_raw_descriptor().to_le_bytes()[..]);
1384 }
1385
1386 // SAFETY:
1387 // Safe as we are the owner of self and irq_set which are valid value
1388 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1389 if ret < 0 {
1390 Err(VfioError::VfioIrqEnable(get_error()))
1391 } else {
1392 Ok(())
1393 }
1394 }
1395
/// disable vfio device's irq and disconnect Irqfd Event with device
pub fn irq_disable(&self, index: u32) -> Result<()> {
    // count == 0 with DATA_NONE/ACTION_TRIGGER disables the interrupt index.
    let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
    irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
    irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
    irq_set[0].index = index;
    irq_set[0].start = 0;
    irq_set[0].count = 0;

    // SAFETY:
    // Safe as we are the owner of self and irq_set which are valid value
    let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
    if ret < 0 {
        Err(VfioError::VfioIrqDisable(get_error()))
    } else {
        Ok(())
    }
}
1414
1415 /// Unmask vfio device irq
irq_unmask(&self, index: u32) -> Result<()>1416 pub fn irq_unmask(&self, index: u32) -> Result<()> {
1417 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1418 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1419 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
1420 irq_set[0].index = index;
1421 irq_set[0].start = 0;
1422 irq_set[0].count = 1;
1423
1424 // SAFETY:
1425 // Safe as we are the owner of self and irq_set which are valid value
1426 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1427 if ret < 0 {
1428 Err(VfioError::VfioIrqUnmask(get_error()))
1429 } else {
1430 Ok(())
1431 }
1432 }
1433
1434 /// Mask vfio device irq
irq_mask(&self, index: u32) -> Result<()>1435 pub fn irq_mask(&self, index: u32) -> Result<()> {
1436 let mut irq_set = vec_with_array_field::<vfio_irq_set, u32>(0);
1437 irq_set[0].argsz = mem::size_of::<vfio_irq_set>() as u32;
1438 irq_set[0].flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
1439 irq_set[0].index = index;
1440 irq_set[0].start = 0;
1441 irq_set[0].count = 1;
1442
1443 // SAFETY:
1444 // Safe as we are the owner of self and irq_set which are valid value
1445 let ret = unsafe { ioctl_with_ref(&self.dev, VFIO_DEVICE_SET_IRQS, &irq_set[0]) };
1446 if ret < 0 {
1447 Err(VfioError::VfioIrqMask(get_error()))
1448 } else {
1449 Ok(())
1450 }
1451 }
1452
/// Get and validate VFIO device information.
///
/// Returns the raw `vfio_device_info` plus the classified device type
/// (PCI or platform).
fn get_device_info(device_file: &File) -> Result<(vfio_device_info, VfioDeviceType)> {
    let mut dev_info = vfio_device_info {
        argsz: mem::size_of::<vfio_device_info>() as u32,
        flags: 0,
        num_regions: 0,
        num_irqs: 0,
        ..Default::default()
    };

    // SAFETY:
    // Safe as we are the owner of device_file and dev_info which are valid value,
    // and we verify the return value.
    let ret = unsafe { ioctl_with_mut_ref(device_file, VFIO_DEVICE_GET_INFO, &mut dev_info) };
    if ret < 0 {
        return Err(VfioError::VfioDeviceGetInfo(get_error()));
    }

    let dev_type = if (dev_info.flags & VFIO_DEVICE_FLAGS_PCI) != 0 {
        // A PCI device must expose at least the config region and the MSI-X
        // interrupt index for the rest of the code to work.
        if dev_info.num_regions < VFIO_PCI_CONFIG_REGION_INDEX + 1
            || dev_info.num_irqs < VFIO_PCI_MSIX_IRQ_INDEX + 1
        {
            // NOTE(review): the ioctl succeeded on this path, so `get_error()`
            // reports a stale errno; a dedicated error variant would be clearer.
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        VfioDeviceType::Pci
    } else if (dev_info.flags & VFIO_DEVICE_FLAGS_PLATFORM) != 0 {
        VfioDeviceType::Platform
    } else {
        return Err(VfioError::UnknownDeviceType(dev_info.flags));
    };

    Ok((dev_info, dev_type))
}
1487
/// Query interrupt information
/// return: Vector of interrupts information, each of which contains flags and index
pub fn get_irqs(&self) -> Result<Vec<VfioIrq>> {
    let mut irqs: Vec<VfioIrq> = Vec::new();

    // Query each interrupt index reported by VFIO_DEVICE_GET_INFO.
    for i in 0..self.num_irqs {
        let argsz = mem::size_of::<vfio_irq_info>() as u32;
        let mut irq_info = vfio_irq_info {
            argsz,
            flags: 0,
            index: i,
            count: 0,
        };
        // SAFETY:
        // Safe as we are the owner of dev and irq_info which are valid value,
        // and we verify the return value.
        let ret = unsafe {
            ioctl_with_mut_ref(self.device_file(), VFIO_DEVICE_GET_IRQ_INFO, &mut irq_info)
        };
        // NOTE(review): only single-vector interrupts (count == 1) are
        // accepted; anything else fails the whole query, and `get_error()` is
        // stale when only the count check failed.
        if ret < 0 || irq_info.count != 1 {
            return Err(VfioError::VfioDeviceGetInfo(get_error()));
        }

        let irq = VfioIrq {
            flags: irq_info.flags,
            index: irq_info.index,
        };
        irqs.push(irq);
    }
    Ok(irqs)
}
1519
/// Enumerate the device's regions via VFIO_DEVICE_GET_REGION_INFO.
///
/// For regions whose info carries capability chains (argsz larger than the
/// base struct), the chain is re-queried with a larger buffer and walked to
/// collect sparse-mmap areas and the cap type/subtype.
///
/// NOTE(review): both `continue` statements below skip pushing a region, so
/// indices into the returned vec may not match kernel region indices when a
/// query fails or the CAPS flag is missing — confirm callers tolerate this.
#[allow(clippy::cast_ptr_alignment)]
fn get_regions(dev: &File, num_regions: u32) -> Result<Vec<VfioRegion>> {
    let mut regions: Vec<VfioRegion> = Vec::new();
    for i in 0..num_regions {
        let argsz = mem::size_of::<vfio_region_info>() as u32;
        let mut reg_info = vfio_region_info {
            argsz,
            flags: 0,
            index: i,
            cap_offset: 0,
            size: 0,
            offset: 0,
        };
        let ret =
            // SAFETY:
            // Safe as we are the owner of dev and reg_info which are valid value,
            // and we verify the return value.
            unsafe { ioctl_with_mut_ref(dev, VFIO_DEVICE_GET_REGION_INFO, &mut reg_info) };
        if ret < 0 {
            continue;
        }

        let mut mmaps: Vec<vfio_region_sparse_mmap_area> = Vec::new();
        let mut cap_info: Option<(u32, u32)> = None;
        // argsz grown past the base struct means a capability chain follows;
        // re-issue the ioctl with a buffer big enough to hold it.
        if reg_info.argsz > argsz {
            let cap_len: usize = (reg_info.argsz - argsz) as usize;
            let mut region_with_cap =
                vec_with_array_field::<vfio_region_info_with_cap, u8>(cap_len);
            region_with_cap[0].region_info.argsz = reg_info.argsz;
            region_with_cap[0].region_info.flags = 0;
            region_with_cap[0].region_info.index = i;
            region_with_cap[0].region_info.cap_offset = 0;
            region_with_cap[0].region_info.size = 0;
            region_with_cap[0].region_info.offset = 0;
            // SAFETY:
            // Safe as we are the owner of dev and region_info which are valid value,
            // and we verify the return value.
            let ret = unsafe {
                ioctl_with_mut_ref(
                    dev,
                    VFIO_DEVICE_GET_REGION_INFO,
                    &mut (region_with_cap[0].region_info),
                )
            };
            if ret < 0 {
                return Err(VfioError::VfioDeviceGetRegionInfo(get_error()));
            }

            if region_with_cap[0].region_info.flags & VFIO_REGION_INFO_FLAG_CAPS == 0 {
                continue;
            }

            let cap_header_sz = mem::size_of::<vfio_info_cap_header>() as u32;
            let mmap_cap_sz = mem::size_of::<vfio_region_info_cap_sparse_mmap>() as u32;
            let mmap_area_sz = mem::size_of::<vfio_region_sparse_mmap_area>() as u32;
            let type_cap_sz = mem::size_of::<vfio_region_info_cap_type>() as u32;
            let region_info_sz = reg_info.argsz;

            // region_with_cap[0].cap_info may contain many structures, like
            // vfio_region_info_cap_sparse_mmap struct or vfio_region_info_cap_type struct.
            // Both of them begin with vfio_info_cap_header, so we will get individual cap from
            // vfio_into_cap_header.
            // Go through all the cap structs.
            let info_ptr = region_with_cap.as_ptr() as *mut u8;
            let mut offset = region_with_cap[0].region_info.cap_offset;
            // Each bounds check below guards the cast that follows it; a chain
            // entry extending past argsz terminates the walk.
            while offset != 0 {
                if offset + cap_header_sz > region_info_sz {
                    break;
                }
                // SAFETY:
                // Safe, as cap_header struct is in this function allocated region_with_cap
                // vec.
                let cap_ptr = unsafe { info_ptr.offset(offset as isize) };
                // SAFETY:
                // Safe, as cap_header struct is in this function allocated region_with_cap
                // vec.
                let cap_header = unsafe { &*(cap_ptr as *const vfio_info_cap_header) };
                if cap_header.id as u32 == VFIO_REGION_INFO_CAP_SPARSE_MMAP {
                    if offset + mmap_cap_sz > region_info_sz {
                        break;
                    }
                    // cap_ptr is vfio_region_info_cap_sparse_mmap here
                    let sparse_mmap =
                        // SAFETY:
                        // Safe, this vfio_region_info_cap_sparse_mmap is in this function
                        // allocated region_with_cap vec.
                        unsafe { &*(cap_ptr as *const vfio_region_info_cap_sparse_mmap) };

                    let area_num = sparse_mmap.nr_areas;
                    // NOTE(review): this multiply/add is unchecked u32 math on
                    // kernel-provided values; argsz bounds make overflow unlikely
                    // but not impossible — confirm.
                    if offset + mmap_cap_sz + area_num * mmap_area_sz > region_info_sz {
                        break;
                    }
                    let areas =
                        // SAFETY:
                        // Safe, these vfio_region_sparse_mmap_area are in this function allocated
                        // region_with_cap vec.
                        unsafe { sparse_mmap.areas.as_slice(sparse_mmap.nr_areas as usize) };
                    for area in areas.iter() {
                        mmaps.push(*area);
                    }
                } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_TYPE {
                    if offset + type_cap_sz > region_info_sz {
                        break;
                    }
                    // cap_ptr is vfio_region_info_cap_type here
                    let cap_type_info =
                        // SAFETY:
                        // Safe, this vfio_region_info_cap_type is in this function allocated
                        // region_with_cap vec
                        unsafe { &*(cap_ptr as *const vfio_region_info_cap_type) };

                    cap_info = Some((cap_type_info.type_, cap_type_info.subtype));
                } else if cap_header.id as u32 == VFIO_REGION_INFO_CAP_MSIX_MAPPABLE {
                    // MSI-X mappable: the whole region may be mmapped.
                    mmaps.push(vfio_region_sparse_mmap_area {
                        offset: 0,
                        size: region_with_cap[0].region_info.size,
                    });
                }

                offset = cap_header.next;
            }
        } else if reg_info.flags & VFIO_REGION_INFO_FLAG_MMAP != 0 {
            // No capability chain: a plain MMAP flag means the full region is mappable.
            mmaps.push(vfio_region_sparse_mmap_area {
                offset: 0,
                size: reg_info.size,
            });
        }

        let region = VfioRegion {
            flags: reg_info.flags,
            size: reg_info.size,
            offset: reg_info.offset,
            mmaps,
            cap_info,
        };
        regions.push(region);
    }

    Ok(regions)
}
1660
1661 /// get a region's flag
1662 /// the return's value may conatin:
1663 /// VFIO_REGION_INFO_FLAG_READ: region supports read
1664 /// VFIO_REGION_INFO_FLAG_WRITE: region supports write
1665 /// VFIO_REGION_INFO_FLAG_MMAP: region supports mmap
1666 /// VFIO_REGION_INFO_FLAG_CAPS: region's info supports caps
get_region_flags(&self, index: usize) -> u321667 pub fn get_region_flags(&self, index: usize) -> u32 {
1668 match self.regions.get(index) {
1669 Some(v) => v.flags,
1670 None => {
1671 warn!("get_region_flags() with invalid index: {}", index);
1672 0
1673 }
1674 }
1675 }
1676
1677 /// get a region's offset
1678 /// return: Region offset from the start of vfio device descriptor
get_region_offset(&self, index: usize) -> u641679 pub fn get_region_offset(&self, index: usize) -> u64 {
1680 match self.regions.get(index) {
1681 Some(v) => v.offset,
1682 None => {
1683 warn!("get_region_offset with invalid index: {}", index);
1684 0
1685 }
1686 }
1687 }
1688
1689 /// get a region's size
1690 /// return: Region size from the start of vfio device descriptor
get_region_size(&self, index: usize) -> u641691 pub fn get_region_size(&self, index: usize) -> u64 {
1692 match self.regions.get(index) {
1693 Some(v) => v.size,
1694 None => {
1695 warn!("get_region_size with invalid index: {}", index);
1696 0
1697 }
1698 }
1699 }
1700
/// get a number of regions
/// return: Number of regions of vfio device descriptor
pub fn get_region_count(&self) -> usize {
    self.regions.len()
}
1706
1707 /// get a region's mmap info vector
get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area>1708 pub fn get_region_mmap(&self, index: usize) -> Vec<vfio_region_sparse_mmap_area> {
1709 match self.regions.get(index) {
1710 Some(v) => v.mmaps.clone(),
1711 None => {
1712 warn!("get_region_mmap with invalid index: {}", index);
1713 Vec::new()
1714 }
1715 }
1716 }
1717
1718 /// find the specified cap type in device regions
1719 /// Input:
1720 /// type_: cap type
1721 /// sub_type: cap sub_type
1722 /// Output:
1723 /// None: device doesn't have the specified cap type
1724 /// Some((bar_index, region_size)): device has the specified cap type, return region's
1725 /// index and size
get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)>1726 pub fn get_cap_type_info(&self, type_: u32, sub_type: u32) -> Option<(u32, u64)> {
1727 for (index, region) in self.regions.iter().enumerate() {
1728 if let Some(cap_info) = ®ion.cap_info {
1729 if cap_info.0 == type_ && cap_info.1 == sub_type {
1730 return Some((index as u32, region.size));
1731 }
1732 }
1733 }
1734
1735 None
1736 }
1737
1738 /// Returns file offset corresponding to the given `VfioRegionAddr`.
1739 /// The offset can be used when reading/writing the VFIO device's FD directly.
get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64>1740 pub fn get_offset_for_addr(&self, addr: &VfioRegionAddr) -> Result<u64> {
1741 let region = self
1742 .regions
1743 .get(addr.index)
1744 .ok_or(VfioError::InvalidIndex(addr.index))?;
1745 Ok(region.offset + addr.addr)
1746 }
1747
1748 /// Read region's data from VFIO device into buf
1749 /// index: region num
1750 /// buf: data destination and buf length is read size
1751 /// addr: offset in the region
region_read(&self, index: usize, buf: &mut [u8], addr: u64)1752 pub fn region_read(&self, index: usize, buf: &mut [u8], addr: u64) {
1753 let stub: &VfioRegion = self
1754 .regions
1755 .get(index)
1756 .unwrap_or_else(|| panic!("tried to read VFIO with an invalid index: {}", index));
1757
1758 let size = buf.len() as u64;
1759 if size > stub.size || addr + size > stub.size {
1760 panic!(
1761 "tried to read VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1762 index, addr, size
1763 );
1764 }
1765
1766 self.dev
1767 .read_exact_at(buf, stub.offset + addr)
1768 .unwrap_or_else(|e| {
1769 panic!(
1770 "failed to read region: index={}, addr=0x{:x}, error={}",
1771 index, addr, e
1772 )
1773 });
1774 }
1775
/// Reads a value from the specified `VfioRegionAddr.addr` + `offset`.
///
/// Panics under the same conditions as `region_read`.
pub fn region_read_from_addr<T: FromBytes>(&self, addr: &VfioRegionAddr, offset: u64) -> T {
    let mut val = mem::MaybeUninit::zeroed();
    let buf =
        // SAFETY:
        // Safe because we have zero-initialized `size_of::<T>()` bytes.
        unsafe { slice::from_raw_parts_mut(val.as_mut_ptr() as *mut u8, mem::size_of::<T>()) };
    // Fill the byte view of `val` directly from the region.
    self.region_read(addr.index, buf, addr.addr + offset);
    // SAFETY:
    // Safe because any bit pattern is valid for a type that implements FromBytes.
    unsafe { val.assume_init() }
}
1788
1789 /// write the data from buf into a vfio device region
1790 /// index: region num
1791 /// buf: data src and buf length is write size
1792 /// addr: offset in the region
region_write(&self, index: usize, buf: &[u8], addr: u64)1793 pub fn region_write(&self, index: usize, buf: &[u8], addr: u64) {
1794 let stub: &VfioRegion = self
1795 .regions
1796 .get(index)
1797 .unwrap_or_else(|| panic!("tried to write VFIO with an invalid index: {}", index));
1798
1799 let size = buf.len() as u64;
1800 if size > stub.size
1801 || addr + size > stub.size
1802 || (stub.flags & VFIO_REGION_INFO_FLAG_WRITE) == 0
1803 {
1804 panic!(
1805 "tried to write VFIO region with invalid arguments: index={}, addr=0x{:x}, size=0x{:x}",
1806 index, addr, size
1807 );
1808 }
1809
1810 self.dev
1811 .write_all_at(buf, stub.offset + addr)
1812 .unwrap_or_else(|e| {
1813 panic!(
1814 "failed to write region: index={}, addr=0x{:x}, error={}",
1815 index, addr, e
1816 )
1817 });
1818 }
1819
1820 /// Writes data into the specified `VfioRegionAddr.addr` + `offset`.
region_write_to_addr<T: AsBytes>(&self, val: &T, addr: &VfioRegionAddr, offset: u64)1821 pub fn region_write_to_addr<T: AsBytes>(&self, val: &T, addr: &VfioRegionAddr, offset: u64) {
1822 self.region_write(addr.index, val.as_bytes(), addr.addr + offset);
1823 }
1824
1825 /// get vfio device's descriptors which are passed into minijail process
keep_rds(&self) -> Vec<RawDescriptor>1826 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
1827 vec![
1828 self.dev.as_raw_descriptor(),
1829 self.group_descriptor,
1830 self.container.lock().as_raw_descriptor(),
1831 ]
1832 }
1833
1834 /// Add (iova, user_addr) map into vfio container iommu table
1835 /// # Safety
1836 ///
1837 /// The caller is responsible for determining the safety of the VFIO_IOMMU_MAP_DMA ioctl.
vfio_dma_map( &self, iova: u64, size: u64, user_addr: u64, write_en: bool, ) -> Result<()>1838 pub unsafe fn vfio_dma_map(
1839 &self,
1840 iova: u64,
1841 size: u64,
1842 user_addr: u64,
1843 write_en: bool,
1844 ) -> Result<()> {
1845 self.container
1846 .lock()
1847 .vfio_dma_map(iova, size, user_addr, write_en)
1848 }
1849
1850 /// Remove (iova, user_addr) map from vfio container iommu table
vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()>1851 pub fn vfio_dma_unmap(&self, iova: u64, size: u64) -> Result<()> {
1852 self.container.lock().vfio_dma_unmap(iova, size)
1853 }
1854
vfio_get_iommu_page_size_mask(&self) -> Result<u64>1855 pub fn vfio_get_iommu_page_size_mask(&self) -> Result<u64> {
1856 self.container.lock().vfio_get_iommu_page_size_mask()
1857 }
1858
alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64>1859 pub fn alloc_iova(&self, size: u64, align_size: u64, alloc: Alloc) -> Result<u64> {
1860 self.iova_alloc
1861 .lock()
1862 .allocate_with_align(size, alloc, "alloc_iova".to_owned(), align_size)
1863 .map_err(VfioError::Resources)
1864 }
1865
get_iova(&self, alloc: &Alloc) -> Option<AddressRange>1866 pub fn get_iova(&self, alloc: &Alloc) -> Option<AddressRange> {
1867 self.iova_alloc.lock().get(alloc).map(|res| res.0)
1868 }
1869
release_iova(&self, alloc: Alloc) -> Result<AddressRange>1870 pub fn release_iova(&self, alloc: Alloc) -> Result<AddressRange> {
1871 self.iova_alloc
1872 .lock()
1873 .release(alloc)
1874 .map_err(VfioError::Resources)
1875 }
1876
get_max_addr(&self) -> u641877 pub fn get_max_addr(&self) -> u64 {
1878 self.iova_alloc.lock().get_max_addr()
1879 }
1880
    /// Gets the vfio device backing `File`.
    ///
    /// This is the same fd used internally for region reads/writes and
    /// returned by `as_raw_descriptor`.
    pub fn device_file(&self) -> &File {
        &self.dev
    }
1885
1886 /// close vfio device
close(&self)1887 pub fn close(&self) {
1888 self.container.lock().remove_group(self.group_id, true);
1889 }
1890 }
1891
/// Accessor for a VFIO PCI device's configuration space, backed by the
/// device's `VFIO_PCI_CONFIG_REGION_INDEX` region.
pub struct VfioPciConfig {
    // Shared handle to the underlying VFIO device whose config region is
    // read/written.
    device: Arc<VfioDevice>,
}
1895
1896 impl VfioPciConfig {
new(device: Arc<VfioDevice>) -> Self1897 pub fn new(device: Arc<VfioDevice>) -> Self {
1898 VfioPciConfig { device }
1899 }
1900
read_config<T: FromBytes>(&self, offset: u32) -> T1901 pub fn read_config<T: FromBytes>(&self, offset: u32) -> T {
1902 let mut buf = vec![0u8; std::mem::size_of::<T>()];
1903 self.device.region_read(
1904 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1905 &mut buf,
1906 offset.into(),
1907 );
1908 T::read_from(&buf[..]).expect("failed to convert config data from slice")
1909 }
1910
write_config<T: AsBytes>(&self, config: T, offset: u32)1911 pub fn write_config<T: AsBytes>(&self, config: T, offset: u32) {
1912 self.device.region_write(
1913 VFIO_PCI_CONFIG_REGION_INDEX as usize,
1914 config.as_bytes(),
1915 offset.into(),
1916 );
1917 }
1918
1919 /// Set the VFIO device this config refers to as the bus master.
set_bus_master(&self)1920 pub fn set_bus_master(&self) {
1921 /// Constant definitions from `linux/pci_regs.h`.
1922 const PCI_COMMAND: u32 = 0x4;
1923 /// Enable bus mastering
1924 const PCI_COMMAND_MASTER: u16 = 0x4;
1925
1926 let mut cmd: u16 = self.read_config(PCI_COMMAND);
1927
1928 if cmd & PCI_COMMAND_MASTER != 0 {
1929 return;
1930 }
1931
1932 cmd |= PCI_COMMAND_MASTER;
1933
1934 self.write_config(cmd, PCI_COMMAND);
1935 }
1936 }
1937
impl AsRawDescriptor for VfioDevice {
    // Exposes the raw descriptor of the vfio device's backing `File`.
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.dev.as_raw_descriptor()
    }
}
1943