// Copyright 2023 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

pub mod geniezone_sys;

use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryFrom;
use std::ffi::CString;
use std::mem::offset_of;
use std::os::raw::c_ulong;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use base::errno_result;
use base::error;
use base::ioctl;
use base::ioctl_with_mut_ref;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::FromRawDescriptor;
use base::MappedRegion;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use cros_fdt::Fdt;
pub use geniezone_sys::*;
use libc::open;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOMEM;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use vm_memory::MemoryRegionPurpose;

use crate::AArch64SysRegId;
use crate::BalloonEvent;
use crate::ClockState;
use crate::Config;
use crate::Datamatch;
use crate::DeviceKind;
use crate::Hypervisor;
use crate::HypervisorCap;
use crate::IoEventAddress;
use crate::IoOperation;
use crate::IoParams;
use crate::MemCacheType;
use crate::MemSlot;
use crate::PsciVersion;
use crate::Vcpu;
use crate::VcpuAArch64;
use crate::VcpuExit;
use crate::VcpuFeature;
use crate::VcpuRegAArch64;
use crate::VcpuSignalHandle;
use crate::VcpuSignalHandleInner;
use crate::Vm;
use crate::VmAArch64;
use crate::VmCap;
use crate::PSCI_0_2;

impl Geniezone {
    /// Get the size of guest physical addresses (IPA) in bits.
    pub fn get_guest_phys_addr_bits(&self) -> u8 {
        // SAFETY:
        // Safe because we know self is a real geniezone fd
        match unsafe { ioctl_with_val(self, GZVM_CHECK_EXTENSION, GZVM_CAP_ARM_VM_IPA_SIZE.into()) }
        {
            // Default physical address size is 40 bits if the extension is not supported.
            ret if ret <= 0 => 40,
            ipa => ipa as u8,
        }
    }
}

impl GeniezoneVm {
    /// Does platform-specific initialization for the GeniezoneVm.
    pub fn init_arch(&self, cfg: &Config) -> Result<()> {
        #[cfg(target_arch = "aarch64")]
        if cfg.mte {
            // SAFETY:
            // Safe because it does not take pointer arguments.
            unsafe {
                self.ctrl_geniezone_enable_capability(GeniezoneCap::ArmMte, &[0, 0, 0, 0, 0])
            }?;
        }
        Ok(())
    }

    /// Checks if a particular `VmCap` is available, or returns None if the arch-independent
    /// `Vm::check_capability()` should handle the check.
    pub fn check_capability_arch(&self, _c: VmCap) -> Option<bool> {
        None
    }

    /// Arch-specific implementation of `Vm::get_pvclock`. Always returns an error on AArch64.
    pub fn get_pvclock_arch(&self) -> Result<ClockState> {
        // TODO: Geniezone does not currently support pvclock.
        error!("Geniezone: get_pvclock_arch is not supported");
        Err(Error::new(EINVAL))
    }

    /// Arch-specific implementation of `Vm::set_pvclock`. Always returns an error on AArch64.
    pub fn set_pvclock_arch(&self, _state: &ClockState) -> Result<()> {
        // TODO: Geniezone does not currently support pvclock.
        error!("Geniezone: set_pvclock_arch is not supported");
        Err(Error::new(EINVAL))
    }

    fn get_protected_vm_info(&self) -> Result<u64> {
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel won't write beyond the end of
        // the struct or keep a pointer to it.
        let cap: gzvm_enable_cap = unsafe {
            self.ctrl_geniezone_enable_capability(
                GeniezoneCap::ArmProtectedVm,
                &[GZVM_CAP_ARM_PVM_GET_PVMFW_SIZE as u64, 0, 0, 0, 0],
            )
        }?;
        Ok(cap.args[1])
    }

    fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress) -> Result<()> {
        // SAFETY:
        // Safe because none of the args are pointers.
        unsafe {
            self.ctrl_geniezone_enable_capability(
                GeniezoneCap::ArmProtectedVm,
                &[GZVM_CAP_ARM_PVM_SET_PVMFW_IPA as u64, fw_addr.0, 0, 0, 0],
            )
        }?;
        Ok(())
    }
}

impl VmAArch64 for GeniezoneVm {
    fn get_hypervisor(&self) -> &dyn Hypervisor {
        &self.geniezone
    }

    fn load_protected_vm_firmware(
        &mut self,
        fw_addr: GuestAddress,
        fw_max_size: u64,
    ) -> Result<()> {
        let size: u64 = self.get_protected_vm_info()?;
        if size == 0 {
            Err(Error::new(EINVAL))
        } else {
            if size > fw_max_size {
                return Err(Error::new(ENOMEM));
            }
            self.set_protected_vm_firmware_ipa(fw_addr)
        }
    }

    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuAArch64>> {
        Ok(Box::new(GeniezoneVm::create_vcpu(self, id)?))
    }

    fn create_fdt(&self, _fdt: &mut Fdt, _phandles: &BTreeMap<&str, u32>) -> cros_fdt::Result<()> {
        Ok(())
    }

    fn init_arch(
        &self,
        _payload_entry_address: GuestAddress,
        fdt_address: GuestAddress,
        fdt_size: usize,
    ) -> Result<()> {
        let dtb_config = gzvm_dtb_config {
            dtb_addr: fdt_address.offset(),
            dtb_size: fdt_size.try_into().unwrap(),
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will modify exactly the size
        // of the struct.
        let ret = unsafe { ioctl_with_ref(self, GZVM_SET_DTB_CONFIG, &dtb_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
}

impl GeniezoneVcpu {
    fn set_one_geniezone_reg_u64(
        &self,
        gzvm_reg_id: GeniezoneVcpuRegister,
        data: u64,
    ) -> Result<()> {
        self.set_one_geniezone_reg(gzvm_reg_id, data.to_ne_bytes().as_slice())
    }

    fn set_one_geniezone_reg(&self, gzvm_reg_id: GeniezoneVcpuRegister, data: &[u8]) -> Result<()> {
        let onereg = gzvm_one_reg {
            id: gzvm_reg_id.into(),
            addr: (data.as_ptr() as usize)
                .try_into()
                .expect("can't represent usize as u64"),
        };
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = unsafe { ioctl_with_ref(self, GZVM_SET_ONE_REG, &onereg) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn get_one_geniezone_reg_u64(&self, gzvm_reg_id: GeniezoneVcpuRegister) -> Result<u64> {
        let mut bytes = 0u64.to_ne_bytes();
        self.get_one_geniezone_reg(gzvm_reg_id, bytes.as_mut_slice())?;
        Ok(u64::from_ne_bytes(bytes))
    }

    fn get_one_geniezone_reg(
        &self,
        gzvm_reg_id: GeniezoneVcpuRegister,
        data: &mut [u8],
    ) -> Result<()> {
        let onereg = gzvm_one_reg {
            id: gzvm_reg_id.into(),
            addr: (data.as_mut_ptr() as usize)
                .try_into()
                .expect("can't represent usize as u64"),
        };

        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct.
        let ret = unsafe { ioctl_with_ref(self, GZVM_GET_ONE_REG, &onereg) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
}
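
// A minimal usage sketch of the register accessors above (illustrative only;
// assumes an existing `vcpu: GeniezoneVcpu` and elides error handling):
//
//   vcpu.set_one_geniezone_reg_u64(GeniezoneVcpuRegister::Pc, 0x8000_0000)?;
//   let pc = vcpu.get_one_geniezone_reg_u64(GeniezoneVcpuRegister::Pc)?;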

#[allow(dead_code)]
/// GZVM registers as used by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API
pub enum GeniezoneVcpuRegister {
    /// General Purpose Registers X0-X30
    X(u8),
    /// Stack Pointer
    Sp,
    /// Program Counter
    Pc,
    /// Processor State
    Pstate,
    /// FP & SIMD Registers V0-V31
    V(u8),
    /// Geniezone Firmware Pseudo-Registers
    Firmware(u16),
    /// System Registers
    System(AArch64SysRegId),
    /// CCSIDR_EL1 Demultiplexed by CSSELR_EL1
    Ccsidr(u8),
}

/// Gives the `u64` register ID expected by the `GET_ONE_REG`/`SET_ONE_REG` ioctl API.
impl From<GeniezoneVcpuRegister> for u64 {
    fn from(register: GeniezoneVcpuRegister) -> Self {
        const fn reg(size: u64, kind: u64, fields: u64) -> u64 {
            GZVM_REG_ARM64 | size | kind | fields
        }

        const fn gzvm_regs_reg(size: u64, offset: usize) -> u64 {
            let offset = offset / std::mem::size_of::<u32>();

            reg(size, GZVM_REG_ARM_CORE as u64, offset as u64)
        }

        const fn gzvm_reg(offset: usize) -> u64 {
            gzvm_regs_reg(GZVM_REG_SIZE_U64, offset)
        }

        fn spsr_reg(spsr_reg: u32) -> u64 {
            let n = std::mem::size_of::<u64>() * (spsr_reg as usize);
            gzvm_reg(offset_of!(gzvm_regs, spsr) + n)
        }

        fn user_pt_reg(offset: usize) -> u64 {
            gzvm_regs_reg(GZVM_REG_SIZE_U64, offset_of!(gzvm_regs, regs) + offset)
        }

        fn user_fpsimd_state_reg(size: u64, offset: usize) -> u64 {
            gzvm_regs_reg(size, offset_of!(gzvm_regs, fp_regs) + offset)
        }

        const fn reg_u64(kind: u64, fields: u64) -> u64 {
            reg(GZVM_REG_SIZE_U64, kind, fields)
        }

        const fn demux_reg(size: u64, index: u64, value: u64) -> u64 {
            let index =
                (index << GZVM_REG_ARM_DEMUX_ID_SHIFT) & (GZVM_REG_ARM_DEMUX_ID_MASK as u64);
            let value =
                (value << GZVM_REG_ARM_DEMUX_VAL_SHIFT) & (GZVM_REG_ARM_DEMUX_VAL_MASK as u64);

            reg(size, GZVM_REG_ARM_DEMUX as u64, index | value)
        }

        match register {
            GeniezoneVcpuRegister::X(n @ 0..=30) => {
                let n = std::mem::size_of::<u64>() * (n as usize);

                user_pt_reg(offset_of!(user_pt_regs, regs) + n)
            }
            GeniezoneVcpuRegister::X(n) => {
                unreachable!("invalid GeniezoneVcpuRegister Xn index: {n}")
            }
            GeniezoneVcpuRegister::Sp => user_pt_reg(offset_of!(user_pt_regs, sp)),
            GeniezoneVcpuRegister::Pc => user_pt_reg(offset_of!(user_pt_regs, pc)),
            GeniezoneVcpuRegister::Pstate => user_pt_reg(offset_of!(user_pt_regs, pstate)),
            GeniezoneVcpuRegister::V(n @ 0..=31) => {
                let n = std::mem::size_of::<u128>() * (n as usize);
                user_fpsimd_state_reg(GZVM_REG_SIZE_U128, offset_of!(user_fpsimd_state, vregs) + n)
            }
            GeniezoneVcpuRegister::V(n) => {
                unreachable!("invalid GeniezoneVcpuRegister Vn index: {n}")
            }
            GeniezoneVcpuRegister::System(AArch64SysRegId::FPSR) => {
                user_fpsimd_state_reg(GZVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpsr))
            }
            GeniezoneVcpuRegister::System(AArch64SysRegId::FPCR) => {
                user_fpsimd_state_reg(GZVM_REG_SIZE_U32, offset_of!(user_fpsimd_state, fpcr))
            }
            GeniezoneVcpuRegister::System(AArch64SysRegId::SPSR_EL1) => spsr_reg(0),
            GeniezoneVcpuRegister::System(AArch64SysRegId::SPSR_abt) => spsr_reg(1),
            GeniezoneVcpuRegister::System(AArch64SysRegId::SPSR_und) => spsr_reg(2),
            GeniezoneVcpuRegister::System(AArch64SysRegId::SPSR_irq) => spsr_reg(3),
            GeniezoneVcpuRegister::System(AArch64SysRegId::SPSR_fiq) => spsr_reg(4),
            GeniezoneVcpuRegister::System(AArch64SysRegId::SP_EL1) => {
                gzvm_reg(offset_of!(gzvm_regs, sp_el1))
            }
            GeniezoneVcpuRegister::System(AArch64SysRegId::ELR_EL1) => {
                gzvm_reg(offset_of!(gzvm_regs, elr_el1))
            }
            GeniezoneVcpuRegister::System(sysreg) => {
                reg_u64(GZVM_REG_ARM64_SYSREG.into(), sysreg.encoded().into())
            }
            GeniezoneVcpuRegister::Firmware(n) => reg_u64(GZVM_REG_ARM, n.into()),
            GeniezoneVcpuRegister::Ccsidr(n) => demux_reg(GZVM_REG_SIZE_U32, 0, n.into()),
        }
    }
}
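
// For example, `u64::from(GeniezoneVcpuRegister::X(2))` resolves to a core
// register whose `fields` value is the byte offset
// `offset_of!(gzvm_regs, regs) + offset_of!(user_pt_regs, regs) + 2 * 8`,
// divided by `size_of::<u32>()` per the core-register encoding above, OR'd
// with `GZVM_REG_ARM64 | GZVM_REG_SIZE_U64 | GZVM_REG_ARM_CORE`.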

impl From<VcpuRegAArch64> for GeniezoneVcpuRegister {
    fn from(reg: VcpuRegAArch64) -> Self {
        match reg {
            VcpuRegAArch64::X(n @ 0..=30) => Self::X(n),
            VcpuRegAArch64::X(n) => unreachable!("invalid VcpuRegAArch64 index: {n}"),
            VcpuRegAArch64::Sp => Self::Sp,
            VcpuRegAArch64::Pc => Self::Pc,
            VcpuRegAArch64::Pstate => Self::Pstate,
            VcpuRegAArch64::System(sysreg) => Self::System(sysreg),
        }
    }
}

impl VcpuAArch64 for GeniezoneVcpu {
    fn init(&self, _features: &[VcpuFeature]) -> Result<()> {
        // Geniezone initializes the vcpu at creation time, so there is nothing to do here.
        // Return Ok since aarch64/src/lib.rs calls this.
        Ok(())
    }

    fn init_pmu(&self, _irq: u64) -> Result<()> {
        // TODO: Geniezone does not currently support the PMU.
        // Temporarily return Ok since aarch64/src/lib.rs calls this.
        Ok(())
    }

    fn has_pvtime_support(&self) -> bool {
        // TODO: Geniezone does not currently support pvtime.
        false
    }

    fn init_pvtime(&self, _pvtime_ipa: u64) -> Result<()> {
        // TODO: Geniezone does not currently support pvtime.
        error!("Geniezone: init_pvtime is not supported");
        Err(Error::new(EINVAL))
    }

    fn set_one_reg(&self, reg_id: VcpuRegAArch64, data: u64) -> Result<()> {
        self.set_one_geniezone_reg_u64(GeniezoneVcpuRegister::from(reg_id), data)
    }

    fn get_one_reg(&self, reg_id: VcpuRegAArch64) -> Result<u64> {
        self.get_one_geniezone_reg_u64(GeniezoneVcpuRegister::from(reg_id))
    }

    fn set_vector_reg(&self, _reg_num: u8, _data: u128) -> Result<()> {
        unimplemented!()
    }

    fn get_vector_reg(&self, _reg_num: u8) -> Result<u128> {
        unimplemented!()
    }

    fn get_psci_version(&self) -> Result<PsciVersion> {
        Ok(PSCI_0_2)
    }

    fn get_max_hw_bps(&self) -> Result<usize> {
        // TODO: Geniezone does not currently support gdb.
        error!("Geniezone: get_max_hw_bps is not supported");
        Err(Error::new(EINVAL))
    }

    fn get_system_regs(&self) -> Result<BTreeMap<AArch64SysRegId, u64>> {
        error!("Geniezone: get_system_regs is not supported");
        Err(Error::new(EINVAL))
    }

    fn get_cache_info(&self) -> Result<BTreeMap<u8, u64>> {
        error!("Geniezone: get_cache_info is not supported");
        Err(Error::new(EINVAL))
    }

    fn set_cache_info(&self, _cache_info: BTreeMap<u8, u64>) -> Result<()> {
        error!("Geniezone: set_cache_info is not supported");
        Err(Error::new(EINVAL))
    }

    fn hypervisor_specific_snapshot(&self) -> anyhow::Result<serde_json::Value> {
        // TODO: Geniezone does not currently support snapshotting.
        Err(anyhow::anyhow!(
            "Geniezone: hypervisor_specific_snapshot is not supported"
        ))
    }

    fn hypervisor_specific_restore(&self, _data: serde_json::Value) -> anyhow::Result<()> {
        // TODO: Geniezone does not currently support snapshot restore.
        Err(anyhow::anyhow!(
            "Geniezone: hypervisor_specific_restore is not supported"
        ))
    }

    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO: Geniezone does not currently support gdb.
        error!("Geniezone: set_guest_debug is not supported");
        Err(Error::new(EINVAL))
    }
}

// Wrapper around GZVM_SET_USER_MEMORY_REGION ioctl, which creates, modifies, or deletes a mapping
// from guest physical to host user pages.
//
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    descriptor: &SafeDescriptor,
    slot: MemSlot,
    _read_only: bool,
    _log_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
    flags: u32,
) -> Result<()> {
    let region = gzvm_userspace_memory_region {
        slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(descriptor, GZVM_SET_USER_MEMORY_REGION, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
/// size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
    (((size + page_size - 1) / page_size) + 7) / 8
}
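
// A minimal sanity check of the bitmap-size arithmetic above: the region size
// is rounded up to whole pages (one dirty bit per page), and the bit count is
// rounded up to whole bytes. Expected values are derived from the host's
// `pagesize()` so the check holds regardless of the actual page size.
#[cfg(test)]
mod dirty_log_bitmap_size_tests {
    use super::*;

    #[test]
    fn rounds_up_to_pages_and_bytes() {
        let page_size = pagesize();
        // A single byte of guest memory still occupies one page, which needs
        // one bitmap bit, rounded up to a whole byte.
        assert_eq!(dirty_log_bitmap_size(1), 1);
        // Exactly eight pages need exactly one bitmap byte.
        assert_eq!(dirty_log_bitmap_size(8 * page_size), 1);
        // Nine pages spill into a second bitmap byte.
        assert_eq!(dirty_log_bitmap_size(9 * page_size), 2);
    }
}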

pub struct Geniezone {
    geniezone: SafeDescriptor,
}

#[repr(u32)]
pub enum GeniezoneCap {
    ArmMte,
    ArmProtectedVm = GZVM_CAP_ARM_PROTECTED_VM,
}

impl Geniezone {
    pub fn new_with_path(device_path: &Path) -> Result<Geniezone> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Geniezone {
            // SAFETY:
            // Safe because we verify that ret is valid and we own the fd.
            geniezone: unsafe { SafeDescriptor::from_raw_descriptor(ret) },
        })
    }

    /// Opens `/dev/gzvm` and returns a gzvm object on success.
    pub fn new() -> Result<Geniezone> {
        Geniezone::new_with_path(&PathBuf::from("/dev/gzvm"))
    }

    /// Gets the size of the mmap required to use a vcpu's `gzvm_vcpu_run` structure.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        // We don't use mmap; return the size of `gzvm_vcpu_run` directly.
        let res = std::mem::size_of::<gzvm_vcpu_run>();
        Ok(res)
    }
}
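
// A minimal usage sketch (illustrative only; assumes `/dev/gzvm` exists and
// elides error handling):
//
//   let gz = Geniezone::new()?;
//   let ipa_bits = gz.get_guest_phys_addr_bits();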

impl AsRawDescriptor for Geniezone {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.geniezone.as_raw_descriptor()
    }
}

impl Hypervisor for Geniezone {
    fn try_clone(&self) -> Result<Self> {
        Ok(Geniezone {
            geniezone: self.geniezone.try_clone()?,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        match cap {
            HypervisorCap::UserMemory => true,
            HypervisorCap::ArmPmuV3 => false,
            HypervisorCap::ImmediateExit => true,
            HypervisorCap::StaticSwiotlbAllocationRequired => true,
            HypervisorCap::HypervisorInitializedBootContext => false,
            HypervisorCap::S390UserSigp | HypervisorCap::TscDeadlineTimer => false,
        }
    }
}

/// A wrapper around creating and using a Geniezone VM.
pub struct GeniezoneVm {
    geniezone: Geniezone,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
}

impl GeniezoneVm {
    /// Constructs a new `GeniezoneVm` using the given `Geniezone` instance.
    pub fn new(geniezone: &Geniezone, guest_mem: GuestMemory, cfg: Config) -> Result<GeniezoneVm> {
        // SAFETY:
        // Safe because we know gzvm is a real gzvm fd as this module is the only one that can make
        // gzvm objects.
        let ret = unsafe { ioctl(geniezone, GZVM_CREATE_VM) };
        if ret < 0 {
            return errno_result();
        }
        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        for region in guest_mem.regions() {
            let flags = match region.options.purpose {
                MemoryRegionPurpose::GuestMemoryRegion => GZVM_USER_MEM_REGION_GUEST_MEM,
                MemoryRegionPurpose::ProtectedFirmwareRegion => GZVM_USER_MEM_REGION_PROTECT_FW,
                MemoryRegionPurpose::StaticSwiotlbRegion => GZVM_USER_MEM_REGION_STATIC_SWIOTLB,
            };
            // SAFETY:
            // Safe because the guest regions are guaranteed not to overlap.
            unsafe {
                set_user_memory_region(
                    &vm_descriptor,
                    region.index as MemSlot,
                    false,
                    false,
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                    flags,
                )
            }?;
        }

        let vm = GeniezoneVm {
            geniezone: geniezone.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
        };
        vm.init_arch(&cfg)?;
        Ok(vm)
    }

    fn create_vcpu(&self, id: usize) -> Result<GeniezoneVcpu> {
        // `run` is a data structure shared with the kernel module (ko) and geniezone.
        let run_mmap_size = self.geniezone.get_vcpu_mmap_size()?;

        let fd =
            // SAFETY:
            // Safe because we know that our file is a VM fd and we verify the return result.
            unsafe { ioctl_with_val(self, GZVM_CREATE_VCPU, c_ulong::try_from(id).unwrap()) };

        if fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we verified
        // the value of the fd and we own the fd.
        let vcpu = unsafe { SafeDescriptor::from_raw_descriptor(fd) };

        // Memory mapping --> Memory allocation
        let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(GeniezoneVcpu {
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            run_mmap: Arc::new(run_mmap),
        })
    }

    /// Creates an in-kernel interrupt controller.
    ///
    /// See the documentation on the GZVM_CREATE_IRQCHIP ioctl.
    pub fn create_irq_chip(&self) -> Result<()> {
        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let ret = unsafe { ioctl(self, GZVM_CREATE_IRQCHIP) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Sets the level on the given irq to 1 if `active` is true, and 0 otherwise.
    pub fn set_irq_line(&self, irq: u32, active: bool) -> Result<()> {
        let mut irq_level = gzvm_irq_level::default();
        irq_level.__bindgen_anon_1.irq = irq;
        irq_level.level = active as u32;

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, GZVM_IRQ_LINE, &irq_level) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Registers an event that will, when signalled, trigger the `gsi` irq, and `resample_evt`
    /// (when not None) will be triggered when the irqchip is resampled.
    pub fn register_irqfd(
        &self,
        gsi: u32,
        evt: &Event,
        resample_evt: Option<&Event>,
    ) -> Result<()> {
        let mut irqfd = gzvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            ..Default::default()
        };

        if let Some(r_evt) = resample_evt {
            irqfd.flags = GZVM_IRQFD_FLAG_RESAMPLE;
            irqfd.resamplefd = r_evt.as_raw_descriptor() as u32;
        }

        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, GZVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    /// Unregisters an event that was previously registered with
    /// `register_irqfd`.
    ///
    /// The `evt` and `gsi` pair must be the same as the ones passed into
    /// `register_irqfd`.
    pub fn unregister_irqfd(&self, gsi: u32, evt: &Event) -> Result<()> {
        let irqfd = gzvm_irqfd {
            fd: evt.as_raw_descriptor() as u32,
            gsi,
            flags: GZVM_IRQFD_FLAG_DEASSIGN,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, GZVM_IRQFD, &irqfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn ioeventfd(
        &self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
        deassign: bool,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };
        let mut flags = 0;
        if deassign {
            flags |= 1 << gzvm_ioeventfd_flag_nr_deassign;
        }
        if do_datamatch {
            flags |= 1 << gzvm_ioeventfd_flag_nr_datamatch;
        }
        if let IoEventAddress::Pio(_) = addr {
            flags |= 1 << gzvm_ioeventfd_flag_nr_pio;
        }
        let ioeventfd = gzvm_ioeventfd {
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: match addr {
                IoEventAddress::Pio(p) => p,
                IoEventAddress::Mmio(m) => m,
            },
            fd: evt.as_raw_descriptor(),
            flags,
            ..Default::default()
        };
        // SAFETY:
        // Safe because we know that our file is a VM fd, we know the kernel will only read the
        // correct amount of memory from our pointer, and we verify the return result.
        let ret = unsafe { ioctl_with_ref(self, GZVM_IOEVENTFD, &ioeventfd) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
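
    // For example, registering a 4-byte ioeventfd that matches the value 1 at
    // MMIO address 0x3f0 (hypothetical values) builds a `gzvm_ioeventfd` with
    // `datamatch = 1`, `len = 4`, `addr = 0x3f0`, and the
    // `gzvm_ioeventfd_flag_nr_datamatch` bit set in `flags`:
    //
    //   vm.register_ioevent(&evt, IoEventAddress::Mmio(0x3f0), Datamatch::U32(Some(1)))?;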

    /// Checks whether a particular GZVM-specific capability is available for this VM.
    fn check_raw_capability(&self, capability: GeniezoneCap) -> bool {
        let mut cap: u64 = capability as u64;
        // SAFETY:
        // Safe because we know that our file is a GZVM fd, and if the cap is invalid GZVM assumes
        // it's an unavailable extension and returns 0.
        unsafe {
            ioctl_with_mut_ref(self, GZVM_CHECK_EXTENSION, &mut cap);
        }
        cap == 1
    }

    // Currently only used on aarch64, but works on any architecture.
    #[allow(dead_code)]
    /// Enables a GZVM-specific capability for this VM, with the given arguments.
    ///
    /// # Safety
    /// This function is marked as unsafe because `args` may be interpreted as pointers for some
    /// capabilities. The caller must ensure that any pointers passed in the `args` array are
    /// allocated as the kernel expects, and that mutable pointers are owned.
    unsafe fn ctrl_geniezone_enable_capability(
        &self,
        capability: GeniezoneCap,
        args: &[u64; 5],
    ) -> Result<gzvm_enable_cap> {
        let gzvm_cap = gzvm_enable_cap {
            cap: capability as u64,
            args: *args,
        };
        // Safe because we allocated the struct and we know the kernel will read exactly the size of
        // the struct, and because we assume the caller has allocated the args appropriately.
        let ret = ioctl_with_ref(self, GZVM_ENABLE_CAP, &gzvm_cap);
        if ret == 0 {
            Ok(gzvm_cap)
        } else {
            errno_result()
        }
    }

    pub fn create_geniezone_device(&self, dev: gzvm_create_device) -> Result<()> {
        // SAFETY:
        // Safe because we allocated the struct and we know the kernel will modify exactly the size
        // of the struct and the return value is checked.
        let ret = unsafe { base::ioctl_with_ref(self, GZVM_CREATE_DEVICE, &dev) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        match self.guest_mem.remove_range(guest_address, size) {
            Ok(_) => Ok(()),
            Err(vm_memory::Error::MemoryAccess(_, MmapError::SystemCallFailed(e))) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_deflate(&mut self, _guest_address: GuestAddress, _size: u64) -> Result<()> {
        // No-op; when the guest attempts to access the pages again, Linux/GZVM will provide them.
        Ok(())
    }
}

impl Vm for GeniezoneVm {
    fn try_clone(&self) -> Result<Self> {
        Ok(GeniezoneVm {
            geniezone: self.geniezone.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
        })
    }

    fn check_capability(&self, c: VmCap) -> bool {
        if let Some(val) = self.check_capability_arch(c) {
            return val;
        }
        match c {
            VmCap::DirtyLog => true,
            VmCap::PvClock => false,
            VmCap::Protected => self.check_raw_capability(GeniezoneCap::ArmProtectedVm),
            VmCap::EarlyInitCpuid => false,
            VmCap::ReadOnlyMemoryRegion => false,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        self.geniezone.get_guest_phys_addr_bits()
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // GZVM requires the user memory region size to be page-size aligned. It is safe to round
        // mem.size() up to a page-size-aligned value because mmap rounds the mapping size up to be
        // page-size aligned if it is not.
        let size = (mem.size() as u64 + pgsz - 1) / pgsz * pgsz;
        let end_addr = guest_addr
            .checked_add(size)
            .ok_or_else(|| Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };
        let flags = 0;

        // SAFETY:
        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
                flags,
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, mem);
        Ok(slot)
    }
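
    // Slot allocation note: freed slots are recycled via the `mem_slot_gaps`
    // min-heap before new slot numbers are minted. For example, with two fixed
    // guest_mem regions (slots 0 and 1) and no gaps, the first added region
    // gets slot 2; removing it pushes `Reverse(2)` onto the heap, so the next
    // `add_memory_region` call reuses slot 2.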

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let mem = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        _slot: MemSlot,
        _offset: usize,
        _size: usize,
    ) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    fn madvise_remove_memory_region(
        &mut self,
        _slot: MemSlot,
        _offset: usize,
        _size: usize,
    ) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        // SAFETY:
        // Safe because the slot is checked against the list of memory slots.
        unsafe {
            set_user_memory_region(&self.vm, slot, false, false, 0, 0, std::ptr::null_mut(), 0)?;
        }
        self.mem_slot_gaps.lock().push(Reverse(slot));
        // This remove will always succeed because of the contains_key check above.
        Ok(regions.remove(&slot).unwrap())
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // This function should not be invoked because the vgic device is created in irqchip.
        errno_result()
    }

    fn get_dirty_log(&self, _slot: MemSlot, _dirty_log: &mut [u8]) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, false)
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        self.ioeventfd(evt, addr, datamatch, true)
    }

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        // GZVM delivers IO events in-kernel with ioeventfds, so this is a no-op.
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        self.get_pvclock_arch()
    }

    fn set_pvclock(&self, state: &ClockState) -> Result<()> {
        self.set_pvclock_arch(state)
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let region = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }
}

impl AsRawDescriptor for GeniezoneVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

struct GeniezoneVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for GeniezoneVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: we ensure `run_mmap` is a valid mapping of `gzvm_vcpu_run` at creation time, and
        // the `Arc` ensures the mapping still exists while we hold a reference to it.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut gzvm_vcpu_run;
            (*run).immediate_exit = 1;
        }
    }
}

/// A wrapper around using a Geniezone Vcpu.
pub struct GeniezoneVcpu {
    vm: SafeDescriptor,
    vcpu: SafeDescriptor,
    id: usize,
    run_mmap: Arc<MemoryMapping>,
}

impl Vcpu for GeniezoneVcpu {
    fn try_clone(&self) -> Result<Self> {
        let vm = self.vm.try_clone()?;
        let vcpu = self.vcpu.try_clone()?;

        Ok(GeniezoneVcpu {
            vm,
            vcpu,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    fn id(&self) -> usize {
        self.id
    }

    #[allow(clippy::cast_ptr_alignment)]
    fn set_immediate_exit(&self, exit: bool) {
        // TODO(b/315998194): Add safety comment
        #[allow(clippy::undocumented_unsafe_blocks)]
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gzvm_vcpu_run) };
        run.immediate_exit = exit as u8;
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(GeniezoneVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }

    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        Err(Error::new(libc::ENXIO))
    }

    #[allow(clippy::cast_ptr_alignment)]
    // The pointer is page aligned so casting to a different type is well defined, hence the clippy
    // allow attribute.
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know that our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl_with_val(self, GZVM_RUN, self.run_mmap.as_ptr() as u64) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we know we mapped enough memory to hold the gzvm_vcpu_run struct because the
        // kernel told us how large it was.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gzvm_vcpu_run) };

        match run.exit_reason {
            GZVM_EXIT_MMIO => Ok(VcpuExit::Mmio),
            GZVM_EXIT_IRQ => Ok(VcpuExit::IrqWindowOpen),
            GZVM_EXIT_HVC => Ok(VcpuExit::Hypercall),
            GZVM_EXIT_EXCEPTION => Err(Error::new(EINVAL)),
            GZVM_EXIT_DEBUG => Ok(VcpuExit::Debug),
            GZVM_EXIT_FAIL_ENTRY => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let hardware_entry_failure_reason = unsafe {
                    run.__bindgen_anon_1
                        .fail_entry
                        .hardware_entry_failure_reason
                };
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                })
            }
            GZVM_EXIT_SYSTEM_EVENT => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let event_type = unsafe { run.__bindgen_anon_1.system_event.type_ };
                match event_type {
                    GZVM_SYSTEM_EVENT_SHUTDOWN => Ok(VcpuExit::SystemEventShutdown),
                    GZVM_SYSTEM_EVENT_RESET => Ok(VcpuExit::SystemEventReset),
                    GZVM_SYSTEM_EVENT_CRASH => Ok(VcpuExit::SystemEventCrash),
                    _ => {
                        error!("Unknown GZVM system event {}", event_type);
                        Err(Error::new(EINVAL))
                    }
                }
            }
            GZVM_EXIT_INTERNAL_ERROR => Ok(VcpuExit::InternalError),
            GZVM_EXIT_SHUTDOWN => Ok(VcpuExit::Shutdown(Ok(()))),
            GZVM_EXIT_UNKNOWN => panic!("unknown gzvm exit reason\n"),
            r => panic!("unknown gzvm exit reason: {}", r),
        }
    }

    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the gzvm_vcpu_run struct because the
        // kernel told us how large it was. The pointer is page aligned so casting to a different
        // type is well defined, hence the clippy allow attribute.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gzvm_vcpu_run) };

        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == GZVM_EXIT_MMIO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.size as usize];

        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    fn handle_io(&self, _handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        Err(Error::new(EINVAL))
    }
}

impl AsRawDescriptor for GeniezoneVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}
1252