// Copyright 2023 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod aarch64;
mod gunyah_sys;

use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::collections::HashSet;
use std::ffi::CString;
use std::fs::File;
use std::mem::size_of;
use std::os::raw::c_ulong;
use std::os::unix::prelude::OsStrExt;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;

use base::errno_result;
use base::info;
use base::ioctl;
use base::ioctl_with_ref;
use base::ioctl_with_val;
use base::pagesize;
use base::warn;
use base::Error;
use base::FromRawDescriptor;
use base::MemoryMapping;
use base::MemoryMappingBuilder;
use base::MmapError;
use base::RawDescriptor;
use gunyah_sys::*;
use libc::open;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use libc::O_CLOEXEC;
use libc::O_RDWR;
use sync::Mutex;
use vm_memory::MemoryRegionPurpose;

use crate::*;

pub struct Gunyah {
    gunyah: SafeDescriptor,
}

impl AsRawDescriptor for Gunyah {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.gunyah.as_raw_descriptor()
    }
}

impl Gunyah {
    pub fn new_with_path(device_path: &Path) -> Result<Gunyah> {
        let c_path = CString::new(device_path.as_os_str().as_bytes()).unwrap();
        // SAFETY:
        // Open calls are safe because we give a nul-terminated string and verify the result.
        let ret = unsafe { open(c_path.as_ptr(), O_RDWR | O_CLOEXEC) };
        if ret < 0 {
            return errno_result();
        }
        Ok(Gunyah {
            // SAFETY:
            // Safe because we verify that ret is valid and we own the fd.
            gunyah: unsafe { SafeDescriptor::from_raw_descriptor(ret) },
        })
    }

    pub fn new() -> Result<Gunyah> {
        Gunyah::new_with_path(&PathBuf::from("/dev/gunyah"))
    }
}

impl Hypervisor for Gunyah {
    fn try_clone(&self) -> Result<Self>
    where
        Self: Sized,
    {
        Ok(Gunyah {
            gunyah: self.gunyah.try_clone()?,
        })
    }

    fn check_capability(&self, cap: HypervisorCap) -> bool {
        match cap {
            HypervisorCap::UserMemory => true,
            HypervisorCap::ArmPmuV3 => false,
            HypervisorCap::ImmediateExit => true,
            HypervisorCap::StaticSwiotlbAllocationRequired => true,
            HypervisorCap::HypervisorInitializedBootContext => true,
            HypervisorCap::S390UserSigp | HypervisorCap::TscDeadlineTimer => false,
            #[cfg(target_arch = "x86_64")]
            HypervisorCap::Xcrs | HypervisorCap::CalibratedTscLeafRequired => false,
        }
    }
}

// Wrapper around the GH_VM_ANDROID_LEND_USER_MEM ioctl, which lends a range of host user pages
// to the guest.
//
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn android_lend_user_memory_region(
    vm: &SafeDescriptor,
    slot: MemSlot,
    read_only: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = 0;
    flags |= GH_MEM_ALLOW_READ | GH_MEM_ALLOW_EXEC;
    if !read_only {
        flags |= GH_MEM_ALLOW_WRITE;
    }

    let region = gh_userspace_memory_region {
        label: slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(vm, GH_VM_ANDROID_LEND_USER_MEM, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}
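// Note: android_lend_user_memory_region and set_user_memory_region build identical
// gh_userspace_memory_region arguments and differ only in the ioctl they invoke. Lending is
// used in GunyahVm::new below for regions a protected VM must own exclusively (guest RAM and
// pVM firmware), while ordinary shared mappings go through GH_VM_SET_USER_MEM_REGION.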
// Wrapper around the GH_VM_SET_USER_MEM_REGION ioctl, which creates, modifies, or deletes a
// mapping from guest physical to host user pages.
//
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    vm: &SafeDescriptor,
    slot: MemSlot,
    read_only: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = 0;
    flags |= GH_MEM_ALLOW_READ | GH_MEM_ALLOW_EXEC;
    if !read_only {
        flags |= GH_MEM_ALLOW_WRITE;
    }

    let region = gh_userspace_memory_region {
        label: slot,
        flags,
        guest_phys_addr: guest_addr,
        memory_size,
        userspace_addr: userspace_addr as u64,
    };

    let ret = ioctl_with_ref(vm, GH_VM_SET_USER_MEM_REGION, &region);
    if ret == 0 {
        Ok(())
    } else {
        errno_result()
    }
}

#[derive(PartialEq, Eq, Hash)]
pub struct GunyahIrqRoute {
    irq: u32,
    level: bool,
}

pub struct GunyahVm {
    gh: Gunyah,
    vm: SafeDescriptor,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (Box<dyn MappedRegion>, GuestAddress)>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    routes: Arc<Mutex<HashSet<GunyahIrqRoute>>>,
    hv_cfg: crate::Config,
}

impl AsRawDescriptor for GunyahVm {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vm.as_raw_descriptor()
    }
}

impl GunyahVm {
    pub fn new(gh: &Gunyah, guest_mem: GuestMemory, cfg: Config) -> Result<GunyahVm> {
        // SAFETY:
        // Safe because we know gunyah is a real gunyah fd as this module is the only one that can
        // make Gunyah objects.
        let ret = unsafe { ioctl_with_val(gh, GH_CREATE_VM, 0 as c_ulong) };
        if ret < 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we verify that ret is valid and we own the fd.
        let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
        for region in guest_mem.regions() {
            let lend = if cfg.protection_type.isolates_memory() {
                match region.options.purpose {
                    MemoryRegionPurpose::GuestMemoryRegion => true,
                    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
                    MemoryRegionPurpose::ProtectedFirmwareRegion => true,
                    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
                    MemoryRegionPurpose::StaticSwiotlbRegion => false,
                }
            } else {
                false
            };

            if lend {
                // SAFETY:
                // Safe because the guest regions are guaranteed not to overlap.
                unsafe {
                    android_lend_user_memory_region(
                        &vm_descriptor,
                        region.index as MemSlot,
                        false,
                        region.guest_addr.offset(),
                        region.size.try_into().unwrap(),
                        region.host_addr as *mut u8,
                    )?;
                }
            } else {
                // SAFETY:
                // Safe because the guest regions are guaranteed not to overlap.
                unsafe {
                    set_user_memory_region(
                        &vm_descriptor,
                        region.index as MemSlot,
                        false,
                        region.guest_addr.offset(),
                        region.size.try_into().unwrap(),
                        region.host_addr as *mut u8,
                    )?;
                }
            }
        }

        Ok(GunyahVm {
            gh: gh.try_clone()?,
            vm: vm_descriptor,
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            routes: Arc::new(Mutex::new(HashSet::new())),
            hv_cfg: cfg,
        })
    }
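    /// Creates a vcpu by registering a GH_FN_VCPU function with the VM and mapping the fd it
    /// returns to obtain the shared `gh_vcpu_run` state page.
    ///
    /// A minimal usage sketch (marked `ignore`: it needs a /dev/gunyah device and a populated
    /// `GuestMemory`, and `Config::default()` is assumed for brevity; none of those are
    /// constructed here):
    ///
    /// ```ignore
    /// let gh = Gunyah::new()?;
    /// let vm = GunyahVm::new(&gh, guest_mem, Config::default())?;
    /// let mut vcpu = vm.create_vcpu(0)?;
    /// vm.start()?;
    /// while let Ok(exit) = vcpu.run() {
    ///     // Dispatch on `exit`, e.g. VcpuExit::Mmio -> vcpu.handle_mmio(...).
    /// }
    /// ```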
    fn create_vcpu(&self, id: usize) -> Result<GunyahVcpu> {
        let gh_fn_vcpu_arg = gh_fn_vcpu_arg {
            id: id.try_into().unwrap(),
        };

        let function_desc = gh_fn_desc {
            type_: GH_FN_VCPU,
            arg_size: size_of::<gh_fn_vcpu_arg>() as u32,
            // SAFETY:
            // Safe because kernel is expecting pointer with non-zero arg_size
            arg: &gh_fn_vcpu_arg as *const gh_fn_vcpu_arg as u64,
        };

        // SAFETY:
        // Safe because we know that our file is a VM fd and we verify the return result.
        let fd = unsafe { ioctl_with_ref(self, GH_VM_ADD_FUNCTION, &function_desc) };
        if fd < 0 {
            return errno_result();
        }

        // SAFETY:
        // Wrap the vcpu now in case the following ? returns early. This is safe because we
        // verified the value of the fd and we own the fd.
        let vcpu = unsafe { File::from_raw_descriptor(fd) };

        // SAFETY:
        // Safe because we know this is a Gunyah VCPU
        let res = unsafe { ioctl(&vcpu, GH_VCPU_MMAP_SIZE) };
        if res < 0 {
            return errno_result();
        }
        let run_mmap_size = res as usize;

        let run_mmap = MemoryMappingBuilder::new(run_mmap_size)
            .from_file(&vcpu)
            .build()
            .map_err(|_| Error::new(ENOSPC))?;

        Ok(GunyahVcpu {
            vm: self.vm.try_clone()?,
            vcpu,
            id,
            run_mmap: Arc::new(run_mmap),
        })
    }

    pub fn register_irqfd(&self, label: u32, evt: &Event, level: bool) -> Result<()> {
        let gh_fn_irqfd_arg = gh_fn_irqfd_arg {
            fd: evt.as_raw_descriptor() as u32,
            label,
            flags: if level { GH_IRQFD_LEVEL } else { 0 },
            ..Default::default()
        };

        let function_desc = gh_fn_desc {
            type_: GH_FN_IRQFD,
            arg_size: size_of::<gh_fn_irqfd_arg>() as u32,
            // SAFETY:
            // Safe because kernel is expecting pointer with non-zero arg_size
            arg: &gh_fn_irqfd_arg as *const gh_fn_irqfd_arg as u64,
        };

        // SAFETY: safe because the return value is checked.
        let ret = unsafe { ioctl_with_ref(self, GH_VM_ADD_FUNCTION, &function_desc) };
        if ret == 0 {
            self.routes
                .lock()
                .insert(GunyahIrqRoute { irq: label, level });
            Ok(())
        } else {
            errno_result()
        }
    }

    pub fn unregister_irqfd(&self, label: u32, _evt: &Event) -> Result<()> {
        let gh_fn_irqfd_arg = gh_fn_irqfd_arg {
            label,
            ..Default::default()
        };

        let function_desc = gh_fn_desc {
            type_: GH_FN_IRQFD,
            arg_size: size_of::<gh_fn_irqfd_arg>() as u32,
            // SAFETY:
            // Safe because kernel is expecting pointer with non-zero arg_size
            arg: &gh_fn_irqfd_arg as *const gh_fn_irqfd_arg as u64,
        };

        // SAFETY: safe because memory is not modified and the return value is checked.
        let ret = unsafe { ioctl_with_ref(self, GH_VM_REMOVE_FUNCTION, &function_desc) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    pub fn try_clone(&self) -> Result<GunyahVm>
    where
        Self: Sized,
    {
        Ok(GunyahVm {
            gh: self.gh.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            routes: self.routes.clone(),
            hv_cfg: self.hv_cfg,
        })
    }

    fn set_dtb_config(&self, fdt_address: GuestAddress, fdt_size: usize) -> Result<()> {
        let dtb_config = gh_vm_dtb_config {
            guest_phys_addr: fdt_address.offset(),
            size: fdt_size.try_into().unwrap(),
        };

        // SAFETY:
        // Safe because we know this is a Gunyah VM
        let ret = unsafe { ioctl_with_ref(self, GH_VM_SET_DTB_CONFIG, &dtb_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn set_protected_vm_firmware_ipa(&self, fw_addr: GuestAddress, fw_size: u64) -> Result<()> {
        let fw_config = gh_vm_firmware_config {
            guest_phys_addr: fw_addr.offset(),
            size: fw_size,
        };

        // SAFETY:
        // Safe because we know this is a Gunyah VM
        let ret = unsafe { ioctl_with_ref(self, GH_VM_ANDROID_SET_FW_CONFIG, &fw_config) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
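    /// Starts VM execution. Boot configuration is expected to be in place first: callers set
    /// the DTB location with `set_dtb_config` (and, for protected VMs, the firmware IPA with
    /// `set_protected_vm_firmware_ipa`) before this call.
    ///
    /// A hedged ordering sketch (`fdt_addr` and `fdt_size` are placeholders):
    ///
    /// ```ignore
    /// vm.set_dtb_config(fdt_addr, fdt_size)?;
    /// vm.start()?;
    /// ```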
    fn start(&self) -> Result<()> {
        // SAFETY: safe because memory is not modified and the return value is checked.
        let ret = unsafe { ioctl(self, GH_VM_START) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }
}

impl Vm for GunyahVm {
    fn try_clone(&self) -> Result<Self>
    where
        Self: Sized,
    {
        Ok(GunyahVm {
            gh: self.gh.try_clone()?,
            vm: self.vm.try_clone()?,
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            routes: self.routes.clone(),
            hv_cfg: self.hv_cfg,
        })
    }

    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            VmCap::DirtyLog => false,
            // Strictly speaking, Gunyah supports pvclock, but Gunyah takes care
            // of it and crosvm doesn't need to do anything for it
            VmCap::PvClock => false,
            VmCap::Protected => true,
            VmCap::EarlyInitCpuid => false,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => false,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        40
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }
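    /// Registers an additional memory region with the VM, reusing a previously freed slot
    /// number when one is available.
    ///
    /// The requested size is rounded up to the page size, matching what mmap does to the
    /// backing mapping. An illustrative check of the round-up arithmetic used below (4 KiB
    /// pages assumed):
    ///
    /// ```
    /// let pgsz: u64 = 4096;
    /// let round_up = |len: u64| (len + pgsz - 1) / pgsz * pgsz;
    /// assert_eq!(round_up(1), 4096);
    /// assert_eq!(round_up(4096), 4096);
    /// assert_eq!(round_up(4097), 8192);
    /// ```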
    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem_region: Box<dyn MappedRegion>,
        read_only: bool,
        _log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let pgsz = pagesize() as u64;
        // Gunyah requires the user memory region size to be page-size aligned. It is safe to
        // round mem_region.size() up to a page boundary because mmap rounds the mapping's size
        // up the same way if it is not already aligned.
        let size = (mem_region.size() as u64 + pgsz - 1) / pgsz * pgsz;

        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }

        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // SAFETY:
        // Safe because the guest address range was checked against overlaps above and the
        // return value is checked.
        let res = unsafe {
            set_user_memory_region(
                &self.vm,
                slot,
                read_only,
                guest_addr.offset(),
                size,
                mem_region.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }

        regions.insert(slot, (mem_region, guest_addr));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (mem, _) = regions.get_mut(&slot).ok_or_else(|| Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn madvise_pageout_memory_region(
        &mut self,
        _slot: MemSlot,
        _offset: usize,
        _size: usize,
    ) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    fn madvise_remove_memory_region(
        &mut self,
        _slot: MemSlot,
        _offset: usize,
        _size: usize,
    ) -> Result<()> {
        Err(Error::new(ENOTSUP))
    }

    fn remove_memory_region(&mut self, _slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        unimplemented!()
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        unimplemented!()
    }

    fn get_dirty_log(&self, _slot: MemSlot, _dirty_log: &mut [u8]) -> Result<()> {
        unimplemented!()
    }
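    /// Registers an ioeventfd with the VM via a GH_FN_IOEVENTFD function. `Datamatch` selects
    /// both the access width (`len`) and, when a value is given, the GH_IOEVENTFD_DATAMATCH
    /// flag so the kernel only signals the event on a matching write.
    ///
    /// A hedged sketch (assumes an `Event` and a device MMIO address `addr`; only MMIO
    /// addresses are supported here):
    ///
    /// ```ignore
    /// let evt = Event::new()?;
    /// // Signal `evt` only when the guest writes the 16-bit value 0 to `addr`.
    /// vm.register_ioevent(&evt, IoEventAddress::Mmio(addr), Datamatch::U16(Some(0)))?;
    /// ```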
    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        let (do_datamatch, datamatch_value, datamatch_len) = match datamatch {
            Datamatch::AnyLength => (false, 0, 0),
            Datamatch::U8(v) => match v {
                Some(u) => (true, u as u64, 1),
                None => (false, 0, 1),
            },
            Datamatch::U16(v) => match v {
                Some(u) => (true, u as u64, 2),
                None => (false, 0, 2),
            },
            Datamatch::U32(v) => match v {
                Some(u) => (true, u as u64, 4),
                None => (false, 0, 4),
            },
            Datamatch::U64(v) => match v {
                Some(u) => (true, u, 8),
                None => (false, 0, 8),
            },
        };

        let mut flags = 0;
        if do_datamatch {
            flags |= 1 << GH_IOEVENTFD_DATAMATCH;
        }

        let maddr = if let IoEventAddress::Mmio(maddr) = addr {
            maddr
        } else {
            todo!()
        };

        let gh_fn_ioeventfd_arg = gh_fn_ioeventfd_arg {
            fd: evt.as_raw_descriptor(),
            datamatch: datamatch_value,
            len: datamatch_len,
            addr: maddr,
            flags,
            ..Default::default()
        };

        let function_desc = gh_fn_desc {
            type_: GH_FN_IOEVENTFD,
            arg_size: size_of::<gh_fn_ioeventfd_arg>() as u32,
            // SAFETY:
            // Safe because kernel is expecting pointer with non-zero arg_size
            arg: &gh_fn_ioeventfd_arg as *const gh_fn_ioeventfd_arg as u64,
        };

        // SAFETY: safe because memory is not modified and the return value is checked.
        let ret = unsafe { ioctl_with_ref(self, GH_VM_ADD_FUNCTION, &function_desc) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn unregister_ioevent(
        &mut self,
        _evt: &Event,
        addr: IoEventAddress,
        _datamatch: Datamatch,
    ) -> Result<()> {
        let maddr = if let IoEventAddress::Mmio(maddr) = addr {
            maddr
        } else {
            todo!()
        };

        let gh_fn_ioeventfd_arg = gh_fn_ioeventfd_arg {
            addr: maddr,
            ..Default::default()
        };

        let function_desc = gh_fn_desc {
            type_: GH_FN_IOEVENTFD,
            arg_size: size_of::<gh_fn_ioeventfd_arg>() as u32,
            // SAFETY:
            // Safe because kernel is expecting pointer with non-zero arg_size
            arg: &gh_fn_ioeventfd_arg as *const gh_fn_ioeventfd_arg as u64,
        };

        // SAFETY: safe because memory is not modified and the return value is checked.
        let ret = unsafe { ioctl_with_ref(self, GH_VM_REMOVE_FUNCTION, &function_desc) };
        if ret == 0 {
            Ok(())
        } else {
            errno_result()
        }
    }

    fn handle_io_events(&self, _addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        unimplemented!()
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        unimplemented!()
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (region, _) = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (region, _) = regions.get_mut(&slot).ok_or_else(|| Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, _event: BalloonEvent) -> Result<()> {
        unimplemented!()
    }
}

const GH_RM_EXIT_TYPE_VM_EXIT: u16 = 0;
const GH_RM_EXIT_TYPE_PSCI_POWER_OFF: u16 = 1;
const GH_RM_EXIT_TYPE_PSCI_SYSTEM_RESET: u16 = 2;
const GH_RM_EXIT_TYPE_PSCI_SYSTEM_RESET2: u16 = 3;
const GH_RM_EXIT_TYPE_WDT_BITE: u16 = 4;
const GH_RM_EXIT_TYPE_HYP_ERROR: u16 = 5;
const GH_RM_EXIT_TYPE_ASYNC_EXT_ABORT: u16 = 6;
const GH_RM_EXIT_TYPE_VM_FORCE_STOPPED: u16 = 7;

pub struct GunyahVcpu {
    vm: SafeDescriptor,
    vcpu: File,
    id: usize,
    run_mmap: Arc<MemoryMapping>,
}

struct GunyahVcpuSignalHandle {
    run_mmap: Arc<MemoryMapping>,
}

impl VcpuSignalHandleInner for GunyahVcpuSignalHandle {
    fn signal_immediate_exit(&self) {
        // SAFETY: we ensure `run_mmap` is a valid mapping of `gh_vcpu_run` at creation time,
        // and the `Arc` ensures the mapping still exists while we hold a reference to it.
        unsafe {
            let run = self.run_mmap.as_ptr() as *mut gh_vcpu_run;
            (*run).immediate_exit = 1;
        }
    }
}

impl AsRawDescriptor for GunyahVcpu {
    fn as_raw_descriptor(&self) -> RawDescriptor {
        self.vcpu.as_raw_descriptor()
    }
}

impl Vcpu for GunyahVcpu {
    fn try_clone(&self) -> Result<Self>
    where
        Self: Sized,
    {
        let vcpu = self.vcpu.try_clone()?;

        Ok(GunyahVcpu {
            vm: self.vm.try_clone()?,
            vcpu,
            id: self.id,
            run_mmap: self.run_mmap.clone(),
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }
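    /// Runs the vcpu until the next exit, then decodes the shared `gh_vcpu_run` area into a
    /// `VcpuExit`. MMIO exits are surfaced as `VcpuExit::Mmio` and completed later via
    /// `handle_mmio`; GH_VCPU_EXIT_STATUS exits map the resource-manager exit types (the
    /// GH_RM_EXIT_TYPE_* constants above) onto shutdown, reset, or crash events.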
    fn run(&mut self) -> Result<VcpuExit> {
        // SAFETY:
        // Safe because we know our file is a VCPU fd and we verify the return result.
        let ret = unsafe { ioctl(self, GH_VCPU_RUN) };
        if ret != 0 {
            return errno_result();
        }

        // SAFETY:
        // Safe because we know we mapped enough memory to hold the gh_vcpu_run struct
        // because the kernel told us how large it is.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gh_vcpu_run) };
        match run.exit_reason {
            GH_VCPU_EXIT_MMIO => Ok(VcpuExit::Mmio),
            GH_VCPU_EXIT_STATUS => {
                // SAFETY:
                // Safe because the exit_reason (which comes from the kernel) told us which
                // union field to use.
                let status = unsafe { &mut run.__bindgen_anon_1.status };
                match status.status {
                    GH_VM_STATUS_GH_VM_STATUS_LOAD_FAILED => Ok(VcpuExit::FailEntry {
                        hardware_entry_failure_reason: 0,
                    }),
                    GH_VM_STATUS_GH_VM_STATUS_CRASHED => Ok(VcpuExit::SystemEventCrash),
                    GH_VM_STATUS_GH_VM_STATUS_EXITED => {
                        info!("exit type {}", status.exit_info.type_);
                        match status.exit_info.type_ {
                            GH_RM_EXIT_TYPE_VM_EXIT => Ok(VcpuExit::SystemEventShutdown),
                            GH_RM_EXIT_TYPE_PSCI_POWER_OFF => Ok(VcpuExit::SystemEventShutdown),
                            GH_RM_EXIT_TYPE_PSCI_SYSTEM_RESET => Ok(VcpuExit::SystemEventReset),
                            GH_RM_EXIT_TYPE_PSCI_SYSTEM_RESET2 => Ok(VcpuExit::SystemEventReset),
                            GH_RM_EXIT_TYPE_WDT_BITE => Ok(VcpuExit::SystemEventCrash),
                            GH_RM_EXIT_TYPE_HYP_ERROR => Ok(VcpuExit::SystemEventCrash),
                            GH_RM_EXIT_TYPE_ASYNC_EXT_ABORT => Ok(VcpuExit::SystemEventCrash),
                            GH_RM_EXIT_TYPE_VM_FORCE_STOPPED => Ok(VcpuExit::SystemEventShutdown),
                            r => {
                                warn!("Unknown exit type: {}", r);
                                Err(Error::new(EINVAL))
                            }
                        }
                    }
                    r => {
                        warn!("Unknown vm status: {}", r);
                        Err(Error::new(EINVAL))
                    }
                }
            }
            r => {
                warn!("unknown gh exit reason: {}", r);
                Err(Error::new(EINVAL))
            }
        }
    }

    fn id(&self) -> usize {
        self.id
    }

    fn set_immediate_exit(&self, exit: bool) {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the gh_vcpu_run struct because
        // the kernel told us how large it was. The pointer is page aligned, so casting to a
        // different type is well defined.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gh_vcpu_run) };
        run.immediate_exit = exit.into();
    }

    fn signal_handle(&self) -> VcpuSignalHandle {
        VcpuSignalHandle {
            inner: Box::new(GunyahVcpuSignalHandle {
                run_mmap: self.run_mmap.clone(),
            }),
        }
    }

    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        // SAFETY:
        // Safe because we know we mapped enough memory to hold the gh_vcpu_run struct because
        // the kernel told us how large it was. The pointer is page aligned, so casting to a
        // different type is well defined.
        let run = unsafe { &mut *(self.run_mmap.as_ptr() as *mut gh_vcpu_run) };
        // Verify that the handler is called in the right context.
        assert!(run.exit_reason == GH_VCPU_EXIT_MMIO);
        // SAFETY:
        // Safe because the exit_reason (which comes from the kernel) told us which
        // union field to use.
        let mmio = unsafe { &mut run.__bindgen_anon_1.mmio };
        let address = mmio.phys_addr;
        let data = &mut mmio.data[..mmio.len as usize];
        if mmio.is_write != 0 {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Write(data),
            })
        } else {
            handle_fn(IoParams {
                address,
                operation: IoOperation::Read(data),
            })
        }
    }

    fn handle_io(&self, _handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        unreachable!()
    }

    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }

    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        unimplemented!()
    }
}
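#[cfg(test)]
mod tests {
    use super::*;

    // Illustrative sketch only, not part of the upstream driver: wires the pieces in this
    // file into the minimal create -> start -> run loop. `GuestMemory::new` and
    // `Config::default()` are assumed from the rest of the crate, and the guest address and
    // size below are arbitrary placeholders. The test is #[ignore]d because it needs a
    // /dev/gunyah device and a bootable guest image, which this sketch does not provide.
    #[test]
    #[ignore]
    fn vcpu_run_loop_sketch() {
        let gh = Gunyah::new().expect("no /dev/gunyah");
        let guest_mem = GuestMemory::new(&[(GuestAddress(0x8000_0000), 0x100_0000)]).unwrap();
        let vm = GunyahVm::new(&gh, guest_mem, Config::default()).unwrap();
        let mut vcpu = vm.create_vcpu(0).unwrap();
        vm.start().unwrap();

        loop {
            match vcpu.run().unwrap() {
                VcpuExit::Mmio => {
                    // Complete the MMIO access; a real VMM would dispatch to its device bus.
                    vcpu.handle_mmio(&mut |IoParams { address: _, operation }| {
                        match operation {
                            IoOperation::Read(data) => data.fill(0),
                            IoOperation::Write(_) => {}
                        }
                        Ok(())
                    })
                    .unwrap();
                }
                // Any shutdown/reset/crash event ends the sketch.
                _ => break,
            }
        }
    }
}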