xref: /aosp_15_r20/external/crosvm/hypervisor/src/whpx/vm.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use core::ffi::c_void;
6 use std::cmp::Reverse;
7 use std::collections::BTreeMap;
8 use std::collections::BinaryHeap;
9 use std::convert::TryInto;
10 use std::sync::Arc;
11 
12 use base::error;
13 use base::info;
14 use base::pagesize;
15 use base::AsRawDescriptor;
16 use base::Error;
17 use base::Event;
18 use base::MappedRegion;
19 use base::MmapError;
20 use base::Protection;
21 use base::RawDescriptor;
22 use base::Result;
23 use base::SafeDescriptor;
24 use base::SendTube;
25 use fnv::FnvHashMap;
26 use libc::EEXIST;
27 use libc::EFAULT;
28 use libc::EINVAL;
29 use libc::EIO;
30 use libc::ENODEV;
31 use libc::ENOENT;
32 use libc::ENOSPC;
33 use libc::ENOTSUP;
34 use libc::EOVERFLOW;
35 use sync::Mutex;
36 use vm_memory::GuestAddress;
37 use vm_memory::GuestMemory;
38 use winapi::shared::winerror::ERROR_BUSY;
39 use winapi::shared::winerror::ERROR_SUCCESS;
40 use winapi::um::memoryapi::OfferVirtualMemory;
41 use winapi::um::memoryapi::ReclaimVirtualMemory;
42 use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
43 use winapi::um::winnt::RtlZeroMemory;
44 
45 use super::types::*;
46 use super::*;
47 use crate::host_phys_addr_bits;
48 use crate::whpx::whpx_sys::*;
49 use crate::BalloonEvent;
50 use crate::ClockState;
51 use crate::Datamatch;
52 use crate::DeliveryMode;
53 use crate::DestinationMode;
54 use crate::DeviceKind;
55 use crate::IoEventAddress;
56 use crate::LapicState;
57 use crate::MemCacheType;
58 use crate::MemSlot;
59 use crate::TriggerMode;
60 use crate::VcpuX86_64;
61 use crate::Vm;
62 use crate::VmCap;
63 use crate::VmX86_64;
64 
65 pub struct WhpxVm {
66     whpx: Whpx,
67     // Reference counted, since we need to implement try_clone or some variation.
68     // There is only ever one partition create and one delete, unlike the dup/close handle pattern.
69     vm_partition: Arc<SafePartition>,
70     guest_mem: GuestMemory,
71     mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
72     /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
73     mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
74     // WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
75     //   1. All ioevents are registered during device setup, and thus can be cloned when the vm is
76     //      cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
77     //      vcpu thread easier because no locks will need to be acquired.
78     //   2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
79     //      will make this faster.
80     //   3. We only ever register one eventfd to each address. This simplifies our data structure.
81     ioevents: FnvHashMap<IoEventAddress, Event>,
82     // Tube to send events to control.
83     vm_evt_wrtube: Option<SendTube>,
84 }
85 
86 impl WhpxVm {
87     pub fn new(
88         whpx: &Whpx,
89         cpu_count: usize,
90         guest_mem: GuestMemory,
91         cpuid: CpuId,
92         apic_emulation: bool,
93         vm_evt_wrtube: Option<SendTube>,
94     ) -> WhpxResult<WhpxVm> {
95         let partition = SafePartition::new()?;
96         // Set up partition defaults.
97         let mut property: WHV_PARTITION_PROPERTY = Default::default();
98         property.ProcessorCount = cpu_count as u32;
99         // safe because we own this partition, and the partition property is allocated on the stack.
100         check_whpx!(unsafe {
101             WHvSetPartitionProperty(
102                 partition.partition,
103                 WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
104                 &property as *const _ as *const c_void,
105                 std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
106             )
107         })
108         .map_err(WhpxError::SetProcessorCount)?;
109 
110         // Pre-set any CPUID results provided in `cpuid`.
111         let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
112             .cpu_id_entries
113             .iter()
114             .map(WHV_X64_CPUID_RESULT::from)
115             .collect();
116 
117         // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells Linux that it's running under Hyper-V.
118         cpuid_results.push(WHV_X64_CPUID_RESULT {
119             Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
120             Reserved: [0u32; 3],
121             // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest
122             Eax: HYPERV_CPUID_MIN,
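            // The next three fields spell out the 12-byte vendor string "Microsoft Hv"
            // when their little-endian bytes are concatenated (EBX = "Micr",
            // ECX = "osof", EDX = "t Hv"); guests compare this string when probing
            // for a Hyper-V-compatible hypervisor.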
123             Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
124             Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
125             Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
126         });
127 
128         // The HYPERV_CPUID_FEATURES leaf tells Linux which Hyper-V features we support.
129         cpuid_results.push(WHV_X64_CPUID_RESULT {
130             Function: HYPERV_CPUID_FEATURES,
131             Reserved: [0u32; 3],
132             // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which means
133             // TSC scaling/offsetting is handled in hardware, not by the guest.
134             Eax: HV_ACCESS_FREQUENCY_MSRS
135                 | HV_ACCESS_TSC_INVARIANT
136                 | HV_MSR_REFERENCE_TSC_AVAILABLE,
137             Ebx: 0,
138             Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
139             Ecx: 0,
140         });
141 
142         // safe because we own this partition, and the cpuid_results vec is local to this function.
143         check_whpx!(unsafe {
144             WHvSetPartitionProperty(
145                 partition.partition,
146                 WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
147                 cpuid_results.as_ptr() as *const _ as *const c_void,
148                 (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
149             )
150         })
151         .map_err(WhpxError::SetCpuidResultList)?;
152 
153         // Set up exiting for CPUID leaves that we want crosvm to adjust, but that we can't
154         // pre-set. We can't pre-set leaves that rely on irqchip information, nor leaves that
155         // return different results per-CPU.
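        // For reference, these are the standard x86 leaves 0x1 (feature info, including the
        // initial APIC ID), 0x4 (deterministic cache parameters), 0xB and 0x1F (extended
        // topology enumeration), and 0x15 (TSC and core crystal clock frequency).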
156         let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
157         // safe because we own this partition, and the exit_list vec is local to this function.
158         check_whpx!(unsafe {
159             WHvSetPartitionProperty(
160                 partition.partition,
161                 WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
162                 exit_list.as_ptr() as *const _ as *const c_void,
163                 (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
164             )
165         })
166         .map_err(WhpxError::SetCpuidExitList)?;
167 
168         // Set up exits for the CPUID instruction.
169         let mut property: WHV_PARTITION_PROPERTY = Default::default();
170         // safe because we own this partition, and the partition property is allocated on the stack.
171         unsafe {
172             property
173                 .ExtendedVmExits
174                 .__bindgen_anon_1
175                 .set_X64CpuidExit(1);
176             // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
177             // MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
178             // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
179             // fault for any unhandled MSR accesses.
180             property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
181         }
182         // safe because we own this partition, and the partition property is allocated on the stack.
183         check_whpx!(unsafe {
184             WHvSetPartitionProperty(
185                 partition.partition,
186                 WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
187                 &property as *const _ as *const c_void,
188                 std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
189             )
190         })
191         .map_err(WhpxError::SetExtendedVmExits)?;
192 
193         if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
194             return Err(WhpxError::LocalApicEmulationNotSupported);
195         }
196 
197         // Set up the APIC emulation mode.
198         let mut property: WHV_PARTITION_PROPERTY = Default::default();
199         property.LocalApicEmulationMode = if apic_emulation {
200             // TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
201             // enable it if it is.
202             WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
203         } else {
204             WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
205         };
206 
207         // safe because we own this partition, and the partition property is allocated on the stack.
208         check_whpx!(unsafe {
209             WHvSetPartitionProperty(
210                 partition.partition,
211                 WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
212                 &property as *const _ as *const c_void,
213                 std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
214             )
215         })
216         .map_err(WhpxError::SetLocalApicEmulationMode)?;
217 
218         // safe because we own this partition
219         check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
220             .map_err(WhpxError::SetupPartition)?;
221 
222         for region in guest_mem.regions() {
223             unsafe {
224                 // Safe because the guest regions are guaranteed not to overlap.
225                 set_user_memory_region(
226                     &partition,
227                     false, // read_only
228                     false, // track dirty pages
229                     region.guest_addr.offset(),
230                     region.size as u64,
231                     region.host_addr as *mut u8,
232                 )
233             }
234             .map_err(WhpxError::MapGpaRange)?;
235         }
236 
237         Ok(WhpxVm {
238             whpx: whpx.clone(),
239             vm_partition: Arc::new(partition),
240             guest_mem,
241             mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
242             mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
243             ioevents: FnvHashMap::default(),
244             vm_evt_wrtube,
245         })
246     }
247 
248     /// Get the current state of the specified VCPU's local APIC
249     pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
250         let buffer = WhpxLapicState { regs: [0u32; 1024] };
251         let mut written_size = 0u32;
252         let size = std::mem::size_of::<WhpxLapicState>();
253 
254         check_whpx!(unsafe {
255             WHvGetVirtualProcessorInterruptControllerState(
256                 self.vm_partition.partition,
257                 vcpu_id as u32,
258                 buffer.regs.as_ptr() as *mut c_void,
259                 size as u32,
260                 &mut written_size,
261             )
262         })?;
263 
264         Ok(LapicState::from(&buffer))
265     }
266 
267     /// Set the current state of the specified VCPU's local APIC
268     pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
269         let buffer = WhpxLapicState::from(state);
270         check_whpx!(unsafe {
271             WHvSetVirtualProcessorInterruptControllerState(
272                 self.vm_partition.partition,
273                 vcpu_id as u32,
274                 buffer.regs.as_ptr() as *mut c_void,
275                 std::mem::size_of::<WhpxLapicState>() as u32,
276             )
277         })?;
278         Ok(())
279     }
280 
281     /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
282     /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
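    ///
    /// A minimal usage sketch (illustrative only; the vector and destination
    /// values are arbitrary):
    ///
    /// ```ignore
    /// // Deliver an edge-triggered, fixed-mode interrupt on vector 0x30 to APIC ID 0.
    /// vm.request_interrupt(0x30, 0, DestinationMode::Physical, TriggerMode::Edge,
    ///     DeliveryMode::Fixed)?;
    /// ```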
283     pub fn request_interrupt(
284         &self,
285         vector: u8,
286         dest_id: u8,
287         dest_mode: DestinationMode,
288         trigger: TriggerMode,
289         delivery: DeliveryMode,
290     ) -> Result<()> {
291         // WHV_INTERRUPT_CONTROL does not appear to support a destination shorthand (dest_shorthand).
292         let mut interrupt = WHV_INTERRUPT_CONTROL {
293             Destination: dest_id as u32,
294             Vector: vector as u32,
295             ..Default::default()
296         };
297         interrupt.set_DestinationMode(match dest_mode {
298             DestinationMode::Physical => {
299                 WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
300             }
301             DestinationMode::Logical => {
302                 WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
303             }
304         } as u64);
305         interrupt.set_TriggerMode(match trigger {
306             TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
307             TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
308         } as u64);
309         interrupt.set_Type(match delivery {
310             DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
311             DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
312             DeliveryMode::SMI => {
313                 error!("WHPX does not support requesting an SMI");
314                 return Err(Error::new(ENOTSUP));
315             }
316             DeliveryMode::RemoteRead => {
317                 // This is also no longer supported by Intel.
318                 error!("Remote Read interrupts are not supported by WHPX");
319                 return Err(Error::new(ENOTSUP));
320             }
321             DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
322             DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
323             DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
324             DeliveryMode::External => {
325                 error!("WHPX does not support requesting an external interrupt");
326                 return Err(Error::new(ENOTSUP));
327             }
328         } as u64);
329 
330         check_whpx!(unsafe {
331             WHvRequestInterrupt(
332                 self.vm_partition.partition,
333                 &interrupt,
334                 std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
335             )
336         })
337     }
338 
339     /// In order to fully unmap a memory range such that the host can reclaim the memory,
340     /// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
341     /// in the memory.
342     ///
343     /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for other
344     /// uses when memory is in demand.
345     fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
346         info!(
347             "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
348             guest_address, size
349         );
350         // Safe because WHPX does proper error checking, even if an out-of-bounds address is
351         // provided.
352         unsafe {
353             check_whpx!(WHvUnmapGpaRange(
354                 self.vm_partition.partition,
355                 guest_address.offset(),
356                 size,
357             ))?;
358         }
359 
360         let host_address = self
361             .guest_mem
362             .get_host_address(guest_address)
363             .map_err(|_| Error::new(1))? as *mut c_void;
364 
365         // Safe because we have just successfully unmapped this range from the
366         // guest partition, so we know it's unused.
367         let result =
368             unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };
369 
370         if result != ERROR_SUCCESS {
371             let err = Error::new(result);
372             error!("Freeing memory failed with error: {}", err);
373             return Err(err);
374         }
375         Ok(())
376     }
377 
378     /// Remap memory that has previously been unmapped with #handle_inflate. Note
379     /// that attempts to remap pages that were not previously unmapped, or addresses that are not
380     /// page-aligned, will result in failure.
381     ///
382     /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
383     /// partition. Remapped memory has no guarantee of content, and the guest should not expect
384     /// it to have any.
385     fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
386         info!(
387             "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
388             guest_address, size
389         );
390 
391         let host_address = self
392             .guest_mem
393             .get_host_address(guest_address)
394             .map_err(|_| Error::new(1))? as *const c_void;
395 
396         // Note that we aren't doing any validation here that this range was previously unmapped.
397         // However, we can avoid that expensive validation by relying on Windows error checking for
398         // ReclaimVirtualMemory. The call will fail if:
399         // - The range is not currently "offered"
400         // - The range is outside of current guest mem (GuestMemory will fail to convert the
401         //   address)
402         // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
403         // hadn't previously forfeited (and even then, the contents will be zeroed).
404         //
405         // Safe because the memory ranges in question are managed by Windows, not Rust.
406         // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
407         let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };
408 
409         if result == ERROR_BUSY || result == ERROR_SUCCESS {
410             // In either of these cases, the contents of the reclaimed memory
411             // are preserved or undefined. Regardless, zero the memory
412             // to ensure no unintentional memory contents are shared.
413             //
414             // Safe because we just reclaimed the region in question and haven't yet remapped
415             // it to the guest partition, so we know it's unused.
416             unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
417         } else {
418             let err = Error::new(result);
419             error!("Reclaiming memory failed with error: {}", err);
420             return Err(err);
421         }
422 
423         // Safe because no overlap is guaranteed by the success of ReclaimVirtualMemory,
424         // which would fail if it were called on areas that were not unmapped.
425         unsafe {
426             set_user_memory_region(
427                 &self.vm_partition,
428                 false, // read_only
429                 false, // track dirty pages
430                 guest_address.offset(),
431                 size,
432                 host_address as *mut u8,
433             )
434         }
435     }
436 }
437 
438 // Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
439 // from guest physical to host user pages.
440 //
441 // Safe when the guest regions are guaranteed not to overlap.
442 unsafe fn set_user_memory_region(
443     partition: &SafePartition,
444     read_only: bool,
445     track_dirty_pages: bool,
446     guest_addr: u64,
447     memory_size: u64,
448     userspace_addr: *mut u8,
449 ) -> Result<()> {
450     let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
451         | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
452     if !read_only {
453         flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite
454     }
455     if track_dirty_pages {
456         flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
457     }
458 
459     let ret = WHvMapGpaRange(
460         partition.partition,
461         userspace_addr as *mut c_void,
462         guest_addr,
463         memory_size,
464         flags,
465     );
466     check_whpx!(ret)
467 }
468 
469 /// Helper function to determine the size in bytes of a dirty log bitmap for the given memory region
470 /// size.
471 ///
472 /// # Arguments
473 ///
474 /// * `size` - Number of bytes in the memory region being queried.
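///
/// # Example
///
/// An illustration of the math (one bit per page, rounded up to whole bytes),
/// assuming a 4 KiB page size:
///
/// ```ignore
/// // 100 pages of dirty bits -> ceil(100 / 8) = 13 bytes.
/// assert_eq!(dirty_log_bitmap_size(100 * 4096), 13);
/// ```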
475 pub fn dirty_log_bitmap_size(size: usize) -> usize {
476     let page_size = pagesize();
477     (((size + page_size - 1) / page_size) + 7) / 8
478 }
479 
480 impl Vm for WhpxVm {
481     /// Makes a shallow clone of this `Vm`.
482     fn try_clone(&self) -> Result<Self> {
483         let mut ioevents = FnvHashMap::default();
484         for (addr, evt) in self.ioevents.iter() {
485             ioevents.insert(*addr, evt.try_clone()?);
486         }
487         Ok(WhpxVm {
488             whpx: self.whpx.try_clone()?,
489             vm_partition: self.vm_partition.clone(),
490             guest_mem: self.guest_mem.clone(),
491             mem_regions: self.mem_regions.clone(),
492             mem_slot_gaps: self.mem_slot_gaps.clone(),
493             ioevents,
494             vm_evt_wrtube: self
495                 .vm_evt_wrtube
496                 .as_ref()
497                 .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
498         })
499     }
500 
501     fn check_capability(&self, c: VmCap) -> bool {
502         match c {
503             VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
504                 .unwrap_or_else(|e| {
505                     error!(
506                         "failed to check whpx feature {:?}: {}",
507                         WhpxFeature::DirtyPageTracking,
508                         e
509                     );
510                     false
511                 }),
512             // Hyper-V provides a pvclock-like mechanism, but we can't get its state.
513             VmCap::PvClock => false,
514             VmCap::Protected => false,
515             // whpx initializes cpuid early during VM creation.
516             VmCap::EarlyInitCpuid => true,
517             #[cfg(target_arch = "x86_64")]
518             VmCap::BusLockDetect => false,
519             VmCap::ReadOnlyMemoryRegion => true,
520             VmCap::MemNoncoherentDma => false,
521         }
522     }
523 
524     fn get_memory(&self) -> &GuestMemory {
525         &self.guest_mem
526     }
527 
528     fn add_memory_region(
529         &mut self,
530         guest_addr: GuestAddress,
531         mem: Box<dyn MappedRegion>,
532         read_only: bool,
533         log_dirty_pages: bool,
534         _cache: MemCacheType,
535     ) -> Result<MemSlot> {
536         let size = mem.size() as u64;
537         let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
538         if self.guest_mem.range_overlap(guest_addr, end_addr) {
539             return Err(Error::new(ENOSPC));
540         }
541         let mut regions = self.mem_regions.lock();
542         let mut gaps = self.mem_slot_gaps.lock();
543         let slot = match gaps.pop() {
544             Some(gap) => gap.0,
545             None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
546         };
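        // Note: because the heap stores Reverse(MemSlot), pop() returns the smallest
        // freed slot number first (e.g. with freed slots 2 and 5, slot 2 is reused).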
547 
548         // Safe because we check that the given guest address is valid and has no overlaps. We also
549         // know that the pointer and size are correct because the MemoryMapping interface ensures
550         // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
551         // is removed.
552         let res = unsafe {
553             set_user_memory_region(
554                 &self.vm_partition,
555                 read_only,
556                 log_dirty_pages,
557                 guest_addr.offset(),
558                 size,
559                 mem.as_ptr(),
560             )
561         };
562 
563         if let Err(e) = res {
564             gaps.push(Reverse(slot));
565             return Err(e);
566         }
567         regions.insert(slot, (guest_addr, mem));
568         Ok(slot)
569     }
570 
571     fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
572         let mut regions = self.mem_regions.lock();
573         let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;
574 
575         mem.msync(offset, size).map_err(|err| match err {
576             MmapError::InvalidAddress => Error::new(EFAULT),
577             MmapError::NotPageAligned => Error::new(EINVAL),
578             MmapError::SystemCallFailed(e) => e,
579             _ => Error::new(EIO),
580         })
581     }
582 
583     fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
584         let mut regions = self.mem_regions.lock();
585         if !regions.contains_key(&slot) {
586             return Err(Error::new(ENOENT));
587         }
588         if let Some((guest_addr, mem)) = regions.get(&slot) {
589             // Safe because the slot is checked against the list of memory slots.
590             unsafe {
591                 check_whpx!(WHvUnmapGpaRange(
592                     self.vm_partition.partition,
593                     guest_addr.offset(),
594                     mem.size() as u64,
595                 ))?;
596             }
597             self.mem_slot_gaps.lock().push(Reverse(slot));
598             Ok(regions.remove(&slot).unwrap().1)
599         } else {
600             Err(Error::new(ENOENT))
601         }
602     }
603 
604     fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
605         // Whpx does not support in-kernel devices
606         Err(Error::new(libc::ENXIO))
607     }
608 
609     fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
610         let regions = self.mem_regions.lock();
611         if let Some((guest_addr, mem)) = regions.get(&slot) {
612             // Ensure that dirty_log has enough bytes to hold one bit per page in the mmap.
613             if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
614                 return Err(Error::new(EINVAL));
615             }
616             let bitmap_size = if dirty_log.len() % 8 == 0 {
617                 dirty_log.len() / 8
618             } else {
619                 dirty_log.len() / 8 + 1
620             };
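            // bitmap_size is dirty_log.len() rounded up to whole u64 words, since
            // WHvQueryGpaRangeDirtyBitmap fills a buffer of 64-bit values.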
621             let mut bitmap = vec![0u64; bitmap_size];
622             check_whpx!(unsafe {
623                 WHvQueryGpaRangeDirtyBitmap(
624                     self.vm_partition.partition,
625                     guest_addr.offset(),
626                     mem.size() as u64,
627                     bitmap.as_mut_ptr() as *mut u64,
628                     (bitmap.len() * 8) as u32,
629                 )
630             })?;
631             // safe because we have allocated a vec of u64, which we can cast to a u8 slice.
632             let buffer = unsafe {
633                 std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
634             };
635             dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
636             Ok(())
637         } else {
638             Err(Error::new(ENOENT))
639         }
640     }
641 
642     fn register_ioevent(
643         &mut self,
644         evt: &Event,
645         addr: IoEventAddress,
646         datamatch: Datamatch,
647     ) -> Result<()> {
648         if datamatch != Datamatch::AnyLength {
649             error!("WHPX currently only supports Datamatch::AnyLength");
650             return Err(Error::new(ENOTSUP));
651         }
652 
653         if self.ioevents.contains_key(&addr) {
654             error!("WHPX does not support multiple ioevents for the same address");
655             return Err(Error::new(EEXIST));
656         }
657 
658         self.ioevents.insert(addr, evt.try_clone()?);
659 
660         Ok(())
661     }
662 
663     fn unregister_ioevent(
664         &mut self,
665         evt: &Event,
666         addr: IoEventAddress,
667         datamatch: Datamatch,
668     ) -> Result<()> {
669         if datamatch != Datamatch::AnyLength {
670             error!("WHPX only supports Datamatch::AnyLength");
671             return Err(Error::new(ENOTSUP));
672         }
673 
674         match self.ioevents.get(&addr) {
675             Some(existing_evt) => {
676                 // evt should match the existing evt associated with addr
677                 if evt != existing_evt {
678                     return Err(Error::new(ENOENT));
679                 }
680                 self.ioevents.remove(&addr);
681             }
682 
683             None => {
684                 return Err(Error::new(ENOENT));
685             }
686         };
687         Ok(())
688     }
689 
690     /// Trigger any io events based on the memory mapped IO at `addr`.  If the hypervisor does
691     /// in-kernel IO event delivery, this is a no-op.
692     fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
693         match self.ioevents.get(&addr) {
694             None => {}
695             Some(evt) => {
696                 evt.signal()?;
697             }
698         };
699         Ok(())
700     }
701 
702     fn get_pvclock(&self) -> Result<ClockState> {
703         Err(Error::new(ENODEV))
704     }
705 
706     fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
707         Err(Error::new(ENODEV))
708     }
709 
710     fn add_fd_mapping(
711         &mut self,
712         slot: u32,
713         offset: usize,
714         size: usize,
715         fd: &dyn AsRawDescriptor,
716         fd_offset: u64,
717         prot: Protection,
718     ) -> Result<()> {
719         let mut regions = self.mem_regions.lock();
720         let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;
721 
722         match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
723             Ok(()) => Ok(()),
724             Err(MmapError::SystemCallFailed(e)) => Err(e),
725             Err(_) => Err(Error::new(EIO)),
726         }
727     }
728 
729     fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
730         let mut regions = self.mem_regions.lock();
731         let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;
732 
733         match region.remove_mapping(offset, size) {
734             Ok(()) => Ok(()),
735             Err(MmapError::SystemCallFailed(e)) => Err(e),
736             Err(_) => Err(Error::new(EIO)),
737         }
738     }
739 
740     fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
741         match event {
742             BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
743             BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
744             BalloonEvent::BalloonTargetReached(_) => Ok(()),
745         }
746     }
747 
748     fn get_guest_phys_addr_bits(&self) -> u8 {
749         // Assume the guest physical address size is the same as the host.
750         host_phys_addr_bits()
751     }
752 }
753 
754 impl VmX86_64 for WhpxVm {
755     fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
756         &self.whpx
757     }
758 
759     fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
760         Ok(Box::new(WhpxVcpu::new(
761             self.vm_partition.clone(),
762             id.try_into().unwrap(),
763         )?))
764     }
765 
766     /// Sets the address of the three-page region in the VM's address space.
767     /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
768     /// WHPX.
769     fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
770         Ok(())
771     }
772 
773     /// Sets the address of a one-page region in the VM's address space.
774     /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
775     /// WHPX.
776     fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
777         Ok(())
778     }
779 }
780 
781 // NOTE: WHPX tests need to be run serially; otherwise they fail unless each test maps new regions
782 // of guest memory.
783 #[cfg(test)]
784 mod tests {
785     use std::thread;
786     use std::time::Duration;
787 
788     use base::EventWaitResult;
789     use base::MemoryMappingBuilder;
790     use base::SharedMemory;
791 
792     use super::*;
793 
794     fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
795         let whpx = Whpx::new().expect("failed to instantiate whpx");
796         let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
797             .expect("failed to get whpx features");
798         WhpxVm::new(
799             &whpx,
800             cpu_count,
801             mem,
802             CpuId::new(0),
803             local_apic_supported,
804             None,
805         )
806         .expect("failed to create whpx vm")
807     }
808 
809     #[test]
810     fn create_vm() {
811         if !Whpx::is_enabled() {
812             return;
813         }
814         let cpu_count = 1;
815         let mem =
816             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
817         new_vm(cpu_count, mem);
818     }
819 
820     #[test]
821     fn create_vcpu() {
822         if !Whpx::is_enabled() {
823             return;
824         }
825         let cpu_count = 1;
826         let mem =
827             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
828         let vm = new_vm(cpu_count, mem);
829         vm.create_vcpu(0).expect("failed to create vcpu");
830     }
831 
832     #[test]
833     fn try_clone() {
834         if !Whpx::is_enabled() {
835             return;
836         }
837         let cpu_count = 1;
838         let mem =
839             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
840         let vm = new_vm(cpu_count, mem);
841         let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
842     }
843 
844     #[test]
845     fn send_vm() {
846         if !Whpx::is_enabled() {
847             return;
848         }
849         let cpu_count = 1;
850         let mem =
851             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
852         let vm = new_vm(cpu_count, mem);
853         thread::spawn(move || {
854             let _vm = vm;
855         })
856         .join()
857         .unwrap();
858     }
859 
860     #[test]
861     fn check_vm_capability() {
862         if !Whpx::is_enabled() {
863             return;
864         }
865         let cpu_count = 1;
866         let mem =
867             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
868         let vm = new_vm(cpu_count, mem);
869         assert!(vm.check_capability(VmCap::DirtyLog));
870         assert!(!vm.check_capability(VmCap::PvClock));
871     }
872 
873     #[test]
874     fn dirty_log_size() {
875         let page_size = pagesize();
876         assert_eq!(dirty_log_bitmap_size(0), 0);
877         assert_eq!(dirty_log_bitmap_size(page_size), 1);
878         assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
879         assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
880         assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
881     }
882 
883     #[test]
884     fn register_ioevent() {
885         if !Whpx::is_enabled() {
886             return;
887         }
888         let cpu_count = 1;
889         let mem =
890             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
891         let mut vm = new_vm(cpu_count, mem);
892         let evt = Event::new().expect("failed to create event");
893         let otherevt = Event::new().expect("failed to create event");
894         vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
895             .unwrap();
896         vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
897             .unwrap();
898 
899         vm.register_ioevent(
900             &otherevt,
901             IoEventAddress::Mmio(0x1000),
902             Datamatch::AnyLength,
903         )
904         .expect_err("WHPX should not allow you to register two events for the same address");
905 
906         vm.register_ioevent(
907             &otherevt,
908             IoEventAddress::Mmio(0x1000),
909             Datamatch::U8(None),
910         )
911         .expect_err(
912             "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
913         );
914 
915         vm.register_ioevent(
916             &otherevt,
917             IoEventAddress::Mmio(0x1000),
918             Datamatch::U32(Some(0xf6)),
919         )
920         .expect_err(
921             "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
922         );
923 
924         vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
925             .expect_err("unregistering an unknown event should fail");
926         vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
927             .expect_err("unregistering an unknown PIO address should fail");
928         vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
929             .expect_err("unregistering an unknown PIO address should fail");
930         vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
931             .expect_err("unregistering an unknown MMIO address should fail");
932         vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
933             .unwrap();
934         vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
935             .unwrap();
936     }
937 
938     #[test]
939     fn handle_io_events() {
940         if !Whpx::is_enabled() {
941             return;
942         }
943         let cpu_count = 1;
944         let mem =
945             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
946         let mut vm = new_vm(cpu_count, mem);
947         let evt = Event::new().expect("failed to create event");
948         let evt2 = Event::new().expect("failed to create event");
949         vm.register_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
950             .unwrap();
951         vm.register_ioevent(&evt2, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
952             .unwrap();
953 
954         // Check a pio address
955         vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
956             .expect("failed to handle_io_events");
957         assert_ne!(
958             evt.wait_timeout(Duration::from_millis(10))
959                 .expect("failed to read event"),
960             EventWaitResult::TimedOut
961         );
962         assert_eq!(
963             evt2.wait_timeout(Duration::from_millis(10))
964                 .expect("failed to read event"),
965             EventWaitResult::TimedOut
966         );
967         // Check an mmio address
968         vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
969             .expect("failed to handle_io_events");
970         assert_eq!(
971             evt.wait_timeout(Duration::from_millis(10))
972                 .expect("failed to read event"),
973             EventWaitResult::TimedOut
974         );
975         assert_ne!(
976             evt2.wait_timeout(Duration::from_millis(10))
977                 .expect("failed to read event"),
978             EventWaitResult::TimedOut
979         );
980 
981         // Check an address that does not match any registered ioevents
982         vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
983             .expect("failed to handle_io_events");
984         assert_eq!(
985             evt.wait_timeout(Duration::from_millis(10))
986                 .expect("failed to read event"),
987             EventWaitResult::TimedOut
988         );
989         assert_eq!(
990             evt2.wait_timeout(Duration::from_millis(10))
991                 .expect("failed to read event"),
992             EventWaitResult::TimedOut
993         );
994     }
995 
996     #[test]
997     fn add_memory_ro() {
998         if !Whpx::is_enabled() {
999             return;
1000         }
1001         let cpu_count = 1;
1002         let mem =
1003             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1004         let mut vm = new_vm(cpu_count, mem);
1005         let mem_size = 0x1000;
1006         let shm = SharedMemory::new("test", mem_size as u64).unwrap();
1007         let mem = MemoryMappingBuilder::new(mem_size)
1008             .from_shared_memory(&shm)
1009             .build()
1010             .unwrap();
1011         vm.add_memory_region(
1012             GuestAddress(0x1000),
1013             Box::new(mem),
1014             true,
1015             false,
1016             MemCacheType::CacheCoherent,
1017         )
1018         .unwrap();
1019     }
1020 
1021     #[test]
1022     fn remove_memory() {
1023         if !Whpx::is_enabled() {
1024             return;
1025         }
1026         let cpu_count = 1;
1027         let mem =
1028             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1029         let mut vm = new_vm(cpu_count, mem);
1030         let mem_size = 0x1000;
1031         let shm = SharedMemory::new("test", mem_size as u64).unwrap();
1032         let mem = MemoryMappingBuilder::new(mem_size)
1033             .from_shared_memory(&shm)
1034             .build()
1035             .unwrap();
1036         let mem_ptr = mem.as_ptr();
1037         let slot = vm
1038             .add_memory_region(
1039                 GuestAddress(0x1000),
1040                 Box::new(mem),
1041                 false,
1042                 false,
1043                 MemCacheType::CacheCoherent,
1044             )
1045             .unwrap();
1046         let removed_mem = vm.remove_memory_region(slot).unwrap();
1047         assert_eq!(removed_mem.size(), mem_size);
1048         assert_eq!(removed_mem.as_ptr(), mem_ptr);
1049     }
1050 
1051     #[test]
1052     fn remove_invalid_memory() {
1053         if !Whpx::is_enabled() {
1054             return;
1055         }
1056         let cpu_count = 1;
1057         let mem =
1058             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1059         let mut vm = new_vm(cpu_count, mem);
1060         assert!(vm.remove_memory_region(0).is_err());
1061     }
1062 
1063     #[test]
1064     fn overlap_memory() {
1065         if !Whpx::is_enabled() {
1066             return;
1067         }
1068         let cpu_count = 1;
1069         let mem =
1070             GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
1071         let mut vm = new_vm(cpu_count, mem);
1072         let mem_size = 0x2000;
1073         let shm = SharedMemory::new("test", mem_size as u64).unwrap();
1074         let mem = MemoryMappingBuilder::new(mem_size)
1075             .from_shared_memory(&shm)
1076             .build()
1077             .unwrap();
1078         assert!(vm
1079             .add_memory_region(
1080                 GuestAddress(0x2000),
1081                 Box::new(mem),
1082                 false,
1083                 false,
1084                 MemCacheType::CacheCoherent
1085             )
1086             .is_err());
1087     }
1088 
1089     #[test]
1090     fn sync_memory() {
1091         if !Whpx::is_enabled() {
1092             return;
1093         }
1094         let cpu_count = 1;
1095         let mem =
1096             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1097         let mut vm = new_vm(cpu_count, mem);
1098         let mem_size = 0x1000;
1099         let shm = SharedMemory::new("test", mem_size as u64).unwrap();
1100         let mem = MemoryMappingBuilder::new(mem_size)
1101             .from_shared_memory(&shm)
1102             .build()
1103             .unwrap();
1104         let slot = vm
1105             .add_memory_region(
1106                 GuestAddress(0x10000),
1107                 Box::new(mem),
1108                 false,
1109                 false,
1110                 MemCacheType::CacheCoherent,
1111             )
1112             .unwrap();
1113         vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
1114         vm.msync_memory_region(slot, 0, mem_size).unwrap();
1115         assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
1116         assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
1117     }
1118 }
1119