// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::cmp::Reverse;
use std::collections::BTreeMap;
use std::collections::BinaryHeap;
use std::convert::TryInto;
use std::sync::Arc;

use base::error;
use base::info;
use base::pagesize;
use base::AsRawDescriptor;
use base::Error;
use base::Event;
use base::MappedRegion;
use base::MmapError;
use base::Protection;
use base::RawDescriptor;
use base::Result;
use base::SafeDescriptor;
use base::SendTube;
use fnv::FnvHashMap;
use libc::EEXIST;
use libc::EFAULT;
use libc::EINVAL;
use libc::EIO;
use libc::ENODEV;
use libc::ENOENT;
use libc::ENOSPC;
use libc::ENOTSUP;
use libc::EOVERFLOW;
use sync::Mutex;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use winapi::shared::winerror::ERROR_BUSY;
use winapi::shared::winerror::ERROR_SUCCESS;
use winapi::um::memoryapi::OfferVirtualMemory;
use winapi::um::memoryapi::ReclaimVirtualMemory;
use winapi::um::memoryapi::VmOfferPriorityBelowNormal;
use winapi::um::winnt::RtlZeroMemory;

use super::types::*;
use super::*;
use crate::host_phys_addr_bits;
use crate::whpx::whpx_sys::*;
use crate::BalloonEvent;
use crate::ClockState;
use crate::Datamatch;
use crate::DeliveryMode;
use crate::DestinationMode;
use crate::DeviceKind;
use crate::IoEventAddress;
use crate::LapicState;
use crate::MemCacheType;
use crate::MemSlot;
use crate::TriggerMode;
use crate::VcpuX86_64;
use crate::Vm;
use crate::VmCap;
use crate::VmX86_64;

pub struct WhpxVm {
    whpx: Whpx,
    // Reference counted, since we need to implement try_clone or some variation. There is only
    // ever one create and one delete of the partition, unlike the dup/close handle variations.
    vm_partition: Arc<SafePartition>,
    guest_mem: GuestMemory,
    mem_regions: Arc<Mutex<BTreeMap<MemSlot, (GuestAddress, Box<dyn MappedRegion>)>>>,
    /// A min heap of MemSlot numbers that were used and then removed and can now be re-used
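    /// (wrapping each slot in `Reverse` turns the max-heap into a min-heap, so the
    /// lowest-numbered free slot is always popped first).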
    mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
    // WHPX's implementation of ioevents makes several assumptions about how crosvm uses ioevents:
    // 1. All ioevents are registered during device setup, and thus can be cloned when the vm is
    //    cloned instead of locked in an Arc<Mutex<>>. This will make handling ioevents in each
    //    vcpu thread easier because no locks will need to be acquired.
    // 2. All ioevents use Datamatch::AnyLength. We don't bother checking the datamatch, which
    //    will make this faster.
    // 3. We only ever register one eventfd to each address. This simplifies our data structure.
    ioevents: FnvHashMap<IoEventAddress, Event>,
    // Tube to send events to control.
    vm_evt_wrtube: Option<SendTube>,
}

impl WhpxVm {
    pub fn new(
        whpx: &Whpx,
        cpu_count: usize,
        guest_mem: GuestMemory,
        cpuid: CpuId,
        apic_emulation: bool,
        vm_evt_wrtube: Option<SendTube>,
    ) -> WhpxResult<WhpxVm> {
        let partition = SafePartition::new()?;
        // Set up partition defaults.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.ProcessorCount = cpu_count as u32;
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeProcessorCount,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetProcessorCount)?;

        // Pre-set any cpuid results in cpuid.
        let mut cpuid_results: Vec<WHV_X64_CPUID_RESULT> = cpuid
            .cpu_id_entries
            .iter()
            .map(WHV_X64_CPUID_RESULT::from)
            .collect();

        // Leaf HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS tells Linux that it's running under Hyper-V.
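        // The Ebx/Ecx/Edx bytes below spell out the Hyper-V vendor ID string "Microsoft Hv".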
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
            Reserved: [0u32; 3],
            // HYPERV_CPUID_MIN is the minimum leaf that we need to support returning to the guest.
            Eax: HYPERV_CPUID_MIN,
            Ebx: u32::from_le_bytes([b'M', b'i', b'c', b'r']),
            Ecx: u32::from_le_bytes([b'o', b's', b'o', b'f']),
            Edx: u32::from_le_bytes([b't', b' ', b'H', b'v']),
        });

        // The HYPERV_CPUID_FEATURES leaf tells Linux which Hyper-V features we support.
        cpuid_results.push(WHV_X64_CPUID_RESULT {
            Function: HYPERV_CPUID_FEATURES,
            Reserved: [0u32; 3],
            // We only support frequency MSRs and the HV_ACCESS_TSC_INVARIANT feature, which means
            // TSC scaling/offsetting is handled in hardware, not the guest.
            Eax: HV_ACCESS_FREQUENCY_MSRS
                | HV_ACCESS_TSC_INVARIANT
                | HV_MSR_REFERENCE_TSC_AVAILABLE,
            Ebx: 0,
            Ecx: 0,
            Edx: HV_FEATURE_FREQUENCY_MSRS_AVAILABLE,
        });

        // safe because we own this partition, and the cpuid_results vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidResultList,
                cpuid_results.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<WHV_X64_CPUID_RESULT>() * cpuid_results.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidResultList)?;

        // Set up exiting for cpuid leaves that we want crosvm to adjust, but that we can't pre-set.
        // We can't pre-set leaves that rely on irqchip information, and we cannot pre-set leaves
        // that return different results per-cpu.
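        // (Leaf 0x1: feature information; 0x4: cache parameters; 0xB/0x1F: extended topology;
        // 0x15: TSC and core crystal clock frequency.)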
        let exit_list: Vec<u32> = vec![0x1, 0x4, 0xB, 0x1F, 0x15];
        // safe because we own this partition, and the exit_list vec is local to this function.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeCpuidExitList,
                exit_list.as_ptr() as *const _ as *const c_void,
                (std::mem::size_of::<u32>() * exit_list.len()) as UINT32,
            )
        })
        .map_err(WhpxError::SetCpuidExitList)?;

        // Set up exits for the CPUID instruction.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        // safe because we own this partition, and the partition property is allocated on the stack.
        unsafe {
            property
                .ExtendedVmExits
                .__bindgen_anon_1
                .set_X64CpuidExit(1);
            // X64MsrExit essentially causes WHPX to exit to crosvm when it would normally fail an
            // MSR access and inject a GP fault. Crosvm, in turn, now handles select MSR accesses
            // related to Hyper-V (see the handle_msr_* functions in vcpu.rs) and injects a GP
            // fault for any unhandled MSR accesses.
            property.ExtendedVmExits.__bindgen_anon_1.set_X64MsrExit(1);
        }
        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeExtendedVmExits,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetExtendedVmExits)?;

        if apic_emulation && !Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)? {
            return Err(WhpxError::LocalApicEmulationNotSupported);
        }

        // Set up the apic emulation mode.
        let mut property: WHV_PARTITION_PROPERTY = Default::default();
        property.LocalApicEmulationMode = if apic_emulation {
            // TODO(b/180966070): figure out if x2apic emulation mode is available on the host and
            // enable it if it is.
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeXApic
        } else {
            WHV_X64_LOCAL_APIC_EMULATION_MODE_WHvX64LocalApicEmulationModeNone
        };

        // safe because we own this partition, and the partition property is allocated on the stack.
        check_whpx!(unsafe {
            WHvSetPartitionProperty(
                partition.partition,
                WHV_PARTITION_PROPERTY_CODE_WHvPartitionPropertyCodeLocalApicEmulationMode,
                &property as *const _ as *const c_void,
                std::mem::size_of::<WHV_PARTITION_PROPERTY>() as UINT32,
            )
        })
        .map_err(WhpxError::SetLocalApicEmulationMode)?;

        // safe because we own this partition
        check_whpx!(unsafe { WHvSetupPartition(partition.partition) })
            .map_err(WhpxError::SetupPartition)?;

        for region in guest_mem.regions() {
            unsafe {
                // Safe because the guest regions are guaranteed not to overlap.
                set_user_memory_region(
                    &partition,
                    false, // read_only
                    false, // track dirty pages
                    region.guest_addr.offset(),
                    region.size as u64,
                    region.host_addr as *mut u8,
                )
            }
            .map_err(WhpxError::MapGpaRange)?;
        }

        Ok(WhpxVm {
            whpx: whpx.clone(),
            vm_partition: Arc::new(partition),
            guest_mem,
            mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
            mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
            ioevents: FnvHashMap::default(),
            vm_evt_wrtube,
        })
    }

    /// Get the current state of the specified VCPU's local APIC.
    pub fn get_vcpu_lapic_state(&self, vcpu_id: usize) -> Result<LapicState> {
        let buffer = WhpxLapicState { regs: [0u32; 1024] };
        let mut written_size = 0u32;
        let size = std::mem::size_of::<WhpxLapicState>();

        check_whpx!(unsafe {
            WHvGetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                size as u32,
                &mut written_size,
            )
        })?;

        Ok(LapicState::from(&buffer))
    }

    /// Set the current state of the specified VCPU's local APIC.
    pub fn set_vcpu_lapic_state(&mut self, vcpu_id: usize, state: &LapicState) -> Result<()> {
        let buffer = WhpxLapicState::from(state);
        check_whpx!(unsafe {
            WHvSetVirtualProcessorInterruptControllerState(
                self.vm_partition.partition,
                vcpu_id as u32,
                buffer.regs.as_ptr() as *mut c_void,
                std::mem::size_of::<WhpxLapicState>() as u32,
            )
        })?;
        Ok(())
    }

    /// Request an interrupt be delivered to one or more virtualized interrupt controllers. This
    /// should only be used with ApicEmulationModeXApic or ApicEmulationModeX2Apic.
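    ///
    /// A minimal illustrative call (delivers vector 0x30 as a fixed, edge-triggered interrupt
    /// to the vcpu whose APIC ID is 0):
    ///
    /// ```ignore
    /// vm.request_interrupt(
    ///     0x30,
    ///     0,
    ///     DestinationMode::Physical,
    ///     TriggerMode::Edge,
    ///     DeliveryMode::Fixed,
    /// )?;
    /// ```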
    pub fn request_interrupt(
        &self,
        vector: u8,
        dest_id: u8,
        dest_mode: DestinationMode,
        trigger: TriggerMode,
        delivery: DeliveryMode,
    ) -> Result<()> {
        // The WHV_INTERRUPT_CONTROL type does not seem to support the dest_shorthand.
        let mut interrupt = WHV_INTERRUPT_CONTROL {
            Destination: dest_id as u32,
            Vector: vector as u32,
            ..Default::default()
        };
        interrupt.set_DestinationMode(match dest_mode {
            DestinationMode::Physical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModePhysical
            }
            DestinationMode::Logical => {
                WHV_INTERRUPT_DESTINATION_MODE_WHvX64InterruptDestinationModeLogical
            }
        } as u64);
        interrupt.set_TriggerMode(match trigger {
            TriggerMode::Edge => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeEdge,
            TriggerMode::Level => WHV_INTERRUPT_TRIGGER_MODE_WHvX64InterruptTriggerModeLevel,
        } as u64);
        interrupt.set_Type(match delivery {
            DeliveryMode::Fixed => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeFixed,
            DeliveryMode::Lowest => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeLowestPriority,
            DeliveryMode::SMI => {
                error!("WHPX does not support requesting an SMI");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::RemoteRead => {
                // This is also no longer supported by Intel.
                error!("Remote Read interrupts are not supported by WHPX");
                return Err(Error::new(ENOTSUP));
            }
            DeliveryMode::NMI => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeNmi,
            DeliveryMode::Init => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeInit,
            DeliveryMode::Startup => WHV_INTERRUPT_TYPE_WHvX64InterruptTypeSipi,
            DeliveryMode::External => {
                error!("WHPX does not support requesting an external interrupt");
                return Err(Error::new(ENOTSUP));
            }
        } as u64);

        check_whpx!(unsafe {
            WHvRequestInterrupt(
                self.vm_partition.partition,
                &interrupt,
                std::mem::size_of::<WHV_INTERRUPT_CONTROL>() as u32,
            )
        })
    }

    /// In order to fully unmap a memory range such that the host can reclaim the memory,
    /// we unmap it from the hypervisor partition, and then mark crosvm's process as uninterested
    /// in the memory.
    ///
    /// This will make crosvm unable to access the memory, and allow Windows to reclaim it for
    /// other uses when memory is in demand.
    fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX unmap of addr: {:?}, size: {:?}",
            guest_address, size
        );
        // Safe because WHPX does proper error checking, even if an out-of-bounds address is
        // provided.
        unsafe {
            check_whpx!(WHvUnmapGpaRange(
                self.vm_partition.partition,
                guest_address.offset(),
                size,
            ))?;
        }

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *mut c_void;

        // Safe because we have just successfully unmapped this range from the
        // guest partition, so we know it's unused.
        let result =
            unsafe { OfferVirtualMemory(host_address, size as usize, VmOfferPriorityBelowNormal) };

        if result != ERROR_SUCCESS {
            let err = Error::new(result);
            error!("Freeing memory failed with error: {}", err);
            return Err(err);
        }
        Ok(())
    }

    /// Remap memory that has previously been unmapped with #handle_inflate. Note
    /// that attempts to remap pages that were not previously unmapped, or addresses that are not
    /// page-aligned, will result in failure.
    ///
    /// To do this, reclaim the memory from Windows first, then remap it into the hypervisor
    /// partition. Remapped memory has no guarantee of content, and the guest should not expect
    /// it to.
    fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()> {
        info!(
            "Balloon: Requested WHPX remap of addr: {:?}, size: {:?}",
            guest_address, size
        );

        let host_address = self
            .guest_mem
            .get_host_address(guest_address)
            .map_err(|_| Error::new(1))? as *const c_void;

        // Note that we aren't doing any validation here that this range was previously unmapped.
        // However, we can avoid that expensive validation by relying on Windows error checking for
        // ReclaimVirtualMemory. The call will fail if:
        // - The range is not currently "offered"
        // - The range is outside of current guest mem (GuestMemory will fail to convert the
        //   address)
        // In short, security is guaranteed by ensuring the guest can never reclaim ranges it
        // hadn't previously forfeited (and even then, the contents will be zeroed).
        //
        // Safe because the memory ranges in question are managed by Windows, not Rust.
        // Also, ReclaimVirtualMemory has built-in error checking for bad parameters.
        let result = unsafe { ReclaimVirtualMemory(host_address, size as usize) };

        if result == ERROR_BUSY || result == ERROR_SUCCESS {
            // In either of these cases, the contents of the reclaimed memory
            // are preserved or undefined. Regardless, zero the memory
            // to ensure no unintentional memory contents are shared.
            //
            // Safe because we just reclaimed the region in question and haven't yet remapped
            // it to the guest partition, so we know it's unused.
            unsafe { RtlZeroMemory(host_address as RawDescriptor, size as usize) };
        } else {
            let err = Error::new(result);
            error!("Reclaiming memory failed with error: {}", err);
            return Err(err);
        }

        // Safe because no-overlap is guaranteed by the success of ReclaimVirtualMemory,
        // which would fail if it was called on areas that were not unmapped.
        unsafe {
            set_user_memory_region(
                &self.vm_partition,
                false, // read_only
                false, // track dirty pages
                guest_address.offset(),
                size,
                host_address as *mut u8,
            )
        }
    }
}

// Wrapper around WHvMapGpaRange, which creates, modifies, or deletes a mapping
// from guest physical to host user pages.
//
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
    partition: &SafePartition,
    read_only: bool,
    track_dirty_pages: bool,
    guest_addr: u64,
    memory_size: u64,
    userspace_addr: *mut u8,
) -> Result<()> {
    let mut flags = WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagRead
        | WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagExecute;
    if !read_only {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagWrite;
    }
    if track_dirty_pages {
        flags |= WHV_MAP_GPA_RANGE_FLAGS_WHvMapGpaRangeFlagTrackDirtyPages;
    }

    let ret = WHvMapGpaRange(
        partition.partition,
        userspace_addr as *mut c_void,
        guest_addr,
        memory_size,
        flags,
    );
    check_whpx!(ret)
}

/// Helper function to determine the size in bytes of a dirty log bitmap for the given memory
/// region size.
///
/// # Arguments
///
/// * `size` - Number of bytes in the memory region being queried.
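///
/// # Example (illustrative; assumes a 4 KiB page size)
///
/// ```ignore
/// // 8 pages need 8 bits, i.e. one bitmap byte.
/// assert_eq!(dirty_log_bitmap_size(8 * 4096), 1);
/// ```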
pub fn dirty_log_bitmap_size(size: usize) -> usize {
    let page_size = pagesize();
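    // ceil(size / page_size) pages, one bit per page, rounded up to whole bytes.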
    (((size + page_size - 1) / page_size) + 7) / 8
}

impl Vm for WhpxVm {
    /// Makes a shallow clone of this `Vm`.
    fn try_clone(&self) -> Result<Self> {
        let mut ioevents = FnvHashMap::default();
        for (addr, evt) in self.ioevents.iter() {
            ioevents.insert(*addr, evt.try_clone()?);
        }
        Ok(WhpxVm {
            whpx: self.whpx.try_clone()?,
            vm_partition: self.vm_partition.clone(),
            guest_mem: self.guest_mem.clone(),
            mem_regions: self.mem_regions.clone(),
            mem_slot_gaps: self.mem_slot_gaps.clone(),
            ioevents,
            vm_evt_wrtube: self
                .vm_evt_wrtube
                .as_ref()
                .map(|t| t.try_clone().expect("could not clone vm_evt_wrtube")),
        })
    }

    fn check_capability(&self, c: VmCap) -> bool {
        match c {
            VmCap::DirtyLog => Whpx::check_whpx_feature(WhpxFeature::DirtyPageTracking)
                .unwrap_or_else(|e| {
                    error!(
                        "failed to check whpx feature {:?}: {}",
                        WhpxFeature::DirtyPageTracking,
                        e
                    );
                    false
                }),
            // There is a pvclock-like mechanism already with Hyper-V, but we can't get the state.
            VmCap::PvClock => false,
            VmCap::Protected => false,
            // whpx initializes cpuid early during VM creation.
            VmCap::EarlyInitCpuid => true,
            #[cfg(target_arch = "x86_64")]
            VmCap::BusLockDetect => false,
            VmCap::ReadOnlyMemoryRegion => true,
            VmCap::MemNoncoherentDma => false,
        }
    }

    fn get_memory(&self) -> &GuestMemory {
        &self.guest_mem
    }

    fn add_memory_region(
        &mut self,
        guest_addr: GuestAddress,
        mem: Box<dyn MappedRegion>,
        read_only: bool,
        log_dirty_pages: bool,
        _cache: MemCacheType,
    ) -> Result<MemSlot> {
        let size = mem.size() as u64;
        let end_addr = guest_addr.checked_add(size).ok_or(Error::new(EOVERFLOW))?;
        if self.guest_mem.range_overlap(guest_addr, end_addr) {
            return Err(Error::new(ENOSPC));
        }
        let mut regions = self.mem_regions.lock();
        let mut gaps = self.mem_slot_gaps.lock();
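        // Re-use the lowest-numbered freed slot if one exists; otherwise mint a fresh slot
        // number past all slots currently in use.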
        let slot = match gaps.pop() {
            Some(gap) => gap.0,
            None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
        };

        // Safe because we check that the given guest address is valid and has no overlaps. We also
        // know that the pointer and size are correct because the MemoryMapping interface ensures
        // this. We take ownership of the memory mapping so that it won't be unmapped until the slot
        // is removed.
        let res = unsafe {
            set_user_memory_region(
                &self.vm_partition,
                read_only,
                log_dirty_pages,
                guest_addr.offset(),
                size,
                mem.as_ptr(),
            )
        };

        if let Err(e) = res {
            gaps.push(Reverse(slot));
            return Err(e);
        }
        regions.insert(slot, (guest_addr, mem));
        Ok(slot)
    }

    fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, mem) = regions.get_mut(&slot).ok_or(Error::new(ENOENT))?;

        mem.msync(offset, size).map_err(|err| match err {
            MmapError::InvalidAddress => Error::new(EFAULT),
            MmapError::NotPageAligned => Error::new(EINVAL),
            MmapError::SystemCallFailed(e) => e,
            _ => Error::new(EIO),
        })
    }

    fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>> {
        let mut regions = self.mem_regions.lock();
        if !regions.contains_key(&slot) {
            return Err(Error::new(ENOENT));
        }
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Safe because the slot is checked against the list of memory slots.
            unsafe {
                check_whpx!(WHvUnmapGpaRange(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                ))?;
            }
            self.mem_slot_gaps.lock().push(Reverse(slot));
            Ok(regions.remove(&slot).unwrap().1)
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn create_device(&self, _kind: DeviceKind) -> Result<SafeDescriptor> {
        // Whpx does not support in-kernel devices.
        Err(Error::new(libc::ENXIO))
    }

    fn get_dirty_log(&self, slot: u32, dirty_log: &mut [u8]) -> Result<()> {
        let regions = self.mem_regions.lock();
        if let Some((guest_addr, mem)) = regions.get(&slot) {
            // Ensures that there are as many bytes in dirty_log as there are pages in the mmap.
            if dirty_log_bitmap_size(mem.size()) > dirty_log.len() {
                return Err(Error::new(EINVAL));
            }
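            // Round up to the number of u64 words needed to hold dirty_log.len() bitmap bytes.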
            let bitmap_size = if dirty_log.len() % 8 == 0 {
                dirty_log.len() / 8
            } else {
                dirty_log.len() / 8 + 1
            };
            let mut bitmap = vec![0u64; bitmap_size];
            check_whpx!(unsafe {
                WHvQueryGpaRangeDirtyBitmap(
                    self.vm_partition.partition,
                    guest_addr.offset(),
                    mem.size() as u64,
                    bitmap.as_mut_ptr() as *mut u64,
                    (bitmap.len() * 8) as u32,
                )
            })?;
            // safe because we have allocated a vec of u64, which we can cast to a u8 slice.
            let buffer = unsafe {
                std::slice::from_raw_parts(bitmap.as_ptr() as *const u8, bitmap.len() * 8)
            };
            dirty_log.copy_from_slice(&buffer[..dirty_log.len()]);
            Ok(())
        } else {
            Err(Error::new(ENOENT))
        }
    }

    fn register_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX currently only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        if self.ioevents.contains_key(&addr) {
            error!("WHPX does not support multiple ioevents for the same address");
            return Err(Error::new(EEXIST));
        }

        self.ioevents.insert(addr, evt.try_clone()?);

        Ok(())
    }

    fn unregister_ioevent(
        &mut self,
        evt: &Event,
        addr: IoEventAddress,
        datamatch: Datamatch,
    ) -> Result<()> {
        if datamatch != Datamatch::AnyLength {
            error!("WHPX only supports Datamatch::AnyLength");
            return Err(Error::new(ENOTSUP));
        }

        match self.ioevents.get(&addr) {
            Some(existing_evt) => {
                // evt should match the existing evt associated with addr.
                if evt != existing_evt {
                    return Err(Error::new(ENOENT));
                }
                self.ioevents.remove(&addr);
            }

            None => {
                return Err(Error::new(ENOENT));
            }
        };
        Ok(())
    }

    /// Trigger any io events based on the memory mapped IO at `addr`. If the hypervisor does
    /// in-kernel IO event delivery, this is a no-op.
    fn handle_io_events(&self, addr: IoEventAddress, _data: &[u8]) -> Result<()> {
        match self.ioevents.get(&addr) {
            None => {}
            Some(evt) => {
                evt.signal()?;
            }
        };
        Ok(())
    }

    fn get_pvclock(&self) -> Result<ClockState> {
        Err(Error::new(ENODEV))
    }

    fn set_pvclock(&self, _state: &ClockState) -> Result<()> {
        Err(Error::new(ENODEV))
    }

    fn add_fd_mapping(
        &mut self,
        slot: u32,
        offset: usize,
        size: usize,
        fd: &dyn AsRawDescriptor,
        fd_offset: u64,
        prot: Protection,
    ) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.add_fd_mapping(offset, size, fd, fd_offset, prot) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()> {
        let mut regions = self.mem_regions.lock();
        let (_, region) = regions.get_mut(&slot).ok_or(Error::new(EINVAL))?;

        match region.remove_mapping(offset, size) {
            Ok(()) => Ok(()),
            Err(MmapError::SystemCallFailed(e)) => Err(e),
            Err(_) => Err(Error::new(EIO)),
        }
    }

    fn handle_balloon_event(&mut self, event: BalloonEvent) -> Result<()> {
        match event {
            BalloonEvent::Inflate(m) => self.handle_inflate(m.guest_address, m.size),
            BalloonEvent::Deflate(m) => self.handle_deflate(m.guest_address, m.size),
            BalloonEvent::BalloonTargetReached(_) => Ok(()),
        }
    }

    fn get_guest_phys_addr_bits(&self) -> u8 {
        // Assume the guest physical address size is the same as the host.
        host_phys_addr_bits()
    }
}

impl VmX86_64 for WhpxVm {
    fn get_hypervisor(&self) -> &dyn HypervisorX86_64 {
        &self.whpx
    }

    fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
        Ok(Box::new(WhpxVcpu::new(
            self.vm_partition.clone(),
            id.try_into().unwrap(),
        )?))
    }

    /// Sets the address of the three-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_tss_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }

    /// Sets the address of a one-page region in the VM's address space.
    /// This function is only necessary for unrestricted_guest_mode=0, which we do not support for
    /// WHPX.
    fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
        Ok(())
    }
}

// NOTE: WHPX tests need to be run serially; otherwise WHPX errors out unless we map new regions
// of guest memory.
#[cfg(test)]
mod tests {
    use std::thread;
    use std::time::Duration;

    use base::EventWaitResult;
    use base::MemoryMappingBuilder;
    use base::SharedMemory;

    use super::*;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn create_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        new_vm(cpu_count, mem);
    }

    #[test]
    fn create_vcpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        vm.create_vcpu(0).expect("failed to create vcpu");
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let _vm_clone = vm.try_clone().expect("failed to clone whpx vm");
    }

    #[test]
    fn send_vm() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        thread::spawn(move || {
            let _vm = vm;
        })
        .join()
        .unwrap();
    }

    #[test]
    fn check_vm_capability() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        assert!(vm.check_capability(VmCap::DirtyLog));
        assert!(!vm.check_capability(VmCap::PvClock));
    }

    #[test]
    fn dirty_log_size() {
        let page_size = pagesize();
        assert_eq!(dirty_log_bitmap_size(0), 0);
        assert_eq!(dirty_log_bitmap_size(page_size), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8), 1);
        assert_eq!(dirty_log_bitmap_size(page_size * 8 + 1), 2);
        assert_eq!(dirty_log_bitmap_size(page_size * 100), 13);
    }

    #[test]
    fn register_ioevent() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let otherevt = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::AnyLength,
        )
        .expect_err("WHPX should not allow you to register two events for the same address");

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U8(None),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.register_ioevent(
            &otherevt,
            IoEventAddress::Mmio(0x1000),
            Datamatch::U32(Some(0xf6)),
        )
        .expect_err(
            "WHPX should not allow you to register ioevents with Datamatches other than AnyLength",
        );

        vm.unregister_ioevent(&otherevt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown event should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf5), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .expect_err("unregistering an unknown PIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0xf4), Datamatch::AnyLength)
            .expect_err("unregistering an unknown MMIO address should fail");
        vm.unregister_ioevent(&evt, IoEventAddress::Pio(0xf4), Datamatch::AnyLength)
            .unwrap();
        vm.unregister_ioevent(&evt, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();
    }

    #[test]
    fn handle_io_events() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let evt = Event::new().expect("failed to create event");
        let evt2 = Event::new().expect("failed to create event");
        vm.register_ioevent(&evt, IoEventAddress::Pio(0x1000), Datamatch::AnyLength)
            .unwrap();
        vm.register_ioevent(&evt2, IoEventAddress::Mmio(0x1000), Datamatch::AnyLength)
            .unwrap();

        // Check a pio address
        vm.handle_io_events(IoEventAddress::Pio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_ne!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        // Check an mmio address
        vm.handle_io_events(IoEventAddress::Mmio(0x1000), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_ne!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );

        // Check an address that does not match any registered ioevents
        vm.handle_io_events(IoEventAddress::Pio(0x1001), &[])
            .expect("failed to handle_io_events");
        assert_eq!(
            evt.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
        assert_eq!(
            evt2.wait_timeout(Duration::from_millis(10))
                .expect("failed to read event"),
            EventWaitResult::TimedOut
        );
    }

    #[test]
    fn add_memory_ro() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        vm.add_memory_region(
            GuestAddress(0x1000),
            Box::new(mem),
            true,
            false,
            MemCacheType::CacheCoherent,
        )
        .unwrap();
    }

    #[test]
    fn remove_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let mem_ptr = mem.as_ptr();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x1000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        let removed_mem = vm.remove_memory_region(slot).unwrap();
        assert_eq!(removed_mem.size(), mem_size);
        assert_eq!(removed_mem.as_ptr(), mem_ptr);
    }

    #[test]
    fn remove_invalid_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        assert!(vm.remove_memory_region(0).is_err());
    }

    #[test]
    fn overlap_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x10000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x2000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        assert!(vm
            .add_memory_region(
                GuestAddress(0x2000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent
            )
            .is_err());
    }

    #[test]
    fn sync_memory() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let mut vm = new_vm(cpu_count, mem);
        let mem_size = 0x1000;
        let shm = SharedMemory::new("test", mem_size as u64).unwrap();
        let mem = MemoryMappingBuilder::new(mem_size)
            .from_shared_memory(&shm)
            .build()
            .unwrap();
        let slot = vm
            .add_memory_region(
                GuestAddress(0x10000),
                Box::new(mem),
                false,
                false,
                MemCacheType::CacheCoherent,
            )
            .unwrap();
        vm.msync_memory_region(slot, mem_size - 1, 0).unwrap();
        vm.msync_memory_region(slot, 0, mem_size).unwrap();
        assert!(vm.msync_memory_region(slot, mem_size, 0).is_err());
        assert!(vm.msync_memory_region(slot + 1, mem_size, 0).is_err());
    }
}