xref: /aosp_15_r20/external/crosvm/hypervisor/src/whpx/vcpu.rs (revision bb4ee6a4ae7042d18b07a98463b9c8b875e44b39)
1 // Copyright 2022 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 use core::ffi::c_void;
6 use std::arch::x86_64::CpuidResult;
7 use std::collections::BTreeMap;
8 use std::convert::TryInto;
9 use std::mem::size_of;
10 use std::mem::size_of_val;
11 use std::sync::Arc;
12 
13 use base::Error;
14 use base::Result;
15 use libc::EINVAL;
16 use libc::EIO;
17 use libc::ENOENT;
18 use libc::ENXIO;
19 use vm_memory::GuestAddress;
20 use winapi::shared::winerror::E_UNEXPECTED;
21 use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;
22 
23 use super::types::*;
24 use super::*;
25 use crate::CpuId;
26 use crate::CpuIdEntry;
27 use crate::DebugRegs;
28 use crate::Fpu;
29 use crate::IoOperation;
30 use crate::IoParams;
31 use crate::Regs;
32 use crate::Sregs;
33 use crate::Vcpu;
34 use crate::VcpuExit;
35 use crate::VcpuX86_64;
36 use crate::Xsave;
37 
// Access-direction values reported in WHPX exit contexts: reads/ins are 0,
// writes/outs are 1.
const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;
42 
43 /// This is the whpx instruction emulator, useful for deconstructing
44 /// io & memory port instructions. Whpx does not do this automatically.
45 struct SafeInstructionEmulator {
46     handle: WHV_EMULATOR_HANDLE,
47 }
48 
49 impl SafeInstructionEmulator {
new() -> Result<SafeInstructionEmulator>50     fn new() -> Result<SafeInstructionEmulator> {
51         const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
52             Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
53             Reserved: 0,
54             WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
55             WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
56             WHvEmulatorGetVirtualProcessorRegisters: Some(
57                 SafeInstructionEmulator::get_virtual_processor_registers_cb,
58             ),
59             WHvEmulatorSetVirtualProcessorRegisters: Some(
60                 SafeInstructionEmulator::set_virtual_processor_registers_cb,
61             ),
62             WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
63         };
64         let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
65         // safe because pass in valid callbacks and a emulator handle for the kernel to place the
66         // allocated handle into.
67         check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;
68 
69         Ok(SafeInstructionEmulator { handle })
70     }
71 }
72 
73 trait InstructionEmulatorCallbacks {
io_port_cb( context: *mut ::std::os::raw::c_void, io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO, ) -> HRESULT74     extern "stdcall" fn io_port_cb(
75         context: *mut ::std::os::raw::c_void,
76         io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
77     ) -> HRESULT;
memory_cb( context: *mut ::std::os::raw::c_void, memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO, ) -> HRESULT78     extern "stdcall" fn memory_cb(
79         context: *mut ::std::os::raw::c_void,
80         memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
81     ) -> HRESULT;
get_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *mut WHV_REGISTER_VALUE, ) -> HRESULT82     extern "stdcall" fn get_virtual_processor_registers_cb(
83         context: *mut ::std::os::raw::c_void,
84         register_names: *const WHV_REGISTER_NAME,
85         register_count: UINT32,
86         register_values: *mut WHV_REGISTER_VALUE,
87     ) -> HRESULT;
set_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *const WHV_REGISTER_VALUE, ) -> HRESULT88     extern "stdcall" fn set_virtual_processor_registers_cb(
89         context: *mut ::std::os::raw::c_void,
90         register_names: *const WHV_REGISTER_NAME,
91         register_count: UINT32,
92         register_values: *const WHV_REGISTER_VALUE,
93     ) -> HRESULT;
translate_gva_page_cb( context: *mut ::std::os::raw::c_void, gva: WHV_GUEST_VIRTUAL_ADDRESS, translate_flags: WHV_TRANSLATE_GVA_FLAGS, translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE, gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS, ) -> HRESULT94     extern "stdcall" fn translate_gva_page_cb(
95         context: *mut ::std::os::raw::c_void,
96         gva: WHV_GUEST_VIRTUAL_ADDRESS,
97         translate_flags: WHV_TRANSLATE_GVA_FLAGS,
98         translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
99         gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
100     ) -> HRESULT;
101 }
102 
103 /// Context passed into the instruction emulator when trying io or mmio emulation.
104 /// Since we need this for set/get registers and memory translation,
105 /// a single context is used that captures all necessary contextual information for the operation.
106 struct InstructionEmulatorContext<'a> {
107     vm_partition: Arc<SafePartition>,
108     index: u32,
109     handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Result<()>>,
110     handle_io: Option<&'a mut dyn FnMut(IoParams)>,
111 }
112 
113 impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
io_port_cb( context: *mut ::std::os::raw::c_void, io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO, ) -> HRESULT114     extern "stdcall" fn io_port_cb(
115         context: *mut ::std::os::raw::c_void,
116         io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
117     ) -> HRESULT {
118         // unsafe because windows could decide to call this at any time.
119         // However, we trust the kernel to call this while the vm/vcpu is valid.
120         let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
121         let Some(handle_io) = &mut ctx.handle_io else {
122             return E_UNEXPECTED;
123         };
124 
125         // safe because we trust the kernel to fill in the io_access
126         let io_access_info = unsafe { &mut *io_access };
127         let address = io_access_info.Port.into();
128         let size = io_access_info.AccessSize as usize;
129         // SAFETY: We trust the kernel to fill in the io_access
130         let data: &mut [u8] = unsafe {
131             assert!(size <= size_of_val(&io_access_info.Data));
132             std::slice::from_raw_parts_mut(&mut io_access_info.Data as *mut u32 as *mut u8, size)
133         };
134         match io_access_info.Direction {
135             WHPX_EXIT_DIRECTION_PIO_IN => {
136                 handle_io(IoParams {
137                     address,
138                     operation: IoOperation::Read(data),
139                 });
140                 S_OK
141             }
142             WHPX_EXIT_DIRECTION_PIO_OUT => {
143                 handle_io(IoParams {
144                     address,
145                     operation: IoOperation::Write(data),
146                 });
147                 S_OK
148             }
149             _ => E_UNEXPECTED,
150         }
151     }
memory_cb( context: *mut ::std::os::raw::c_void, memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO, ) -> HRESULT152     extern "stdcall" fn memory_cb(
153         context: *mut ::std::os::raw::c_void,
154         memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
155     ) -> HRESULT {
156         // unsafe because windows could decide to call this at any time.
157         // However, we trust the kernel to call this while the vm/vcpu is valid.
158         let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
159         let Some(handle_mmio) = &mut ctx.handle_mmio else {
160             return E_UNEXPECTED;
161         };
162 
163         // safe because we trust the kernel to fill in the memory_access
164         let memory_access_info = unsafe { &mut *memory_access };
165         let address = memory_access_info.GpaAddress;
166         let size = memory_access_info.AccessSize as usize;
167         let data = &mut memory_access_info.Data[..size];
168 
169         match memory_access_info.Direction {
170             WHPX_EXIT_DIRECTION_MMIO_READ => {
171                 if let Err(e) = handle_mmio(IoParams {
172                     address,
173                     operation: IoOperation::Read(data),
174                 }) {
175                     error!("handle_mmio failed with {e}");
176                     E_UNEXPECTED
177                 } else {
178                     S_OK
179                 }
180             }
181             WHPX_EXIT_DIRECTION_MMIO_WRITE => {
182                 if let Err(e) = handle_mmio(IoParams {
183                     address,
184                     operation: IoOperation::Write(data),
185                 }) {
186                     error!("handle_mmio write with {e}");
187                     E_UNEXPECTED
188                 } else {
189                     S_OK
190                 }
191             }
192             _ => E_UNEXPECTED,
193         }
194     }
get_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *mut WHV_REGISTER_VALUE, ) -> HRESULT195     extern "stdcall" fn get_virtual_processor_registers_cb(
196         context: *mut ::std::os::raw::c_void,
197         register_names: *const WHV_REGISTER_NAME,
198         register_count: UINT32,
199         register_values: *mut WHV_REGISTER_VALUE,
200     ) -> HRESULT {
201         // unsafe because windows could decide to call this at any time.
202         // However, we trust the kernel to call this while the vm/vcpu is valid.
203         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
204         // safe because the ctx has a weak reference to the vm partition, which should be
205         // alive longer than the ctx
206         unsafe {
207             WHvGetVirtualProcessorRegisters(
208                 ctx.vm_partition.partition,
209                 ctx.index,
210                 register_names,
211                 register_count,
212                 register_values,
213             )
214         }
215     }
set_virtual_processor_registers_cb( context: *mut ::std::os::raw::c_void, register_names: *const WHV_REGISTER_NAME, register_count: UINT32, register_values: *const WHV_REGISTER_VALUE, ) -> HRESULT216     extern "stdcall" fn set_virtual_processor_registers_cb(
217         context: *mut ::std::os::raw::c_void,
218         register_names: *const WHV_REGISTER_NAME,
219         register_count: UINT32,
220         register_values: *const WHV_REGISTER_VALUE,
221     ) -> HRESULT {
222         // unsafe because windows could decide to call this at any time.
223         // However, we trust the kernel to call this while the vm/vcpu is valid.
224         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
225         // safe because the ctx has a weak reference to the vm partition, which should be
226         // alive longer than the ctx
227         unsafe {
228             WHvSetVirtualProcessorRegisters(
229                 ctx.vm_partition.partition,
230                 ctx.index,
231                 register_names,
232                 register_count,
233                 register_values,
234             )
235         }
236     }
translate_gva_page_cb( context: *mut ::std::os::raw::c_void, gva: WHV_GUEST_VIRTUAL_ADDRESS, translate_flags: WHV_TRANSLATE_GVA_FLAGS, translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE, gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS, ) -> HRESULT237     extern "stdcall" fn translate_gva_page_cb(
238         context: *mut ::std::os::raw::c_void,
239         gva: WHV_GUEST_VIRTUAL_ADDRESS,
240         translate_flags: WHV_TRANSLATE_GVA_FLAGS,
241         translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
242         gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
243     ) -> HRESULT {
244         // unsafe because windows could decide to call this at any time.
245         // However, we trust the kernel to call this while the vm/vcpu is valid.
246         let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
247         let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
248         // safe because the ctx has a weak reference to the vm partition, which should be
249         // alive longer than the ctx
250         let ret = unsafe {
251             WHvTranslateGva(
252                 ctx.vm_partition.partition,
253                 ctx.index,
254                 gva,
255                 translate_flags,
256                 &mut translation_result,
257                 gpa,
258             )
259         };
260         if ret == S_OK {
261             // safe assuming the kernel passed in a valid result_code ptr
262             unsafe {
263                 *translation_result_code = translation_result.ResultCode;
264             }
265         }
266         ret
267     }
268 }
269 
270 impl Drop for SafeInstructionEmulator {
drop(&mut self)271     fn drop(&mut self) {
272         // safe because we own the instruction emulator
273         check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
274     }
275 }
276 
277 // we can send and share the instruction emulator over threads safely even though it is void*.
278 unsafe impl Send for SafeInstructionEmulator {}
279 unsafe impl Sync for SafeInstructionEmulator {}
280 
281 struct SafeVirtualProcessor {
282     vm_partition: Arc<SafePartition>,
283     index: u32,
284 }
285 
286 impl SafeVirtualProcessor {
new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor>287     fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
288         // safe since the vm partition should be valid.
289         check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
290         Ok(SafeVirtualProcessor {
291             vm_partition,
292             index,
293         })
294     }
295 }
296 
297 impl Drop for SafeVirtualProcessor {
drop(&mut self)298     fn drop(&mut self) {
299         // safe because we are the owner of this windows virtual processor.
300         check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index,) })
301             .unwrap();
302     }
303 }
304 
305 pub struct WhpxVcpu {
306     index: u32,
307     safe_virtual_processor: Arc<SafeVirtualProcessor>,
308     vm_partition: Arc<SafePartition>,
309     last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
310     // must be arc, since we cannot "dupe" an instruction emulator similar to a handle.
311     instruction_emulator: Arc<SafeInstructionEmulator>,
312     tsc_frequency: Option<u64>,
313     apic_frequency: Option<u32>,
314 }
315 
316 impl WhpxVcpu {
317     /// The SafePartition passed in is weak, so that there is no circular references.
318     /// However, the SafePartition should be valid as long as this VCPU is alive. The index
319     /// is the index for this vcpu.
new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu>320     pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
321         let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
322         let instruction_emulator = SafeInstructionEmulator::new()?;
323         Ok(WhpxVcpu {
324             index,
325             safe_virtual_processor: Arc::new(safe_virtual_processor),
326             vm_partition,
327             last_exit_context: Arc::new(Default::default()),
328             instruction_emulator: Arc::new(instruction_emulator),
329             tsc_frequency: None,
330             apic_frequency: None,
331         })
332     }
333 
set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32)334     pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
335         self.tsc_frequency = tsc_frequency;
336         self.apic_frequency = Some(lapic_frequency);
337     }
338 
339     /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_read(&mut self, id: u32) -> Result<()>340     fn handle_msr_read(&mut self, id: u32) -> Result<()> {
341         // Verify that we're only being called in a situation where the last exit reason was
342         // ExitReasonX64MsrAccess
343         if self.last_exit_context.ExitReason
344             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
345         {
346             return Err(Error::new(EINVAL));
347         }
348 
349         let value = match id {
350             HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
351             HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
352             _ => None,
353         };
354 
355         if let Some(value) = value {
356             // Get the next rip from the exit context
357             let rip = self.last_exit_context.VpContext.Rip
358                 + self.last_exit_context.VpContext.InstructionLength() as u64;
359 
360             const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
361                 WHV_REGISTER_NAME_WHvX64RegisterRip,
362                 WHV_REGISTER_NAME_WHvX64RegisterRax,
363                 WHV_REGISTER_NAME_WHvX64RegisterRdx,
364             ];
365 
366             let values = vec![
367                 WHV_REGISTER_VALUE { Reg64: rip },
368                 // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
369                 WHV_REGISTER_VALUE {
370                     Reg64: (value & 0xffffffff),
371                 },
372                 WHV_REGISTER_VALUE {
373                     Reg64: (value >> 32),
374                 },
375             ];
376 
377             // safe because we have enough space for all the registers
378             check_whpx!(unsafe {
379                 WHvSetVirtualProcessorRegisters(
380                     self.vm_partition.partition,
381                     self.index,
382                     &REG_NAMES as *const WHV_REGISTER_NAME,
383                     REG_NAMES.len() as u32,
384                     values.as_ptr() as *const WHV_REGISTER_VALUE,
385                 )
386             })
387         } else {
388             self.inject_gp_fault()
389         }
390     }
391 
392     /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()>393     fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
394         // Verify that we're only being called in a situation where the last exit reason was
395         // ExitReasonX64MsrAccess
396         if self.last_exit_context.ExitReason
397             != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
398         {
399             return Err(Error::new(EINVAL));
400         }
401 
402         // Do nothing, we assume TSC is always invariant
403         let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);
404 
405         if !success {
406             return self.inject_gp_fault();
407         }
408 
409         // Get the next rip from the exit context
410         let rip = self.last_exit_context.VpContext.Rip
411             + self.last_exit_context.VpContext.InstructionLength() as u64;
412 
413         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];
414 
415         let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];
416 
417         // safe because we have enough space for all the registers
418         check_whpx!(unsafe {
419             WHvSetVirtualProcessorRegisters(
420                 self.vm_partition.partition,
421                 self.index,
422                 &REG_NAMES as *const WHV_REGISTER_NAME,
423                 REG_NAMES.len() as u32,
424                 values.as_ptr() as *const WHV_REGISTER_VALUE,
425             )
426         })
427     }
428 
inject_gp_fault(&self) -> Result<()>429     fn inject_gp_fault(&self) -> Result<()> {
430         const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];
431 
432         let mut event = WHV_REGISTER_VALUE {
433             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
434                 __bindgen_anon_1: Default::default(),
435             },
436         };
437         // safe because we have enough space for all the registers
438         check_whpx!(unsafe {
439             WHvGetVirtualProcessorRegisters(
440                 self.vm_partition.partition,
441                 self.index,
442                 &REG_NAMES as *const WHV_REGISTER_NAME,
443                 REG_NAMES.len() as u32,
444                 &mut event as *mut WHV_REGISTER_VALUE,
445             )
446         })?;
447 
448         if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
449             error!("Unable to inject gp fault because pending exception exists");
450             return Err(Error::new(EINVAL));
451         }
452 
453         let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };
454 
455         pending_exception.set_EventPending(1);
456         // GP faults set error code
457         pending_exception.set_DeliverErrorCode(1);
458         // GP fault error code is 0 unless the fault is segment related
459         pending_exception.ErrorCode = 0;
460         // This must be set to WHvX64PendingEventException
461         pending_exception
462             .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
463         // GP fault vector is 13
464         const GP_VECTOR: u32 = 13;
465         pending_exception.set_Vector(GP_VECTOR);
466 
467         let event = WHV_REGISTER_VALUE {
468             ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
469                 __bindgen_anon_1: pending_exception,
470             },
471         };
472 
473         // safe because we have enough space for all the registers
474         check_whpx!(unsafe {
475             WHvSetVirtualProcessorRegisters(
476                 self.vm_partition.partition,
477                 self.index,
478                 &REG_NAMES as *const WHV_REGISTER_NAME,
479                 REG_NAMES.len() as u32,
480                 &event as *const WHV_REGISTER_VALUE,
481             )
482         })
483     }
484 }
485 
486 impl Vcpu for WhpxVcpu {
487     /// Makes a shallow clone of this `Vcpu`.
try_clone(&self) -> Result<Self>488     fn try_clone(&self) -> Result<Self> {
489         Ok(WhpxVcpu {
490             index: self.index,
491             safe_virtual_processor: self.safe_virtual_processor.clone(),
492             vm_partition: self.vm_partition.clone(),
493             last_exit_context: self.last_exit_context.clone(),
494             instruction_emulator: self.instruction_emulator.clone(),
495             tsc_frequency: self.tsc_frequency,
496             apic_frequency: self.apic_frequency,
497         })
498     }
499 
as_vcpu(&self) -> &dyn Vcpu500     fn as_vcpu(&self) -> &dyn Vcpu {
501         self
502     }
503 
504     /// Returns the vcpu id.
id(&self) -> usize505     fn id(&self) -> usize {
506         self.index.try_into().unwrap()
507     }
508 
509     /// Exits the vcpu immediately if exit is true
set_immediate_exit(&self, exit: bool)510     fn set_immediate_exit(&self, exit: bool) {
511         if exit {
512             // safe because we own this whpx virtual processor index, and assume the vm partition is
513             // still valid
514             unsafe {
515                 WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
516             }
517         }
518     }
519 
520     /// Signals to the hypervisor that this guest is being paused by userspace. On some hypervisors,
521     /// this is used to control the pvclock. On WHPX, we handle it separately with virtio-pvclock.
522     /// So the correct implementation here is to do nothing.
on_suspend(&self) -> Result<()>523     fn on_suspend(&self) -> Result<()> {
524         Ok(())
525     }
526 
527     /// Enables a hypervisor-specific extension on this Vcpu.  `cap` is a constant defined by the
528     /// hypervisor API (e.g., kvm.h).  `args` are the arguments for enabling the feature, if any.
enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()>529     unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
530         // Whpx does not support raw capability on the vcpu.
531         Err(Error::new(ENXIO))
532     }
533 
534     /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
535     ///
536     /// Once called, it will determine whether a mmio read or mmio write was the reason for the mmio
537     /// exit, call `handle_fn` with the respective IoOperation to perform the mmio read or
538     /// write, and set the return data in the vcpu so that the vcpu can resume running.
handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()>539     fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
540         let mut status: WHV_EMULATOR_STATUS = Default::default();
541         let mut ctx = InstructionEmulatorContext {
542             vm_partition: self.vm_partition.clone(),
543             index: self.index,
544             handle_mmio: Some(handle_fn),
545             handle_io: None,
546         };
547         // safe as long as all callbacks occur before this fn returns.
548         check_whpx!(unsafe {
549             WHvEmulatorTryMmioEmulation(
550                 self.instruction_emulator.handle,
551                 &mut ctx as *mut _ as *mut c_void,
552                 &self.last_exit_context.VpContext,
553                 &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
554                 &mut status,
555             )
556         })?;
557         // safe because we trust the kernel to fill in the union field properly.
558         let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
559         if success {
560             Ok(())
561         } else {
562             self.inject_gp_fault()?;
563             // safe because we trust the kernel to fill in the union field properly.
564             Err(Error::new(unsafe { status.AsUINT32 }))
565         }
566     }
567 
568     /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
569     ///
570     /// Once called, it will determine whether an io in or io out was the reason for the io exit,
571     /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
572     /// and set the return data in the vcpu so that the vcpu can resume running.
handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()>573     fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
574         let mut status: WHV_EMULATOR_STATUS = Default::default();
575         let mut ctx = InstructionEmulatorContext {
576             vm_partition: self.vm_partition.clone(),
577             index: self.index,
578             handle_mmio: None,
579             handle_io: Some(handle_fn),
580         };
581         // safe as long as all callbacks occur before this fn returns.
582         check_whpx!(unsafe {
583             WHvEmulatorTryIoEmulation(
584                 self.instruction_emulator.handle,
585                 &mut ctx as *mut _ as *mut c_void,
586                 &self.last_exit_context.VpContext,
587                 &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
588                 &mut status,
589             )
590         })?; // safe because we trust the kernel to fill in the union field properly.
591         let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
592         if success {
593             Ok(())
594         } else {
595             // safe because we trust the kernel to fill in the union field properly.
596             Err(Error::new(unsafe { status.AsUINT32 }))
597         }
598     }
599 
600     #[allow(non_upper_case_globals)]
run(&mut self) -> Result<VcpuExit>601     fn run(&mut self) -> Result<VcpuExit> {
602         // safe because we own this whpx virtual processor index, and assume the vm partition is
603         // still valid
604         let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
605         check_whpx!(unsafe {
606             WHvRunVirtualProcessor(
607                 self.vm_partition.partition,
608                 self.index,
609                 exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
610                 size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
611             )
612         })?;
613 
614         match self.last_exit_context.ExitReason {
615             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
616             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
617             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
618                 Ok(VcpuExit::UnrecoverableException)
619             }
620             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
621                 Ok(VcpuExit::InvalidVpRegister)
622             }
623             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
624                 Ok(VcpuExit::UnsupportedFeature)
625             }
626             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
627                 Ok(VcpuExit::IrqWindowOpen)
628             }
629             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
630             // additional exits that are configurable
631             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
632                 // safe because we trust the kernel to fill in the union field properly.
633                 let vector = unsafe {
634                     self.last_exit_context
635                         .__bindgen_anon_1
636                         .ApicEoi
637                         .InterruptVector as u8
638                 };
639                 Ok(VcpuExit::IoapicEoi { vector })
640             }
641             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
642                 // Safe because we know this was an MSR access exit.
643                 let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };
644 
645                 // Safe because we know this was an MSR access exit
646                 let is_write = unsafe {
647                     self.last_exit_context
648                         .__bindgen_anon_1
649                         .MsrAccess
650                         .AccessInfo
651                         .__bindgen_anon_1
652                         .IsWrite()
653                         == 1
654                 };
655                 if is_write {
656                     // Safe because we know this was an MSR access exit
657                     let value = unsafe {
658                         // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
659                         // specific register
660                         (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
661                             | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
662                     };
663                     self.handle_msr_write(id, value)?;
664                 } else {
665                     self.handle_msr_read(id)?;
666                 }
667                 Ok(VcpuExit::MsrAccess)
668             }
669             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
670                 // Safe because we know this was a CPUID exit.
671                 let entry = unsafe {
672                     CpuIdEntry {
673                         function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
674                         index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
675                         flags: 0,
676                         cpuid: CpuidResult {
677                             eax: self
678                                 .last_exit_context
679                                 .__bindgen_anon_1
680                                 .CpuidAccess
681                                 .DefaultResultRax as u32,
682                             ebx: self
683                                 .last_exit_context
684                                 .__bindgen_anon_1
685                                 .CpuidAccess
686                                 .DefaultResultRbx as u32,
687                             ecx: self
688                                 .last_exit_context
689                                 .__bindgen_anon_1
690                                 .CpuidAccess
691                                 .DefaultResultRcx as u32,
692                             edx: self
693                                 .last_exit_context
694                                 .__bindgen_anon_1
695                                 .CpuidAccess
696                                 .DefaultResultRdx as u32,
697                         },
698                     }
699                 };
700                 Ok(VcpuExit::Cpuid { entry })
701             }
702             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
703             // undocumented exit calls from the header file, WinHvPlatformDefs.h.
704             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
705             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
706             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
707             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
708                 Ok(VcpuExit::ApicInitSipiTrap)
709             }
710             // exit caused by host cancellation thorugh WHvCancelRunVirtualProcessor,
711             WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
712             r => panic!("unknown exit reason: {}", r),
713         }
714     }
715 }
716 
impl VcpuX86_64 for WhpxVcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    ///
    /// Implemented via the WHPX deliverability-notifications register: while the notification bit
    /// is set, WHPX exits the vcpu as soon as an interrupt-injection window opens.
    fn set_interrupt_window_requested(&self, requested: bool) {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
        let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
            Default::default();
        notifications.set_InterruptNotification(if requested { 1 } else { 0 });
        let notify_register = WHV_REGISTER_VALUE {
            DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
                __bindgen_anon_1: notifications,
            },
        };
        // SAFETY: REG_NAMES and notify_register are live for the whole call and the register
        // count matches the number of values supplied, so WHPX reads within bounds.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &notify_register as *const WHV_REGISTER_VALUE,
            )
        })
        .unwrap();
    }

    /// Checks if we can inject an interrupt into the VCPU.
    ///
    /// Reads the execution state captured at the last vmexit; it does not query the hypervisor.
    fn ready_for_interrupt(&self) -> bool {
        // SAFETY: the InterruptionPending bit is always valid in the ExecutionState struct.
        let pending = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptionPending()
        };
        // SAFETY: the InterruptShadow bit is always valid in the ExecutionState struct.
        let shadow = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptShadow()
        };

        let eflags = self.last_exit_context.VpContext.Rflags;
        // Bit 9 of RFLAGS is IF, the interrupt-enable flag.
        const IF_MASK: u64 = 0x00000200;

        // can't inject an interrupt if InterruptShadow or InterruptPending bits are set, or if
        // the IF flag is clear
        shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
    }

    /// Injects interrupt vector `irq` into the VCPU.
    ///
    /// Writes the WHPX pending-interruption register; delivery happens the next time the vcpu
    /// runs with interrupts deliverable.
    fn interrupt(&self, irq: u8) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
        pending_interrupt.set_InterruptionVector(irq.into());
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // SAFETY: REG_NAMES and interrupt are live for the whole call and the register count
        // matches the number of values supplied.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Injects a non-maskable interrupt into the VCPU.
    ///
    /// Same mechanism as `interrupt`, but with the pending-NMI interruption type and the fixed
    /// NMI vector.
    fn inject_nmi(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
        const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
        pending_interrupt.set_InterruptionVector(NMI_VECTOR);
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // SAFETY: REG_NAMES and interrupt are live for the whole call and the register count
        // matches the number of values supplied.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs> {
        let mut whpx_regs: WhpxRegs = Default::default();
        let reg_names = WhpxRegs::get_register_names();
        // SAFETY: whpx_regs has one value slot per name in reg_names, so the hypervisor writes
        // within bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_mut_ptr(),
            )
        })?;
        Ok(Regs::from(&whpx_regs))
    }

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()> {
        let whpx_regs = WhpxRegs::from(regs);
        let reg_names = WhpxRegs::get_register_names();
        // SAFETY: whpx_regs has one value per name in reg_names, so the hypervisor reads within
        // bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs> {
        let mut whpx_sregs: WhpxSregs = Default::default();
        let reg_names = WhpxSregs::get_register_names();
        // SAFETY: whpx_sregs has one value slot per name in reg_names, so the hypervisor writes
        // within bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_mut_ptr(),
            )
        })?;
        Ok(Sregs::from(&whpx_sregs))
    }

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
        let whpx_sregs = WhpxSregs::from(sregs);
        let reg_names = WhpxSregs::get_register_names();
        // SAFETY: whpx_sregs has one value per name in reg_names, so the hypervisor reads within
        // bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu> {
        let mut whpx_fpu: WhpxFpu = Default::default();
        let reg_names = WhpxFpu::get_register_names();
        // SAFETY: whpx_fpu has one value slot per name in reg_names, so the hypervisor writes
        // within bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_mut_ptr(),
            )
        })?;
        Ok(Fpu::from(&whpx_fpu))
    }

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
        let whpx_fpu = WhpxFpu::from(fpu);
        let reg_names = WhpxFpu::get_register_names();
        // SAFETY: whpx_fpu has one value per name in reg_names, so the hypervisor reads within
        // bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_ptr(),
            )
        })
    }

    /// Gets the VCPU XSAVE.
    ///
    /// Queries WHPX twice: once with a zero-length buffer to learn the required size, then again
    /// with a correctly-sized buffer to fetch the actual state.
    fn get_xsave(&self) -> Result<Xsave> {
        let mut empty_buffer = [0u8; 1];
        let mut needed_buf_size: u32 = 0;

        // Find out how much space is needed for XSAVEs.
        // SAFETY: we pass a buffer length of 0, so WHPX cannot write into empty_buffer; it only
        // reports the required size through needed_buf_size, which outlives the call.
        let res = unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                empty_buffer.as_mut_ptr() as *mut _,
                0,
                &mut needed_buf_size,
            )
        };
        if res != WHV_E_INSUFFICIENT_BUFFER.0 {
            // This should always work, so if it doesn't, we'll return unsupported.
            error!("failed to get size of vcpu xsave");
            return Err(Error::new(EIO));
        }

        let mut xsave = Xsave::new(needed_buf_size as usize);
        // SAFETY: xsave_data is valid for the duration of the FFI call, and we pass its length in
        // bytes so writes are bounded within the buffer.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_mut_ptr(),
                xsave.len() as u32,
                &mut needed_buf_size,
            )
        })?;
        Ok(xsave)
    }

    /// Sets the VCPU XSAVE.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so reads are bounded within the buffer.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_ptr(),
                xsave.len() as u32,
            )
        })
    }

    /// Returns the vcpu's interrupt-related registers serialized as JSON, for snapshotting.
    fn get_interrupt_state(&self) -> Result<serde_json::Value> {
        let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_mut_ptr(),
            )
        })?;

        serde_json::to_value(whpx_interrupt_regs.into_serializable()).map_err(|e| {
            error!("failed to serialize interrupt state: {:?}", e);
            Error::new(EIO)
        })
    }

    /// Restores the vcpu's interrupt-related registers from JSON produced by
    /// `get_interrupt_state`.
    fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()> {
        let whpx_interrupt_regs =
            // NOTE(review): this is the deserialize path, but the log message below says
            // "serialize"; message text left untouched here since it is runtime behavior.
            WhpxInterruptRegs::from_serializable(serde_json::from_value(data).map_err(|e| {
                error!("failed to serialize interrupt state: {:?}", e);
                Error::new(EIO)
            })?);
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs> {
        let mut whpx_debugregs: WhpxDebugRegs = Default::default();
        let reg_names = WhpxDebugRegs::get_register_names();
        // SAFETY: whpx_debugregs has one value slot per name in reg_names, so the hypervisor
        // writes within bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_mut_ptr(),
            )
        })?;
        Ok(DebugRegs::from(&whpx_debugregs))
    }

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
        let whpx_debugregs = WhpxDebugRegs::from(debugregs);
        let reg_names = WhpxDebugRegs::get_register_names();
        // SAFETY: whpx_debugregs has one value per name in reg_names, so the hypervisor reads
        // within bounds; both outlive the call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU extended control registers.
    ///
    /// WHPX only exposes XCR0, so the returned map contains at most that single entry.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let mut reg_value = WHV_REGISTER_VALUE::default();
        // SAFETY: we request exactly one register and pass exactly one value slot; both outlive
        // the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &mut reg_value,
            )
        })?;

        // SAFETY: the union value, reg64, is safe to pull out assuming the kernel filled in the
        // xcrs properly.
        let xcr0 = unsafe { reg_value.Reg64 };

        // whpx only supports xcr0
        let xcrs = BTreeMap::from([(0, xcr0)]);
        Ok(xcrs)
    }

    /// Sets a VCPU extended control register.
    ///
    /// Only `xcr_index == 0` (XCR0) is supported by WHPX; any other index returns `EINVAL`.
    fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
        if xcr_index != 0 {
            // invalid xcr register provided
            return Err(Error::new(EINVAL));
        }

        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let reg_value = WHV_REGISTER_VALUE { Reg64: value };
        // SAFETY: we set exactly one register and pass exactly one value; both outlive the call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &reg_value,
            )
        })
    }

    /// Gets the value of a single model-specific register.
    ///
    /// Returns `ENOENT` if the MSR is not in the WHPX-supported set (see `get_msr_name`).
    fn get_msr(&self, msr_index: u32) -> Result<u64> {
        let msr_name = get_msr_name(msr_index).ok_or(Error::new(libc::ENOENT))?;
        let mut msr_value = WHV_REGISTER_VALUE::default();
        // SAFETY: we request exactly one register and pass exactly one value slot; both outlive
        // the call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &msr_name,
                /* RegisterCount */ 1,
                &mut msr_value,
            )
        })?;

        // SAFETY: Reg64 will be a valid union value
        let value = unsafe { msr_value.Reg64 };
        Ok(value)
    }

    /// Reads back every MSR in a fixed save list, keyed by MSR index.
    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
        // Note that some members of VALID_MSRS cannot be fetched from WHPX with
        // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of
        // permitted MSRs here.
        //
        // We intentionally exclude WHvRegisterPendingInterruption and
        // WHvRegisterInterruptState because they are included in
        // get_interrupt_state.
        //
        // We intentionally exclude MSR_TSC because in snapshotting it is
        // handled by the generic x86_64 VCPU snapshot/restore. Non snapshot
        // consumers should use get/set_tsc_adjust to access the adjust register
        // if needed.
        const MSRS_TO_SAVE: &[u32] = &[
            MSR_EFER,
            MSR_KERNEL_GS_BASE,
            MSR_APIC_BASE,
            MSR_SYSENTER_CS,
            MSR_SYSENTER_EIP,
            MSR_SYSENTER_ESP,
            MSR_STAR,
            MSR_LSTAR,
            MSR_CSTAR,
            MSR_SFMASK,
        ];

        // Short-circuits with the first get_msr error, if any.
        let registers = MSRS_TO_SAVE
            .iter()
            .map(|msr_index| {
                let value = self.get_msr(*msr_index)?;
                Ok((*msr_index, value))
            })
            .collect::<Result<BTreeMap<u32, u64>>>()?;

        Ok(registers)
    }

    /// Sets the value of a single model-specific register.
    ///
    /// Writes to MSRs outside the WHPX-supported set are logged and silently dropped rather than
    /// failed, so guests probing unsupported MSRs don't abort the VM.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
        match get_msr_name(msr_index) {
            Some(msr_name) => {
                let msr_value = WHV_REGISTER_VALUE { Reg64: value };
                // SAFETY: we set exactly one register and pass exactly one value; both outlive
                // the call.
                check_whpx!(unsafe {
                    WHvSetVirtualProcessorRegisters(
                        self.vm_partition.partition,
                        self.index,
                        &msr_name,
                        /* RegisterCount */ 1,
                        &msr_value,
                    )
                })
            }
            None => {
                warn!("msr 0x{msr_index:X} write unsupported by WHPX, dropping");
                Ok(())
            }
        }
    }

    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu, and needs to be setup on the vm.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid` function
    /// will then set the appropriate registers on the vcpu.
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64Cpuid
        if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
            return Err(Error::new(EINVAL));
        }

        // Get the next rip from the exit context: skip past the CPUID instruction so the guest
        // resumes after it.
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
            WHV_REGISTER_NAME_WHvX64RegisterRip,
            WHV_REGISTER_NAME_WHvX64RegisterRax,
            WHV_REGISTER_NAME_WHvX64RegisterRbx,
            WHV_REGISTER_NAME_WHvX64RegisterRcx,
            WHV_REGISTER_NAME_WHvX64RegisterRdx,
        ];

        // Value order must match REG_NAMES: rip first, then the CPUID output registers.
        let values = vec![
            WHV_REGISTER_VALUE { Reg64: rip },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.eax as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ebx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ecx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.edx as u64,
            },
        ];

        // SAFETY: REG_NAMES and values are live for the whole call and the register count matches
        // the number of values supplied.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Sets up debug registers and configure vcpu for handling guest debug events.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }

    /// Restores the guest TSC after a snapshot restore so that TSC_OFFSET matches its value at
    /// snapshot time.
    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
        // Set the guest TSC such that it has the same TSC_OFFSET as it did at
        // the moment it was snapshotted. This is required for virtio-pvclock
        // to function correctly. (virtio-pvclock assumes the offset is fixed,
        // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
        // from CLOCK_MONOTONIC by setting the timebase.)
        self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
    }
}
1244 
get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME>1245 fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
1246     VALID_MSRS.get(&msr_index).copied()
1247 }
1248 
1249 // run calls are tested with the integration tests since the full vcpu needs to be setup for it.
1250 #[cfg(test)]
1251 mod tests {
1252     use vm_memory::GuestAddress;
1253     use vm_memory::GuestMemory;
1254 
1255     use super::*;
1256     use crate::VmX86_64;
1257 
new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm1258     fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
1259         let whpx = Whpx::new().expect("failed to instantiate whpx");
1260         let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
1261             .expect("failed to get whpx features");
1262         WhpxVm::new(
1263             &whpx,
1264             cpu_count,
1265             mem,
1266             CpuId::new(0),
1267             local_apic_supported,
1268             None,
1269         )
1270         .expect("failed to create whpx vm")
1271     }
1272 
1273     #[test]
try_clone()1274     fn try_clone() {
1275         if !Whpx::is_enabled() {
1276             return;
1277         }
1278         let cpu_count = 1;
1279         let mem =
1280             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1281         let vm = new_vm(cpu_count, mem);
1282         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1283         let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1284         let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
1285     }
1286 
1287     #[test]
index()1288     fn index() {
1289         if !Whpx::is_enabled() {
1290             return;
1291         }
1292         let cpu_count = 2;
1293         let mem =
1294             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1295         let vm = new_vm(cpu_count, mem);
1296         let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1297         let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1298         assert_eq!(vcpu0.index, 0);
1299         vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
1300         let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
1301         assert_eq!(vcpu1.index, 1);
1302     }
1303 
1304     #[test]
get_regs()1305     fn get_regs() {
1306         if !Whpx::is_enabled() {
1307             return;
1308         }
1309         let cpu_count = 1;
1310         let mem =
1311             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1312         let vm = new_vm(cpu_count, mem);
1313         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1314 
1315         vcpu.get_regs().expect("failed to get regs");
1316     }
1317 
1318     #[test]
set_regs()1319     fn set_regs() {
1320         if !Whpx::is_enabled() {
1321             return;
1322         }
1323         let cpu_count = 1;
1324         let mem =
1325             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1326         let vm = new_vm(cpu_count, mem);
1327         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1328 
1329         let mut regs = vcpu.get_regs().expect("failed to get regs");
1330         let new_val = regs.rax + 2;
1331         regs.rax = new_val;
1332 
1333         vcpu.set_regs(&regs).expect("failed to set regs");
1334         let new_regs = vcpu.get_regs().expect("failed to get regs");
1335         assert_eq!(new_regs.rax, new_val);
1336     }
1337 
1338     #[test]
debugregs()1339     fn debugregs() {
1340         if !Whpx::is_enabled() {
1341             return;
1342         }
1343         let cpu_count = 1;
1344         let mem =
1345             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1346         let vm = new_vm(cpu_count, mem);
1347         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1348 
1349         let mut dregs = vcpu.get_debugregs().unwrap();
1350         dregs.dr7 += 13;
1351         vcpu.set_debugregs(&dregs).unwrap();
1352         let dregs2 = vcpu.get_debugregs().unwrap();
1353         assert_eq!(dregs.dr7, dregs2.dr7);
1354     }
1355 
1356     #[test]
sregs()1357     fn sregs() {
1358         if !Whpx::is_enabled() {
1359             return;
1360         }
1361         let cpu_count = 1;
1362         let mem =
1363             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1364         let vm = new_vm(cpu_count, mem);
1365         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1366 
1367         let mut sregs = vcpu.get_sregs().unwrap();
1368         sregs.cs.base += 7;
1369         vcpu.set_sregs(&sregs).unwrap();
1370         let sregs2 = vcpu.get_sregs().unwrap();
1371         assert_eq!(sregs.cs.base, sregs2.cs.base);
1372     }
1373 
1374     #[test]
fpu()1375     fn fpu() {
1376         if !Whpx::is_enabled() {
1377             return;
1378         }
1379         let cpu_count = 1;
1380         let mem =
1381             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1382         let vm = new_vm(cpu_count, mem);
1383         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1384 
1385         let mut fpu = vcpu.get_fpu().unwrap();
1386         fpu.fpr[0].significand += 3;
1387         vcpu.set_fpu(&fpu).unwrap();
1388         let fpu2 = vcpu.get_fpu().unwrap();
1389         assert_eq!(fpu.fpr, fpu2.fpr);
1390     }
1391 
1392     #[test]
xcrs()1393     fn xcrs() {
1394         if !Whpx::is_enabled() {
1395             return;
1396         }
1397         let whpx = Whpx::new().expect("failed to instantiate whpx");
1398         let cpu_count = 1;
1399         let mem =
1400             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1401         let vm = new_vm(cpu_count, mem);
1402         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1403         // check xsave support
1404         if !whpx.check_capability(HypervisorCap::Xcrs) {
1405             return;
1406         }
1407 
1408         vcpu.set_xcr(0, 1).unwrap();
1409         let xcrs = vcpu.get_xcrs().unwrap();
1410         let xcr0 = xcrs.get(&0).unwrap();
1411         assert_eq!(*xcr0, 1);
1412     }
1413 
1414     #[test]
set_msr()1415     fn set_msr() {
1416         if !Whpx::is_enabled() {
1417             return;
1418         }
1419         let cpu_count = 1;
1420         let mem =
1421             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1422         let vm = new_vm(cpu_count, mem);
1423         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1424 
1425         vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();
1426 
1427         let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
1428         assert_eq!(gs_base, 42);
1429     }
1430 
1431     #[test]
get_msr()1432     fn get_msr() {
1433         if !Whpx::is_enabled() {
1434             return;
1435         }
1436         let cpu_count = 1;
1437         let mem =
1438             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1439         let vm = new_vm(cpu_count, mem);
1440         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1441 
1442         // This one should succeed
1443         let _value = vcpu.get_msr(MSR_TSC).unwrap();
1444 
1445         // This one will fail to fetch
1446         vcpu.get_msr(MSR_TSC + 1)
1447             .expect_err("invalid MSR index should fail");
1448     }
1449 
1450     #[test]
set_efer()1451     fn set_efer() {
1452         if !Whpx::is_enabled() {
1453             return;
1454         }
1455         // EFER Bits
1456         const EFER_SCE: u64 = 0x00000001;
1457         const EFER_LME: u64 = 0x00000100;
1458         const EFER_LMA: u64 = 0x00000400;
1459         const X86_CR0_PE: u64 = 0x1;
1460         const X86_CR0_PG: u64 = 0x80000000;
1461         const X86_CR4_PAE: u64 = 0x20;
1462 
1463         let cpu_count = 1;
1464         let mem =
1465             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1466         let vm = new_vm(cpu_count, mem);
1467         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1468 
1469         let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
1470         // Initial value should be 0
1471         assert_eq!(sregs.efer, 0);
1472 
1473         // Enable and activate long mode
1474         sregs.cr0 |= X86_CR0_PE; // enable protected mode
1475         sregs.cr0 |= X86_CR0_PG; // enable paging
1476         sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
1477         sregs.efer = EFER_LMA | EFER_LME;
1478         vcpu.set_sregs(&sregs).expect("failed to set sregs");
1479 
1480         // Verify that setting stuck
1481         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1482         assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
1483         assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
1484         assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
1485         assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);
1486 
1487         let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1488         assert_eq!(efer, EFER_LMA | EFER_LME);
1489 
1490         // Enable SCE via set_msrs
1491         vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
1492             .expect("failed to set msr");
1493 
1494         // Verify that setting stuck
1495         let sregs = vcpu.get_sregs().expect("failed to get sregs");
1496         assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
1497         let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
1498         assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
1499     }
1500 
1501     #[test]
get_and_set_xsave_smoke()1502     fn get_and_set_xsave_smoke() {
1503         if !Whpx::is_enabled() {
1504             return;
1505         }
1506         let cpu_count = 1;
1507         let mem =
1508             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1509         let vm = new_vm(cpu_count, mem);
1510         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1511 
1512         // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
1513         // succeed.
1514         let xsave = vcpu.get_xsave().unwrap();
1515         vcpu.set_xsave(&xsave).unwrap();
1516     }
1517 
1518     #[test]
get_and_set_interrupt_state_smoke()1519     fn get_and_set_interrupt_state_smoke() {
1520         if !Whpx::is_enabled() {
1521             return;
1522         }
1523         let cpu_count = 1;
1524         let mem =
1525             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1526         let vm = new_vm(cpu_count, mem);
1527         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1528 
1529         // For the sake of snapshotting, interrupt state is essentially opaque. We just want to make
1530         // sure our syscalls succeed.
1531         let interrupt_state = vcpu.get_interrupt_state().unwrap();
1532         vcpu.set_interrupt_state(interrupt_state).unwrap();
1533     }
1534 
1535     #[test]
get_all_msrs()1536     fn get_all_msrs() {
1537         if !Whpx::is_enabled() {
1538             return;
1539         }
1540         let cpu_count = 1;
1541         let mem =
1542             GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
1543         let vm = new_vm(cpu_count, mem);
1544         let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
1545 
1546         let all_msrs = vcpu.get_all_msrs().unwrap();
1547 
1548         // Our MSR buffer is init'ed to zeros in the registers. The APIC base will be non-zero, so
1549         // by asserting that we know the MSR fetch actually did get us data.
1550         let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
1551         assert_ne!(*apic_base, 0);
1552     }
1553 }
1554