// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

use core::ffi::c_void;
use std::arch::x86_64::CpuidResult;
use std::collections::BTreeMap;
use std::convert::TryInto;
use std::mem::size_of;
use std::mem::size_of_val;
use std::sync::Arc;

use base::Error;
use base::Result;
use libc::EINVAL;
use libc::EIO;
use libc::ENOENT;
use libc::ENXIO;
use vm_memory::GuestAddress;
use winapi::shared::winerror::E_UNEXPECTED;
use windows::Win32::Foundation::WHV_E_INSUFFICIENT_BUFFER;

use super::types::*;
use super::*;
use crate::CpuId;
use crate::CpuIdEntry;
use crate::DebugRegs;
use crate::Fpu;
use crate::IoOperation;
use crate::IoParams;
use crate::Regs;
use crate::Sregs;
use crate::Vcpu;
use crate::VcpuExit;
use crate::VcpuX86_64;
use crate::Xsave;

const WHPX_EXIT_DIRECTION_MMIO_READ: u8 = 0;
const WHPX_EXIT_DIRECTION_MMIO_WRITE: u8 = 1;
const WHPX_EXIT_DIRECTION_PIO_IN: u8 = 0;
const WHPX_EXIT_DIRECTION_PIO_OUT: u8 = 1;

/// The WHPX instruction emulator, used to decode port I/O and MMIO instructions.
/// WHPX does not do this automatically.
struct SafeInstructionEmulator {
    handle: WHV_EMULATOR_HANDLE,
}
48
49 impl SafeInstructionEmulator {
new() -> Result<SafeInstructionEmulator>50 fn new() -> Result<SafeInstructionEmulator> {
51 const EMULATOR_CALLBACKS: WHV_EMULATOR_CALLBACKS = WHV_EMULATOR_CALLBACKS {
52 Size: size_of::<WHV_EMULATOR_CALLBACKS>() as u32,
53 Reserved: 0,
54 WHvEmulatorIoPortCallback: Some(SafeInstructionEmulator::io_port_cb),
55 WHvEmulatorMemoryCallback: Some(SafeInstructionEmulator::memory_cb),
56 WHvEmulatorGetVirtualProcessorRegisters: Some(
57 SafeInstructionEmulator::get_virtual_processor_registers_cb,
58 ),
59 WHvEmulatorSetVirtualProcessorRegisters: Some(
60 SafeInstructionEmulator::set_virtual_processor_registers_cb,
61 ),
62 WHvEmulatorTranslateGvaPage: Some(SafeInstructionEmulator::translate_gva_page_cb),
63 };
64 let mut handle: WHV_EMULATOR_HANDLE = std::ptr::null_mut();
65 // safe because pass in valid callbacks and a emulator handle for the kernel to place the
66 // allocated handle into.
67 check_whpx!(unsafe { WHvEmulatorCreateEmulator(&EMULATOR_CALLBACKS, &mut handle) })?;
68
69 Ok(SafeInstructionEmulator { handle })
70 }
71 }

trait InstructionEmulatorCallbacks {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT;
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT;
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT;
}

/// Context passed into the instruction emulator when trying io or mmio emulation.
/// Since we need this for set/get registers and memory translation,
/// a single context is used that captures all necessary contextual information for the operation.
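///
/// # Example
///
/// A minimal sketch of building a context for MMIO emulation; the `vm_partition` value and the
/// no-op handler here are illustrative assumptions, not values from real exit handling:
/// ```ignore
/// let mut handler = |_params: IoParams| -> Result<()> { Ok(()) };
/// let ctx = InstructionEmulatorContext {
///     vm_partition: vm_partition.clone(),
///     index: 0,
///     handle_mmio: Some(&mut handler),
///     handle_io: None,
/// };
/// ```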
struct InstructionEmulatorContext<'a> {
    vm_partition: Arc<SafePartition>,
    index: u32,
    handle_mmio: Option<&'a mut dyn FnMut(IoParams) -> Result<()>>,
    handle_io: Option<&'a mut dyn FnMut(IoParams)>,
}

impl InstructionEmulatorCallbacks for SafeInstructionEmulator {
    extern "stdcall" fn io_port_cb(
        context: *mut ::std::os::raw::c_void,
        io_access: *mut WHV_EMULATOR_IO_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        let Some(handle_io) = &mut ctx.handle_io else {
            return E_UNEXPECTED;
        };

        // safe because we trust the kernel to fill in the io_access
        let io_access_info = unsafe { &mut *io_access };
        let address = io_access_info.Port.into();
        let size = io_access_info.AccessSize as usize;
        // SAFETY: We trust the kernel to fill in the io_access
        let data: &mut [u8] = unsafe {
            assert!(size <= size_of_val(&io_access_info.Data));
            std::slice::from_raw_parts_mut(&mut io_access_info.Data as *mut u32 as *mut u8, size)
        };
        match io_access_info.Direction {
            WHPX_EXIT_DIRECTION_PIO_IN => {
                handle_io(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                });
                S_OK
            }
            WHPX_EXIT_DIRECTION_PIO_OUT => {
                handle_io(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                });
                S_OK
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn memory_cb(
        context: *mut ::std::os::raw::c_void,
        memory_access: *mut WHV_EMULATOR_MEMORY_ACCESS_INFO,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &mut *(context as *mut InstructionEmulatorContext) };
        let Some(handle_mmio) = &mut ctx.handle_mmio else {
            return E_UNEXPECTED;
        };

        // safe because we trust the kernel to fill in the memory_access
        let memory_access_info = unsafe { &mut *memory_access };
        let address = memory_access_info.GpaAddress;
        let size = memory_access_info.AccessSize as usize;
        let data = &mut memory_access_info.Data[..size];

        match memory_access_info.Direction {
            WHPX_EXIT_DIRECTION_MMIO_READ => {
                if let Err(e) = handle_mmio(IoParams {
                    address,
                    operation: IoOperation::Read(data),
                }) {
                    error!("handle_mmio read failed with {e}");
                    E_UNEXPECTED
                } else {
                    S_OK
                }
            }
            WHPX_EXIT_DIRECTION_MMIO_WRITE => {
                if let Err(e) = handle_mmio(IoParams {
                    address,
                    operation: IoOperation::Write(data),
                }) {
                    error!("handle_mmio write failed with {e}");
                    E_UNEXPECTED
                } else {
                    S_OK
                }
            }
            _ => E_UNEXPECTED,
        }
    }
    extern "stdcall" fn get_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *mut WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvGetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn set_virtual_processor_registers_cb(
        context: *mut ::std::os::raw::c_void,
        register_names: *const WHV_REGISTER_NAME,
        register_count: UINT32,
        register_values: *const WHV_REGISTER_VALUE,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        unsafe {
            WHvSetVirtualProcessorRegisters(
                ctx.vm_partition.partition,
                ctx.index,
                register_names,
                register_count,
                register_values,
            )
        }
    }
    extern "stdcall" fn translate_gva_page_cb(
        context: *mut ::std::os::raw::c_void,
        gva: WHV_GUEST_VIRTUAL_ADDRESS,
        translate_flags: WHV_TRANSLATE_GVA_FLAGS,
        translation_result_code: *mut WHV_TRANSLATE_GVA_RESULT_CODE,
        gpa: *mut WHV_GUEST_PHYSICAL_ADDRESS,
    ) -> HRESULT {
        // unsafe because windows could decide to call this at any time.
        // However, we trust the kernel to call this while the vm/vcpu is valid.
        let ctx = unsafe { &*(context as *const InstructionEmulatorContext) };
        let mut translation_result: WHV_TRANSLATE_GVA_RESULT = Default::default();
        // safe because the ctx holds a reference to the vm partition, which should be
        // alive longer than the ctx
        let ret = unsafe {
            WHvTranslateGva(
                ctx.vm_partition.partition,
                ctx.index,
                gva,
                translate_flags,
                &mut translation_result,
                gpa,
            )
        };
        if ret == S_OK {
            // safe assuming the kernel passed in a valid result_code ptr
            unsafe {
                *translation_result_code = translation_result.ResultCode;
            }
        }
        ret
    }
}

impl Drop for SafeInstructionEmulator {
    fn drop(&mut self) {
        // safe because we own the instruction emulator
        check_whpx!(unsafe { WHvEmulatorDestroyEmulator(self.handle) }).unwrap();
    }
}

// We can send and share the instruction emulator across threads safely even though it holds a
// raw void* handle.
unsafe impl Send for SafeInstructionEmulator {}
unsafe impl Sync for SafeInstructionEmulator {}

struct SafeVirtualProcessor {
    vm_partition: Arc<SafePartition>,
    index: u32,
}

impl SafeVirtualProcessor {
    fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<SafeVirtualProcessor> {
        // safe since the vm partition should be valid.
        check_whpx!(unsafe { WHvCreateVirtualProcessor(vm_partition.partition, index, 0) })?;
        Ok(SafeVirtualProcessor {
            vm_partition,
            index,
        })
    }
}

impl Drop for SafeVirtualProcessor {
    fn drop(&mut self) {
        // safe because we are the owner of this windows virtual processor.
        check_whpx!(unsafe { WHvDeleteVirtualProcessor(self.vm_partition.partition, self.index) })
            .unwrap();
    }
}

pub struct WhpxVcpu {
    index: u32,
    safe_virtual_processor: Arc<SafeVirtualProcessor>,
    vm_partition: Arc<SafePartition>,
    last_exit_context: Arc<WHV_RUN_VP_EXIT_CONTEXT>,
    // Must be Arc, since we cannot "dupe" an instruction emulator the way we can a handle.
    instruction_emulator: Arc<SafeInstructionEmulator>,
    tsc_frequency: Option<u64>,
    apic_frequency: Option<u32>,
}

impl WhpxVcpu {
    /// Creates a new `WhpxVcpu`. The `vm_partition` is shared via `Arc`, so it remains valid for
    /// as long as this VCPU is alive. `index` is the index for this vcpu.
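    ///
    /// # Example
    ///
    /// An illustrative sketch (assumes a valid `vm_partition` from an existing WHPX VM):
    /// ```ignore
    /// let vcpu = WhpxVcpu::new(vm_partition.clone(), /* index= */ 0)?;
    /// ```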
    pub(super) fn new(vm_partition: Arc<SafePartition>, index: u32) -> Result<WhpxVcpu> {
        let safe_virtual_processor = SafeVirtualProcessor::new(vm_partition.clone(), index)?;
        let instruction_emulator = SafeInstructionEmulator::new()?;
        Ok(WhpxVcpu {
            index,
            safe_virtual_processor: Arc::new(safe_virtual_processor),
            vm_partition,
            last_exit_context: Arc::new(Default::default()),
            instruction_emulator: Arc::new(instruction_emulator),
            tsc_frequency: None,
            apic_frequency: None,
        })
    }

    pub fn set_frequencies(&mut self, tsc_frequency: Option<u64>, lapic_frequency: u32) {
        self.tsc_frequency = tsc_frequency;
        self.apic_frequency = Some(lapic_frequency);
    }

    /// Handle reading the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
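    ///
    /// Per the x86 RDMSR convention, the 64-bit result is split across EDX:EAX. A worked example
    /// of the split performed below (the value is illustrative):
    /// ```ignore
    /// let value: u64 = 0x1122_3344_5566_7788;
    /// let eax = value & 0xffff_ffff; // 0x5566_7788, lower half
    /// let edx = value >> 32;         // 0x1122_3344, upper half
    /// ```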
    fn handle_msr_read(&mut self, id: u32) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

        let value = match id {
            HV_X64_MSR_TSC_FREQUENCY => Some(self.tsc_frequency.unwrap_or(0)),
            HV_X64_MSR_APIC_FREQUENCY => Some(self.apic_frequency.unwrap_or(0) as u64),
            _ => None,
        };

        if let Some(value) = value {
            // Get the next rip from the exit context
            let rip = self.last_exit_context.VpContext.Rip
                + self.last_exit_context.VpContext.InstructionLength() as u64;

            const REG_NAMES: [WHV_REGISTER_NAME; 3] = [
                WHV_REGISTER_NAME_WHvX64RegisterRip,
                WHV_REGISTER_NAME_WHvX64RegisterRax,
                WHV_REGISTER_NAME_WHvX64RegisterRdx,
            ];

            let values = vec![
                WHV_REGISTER_VALUE { Reg64: rip },
                // RDMSR instruction puts lower 32 bits in EAX and upper 32 bits in EDX
                WHV_REGISTER_VALUE {
                    Reg64: (value & 0xffffffff),
                },
                WHV_REGISTER_VALUE {
                    Reg64: (value >> 32),
                },
            ];

            // safe because we have enough space for all the registers
            check_whpx!(unsafe {
                WHvSetVirtualProcessorRegisters(
                    self.vm_partition.partition,
                    self.index,
                    &REG_NAMES as *const WHV_REGISTER_NAME,
                    REG_NAMES.len() as u32,
                    values.as_ptr() as *const WHV_REGISTER_VALUE,
                )
            })
        } else {
            self.inject_gp_fault()
        }
    }

    /// Handle writing the MSR with id `id`. If MSR `id` is not supported, inject a GP fault.
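    ///
    /// Per the x86 WRMSR convention, the value written was assembled from EDX:EAX by the caller,
    /// i.e. (illustrative sketch):
    /// ```ignore
    /// let value = (rdx << 32) | (rax & 0xffff_ffff);
    /// ```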
    fn handle_msr_write(&mut self, id: u32, _value: u64) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64MsrAccess
        if self.last_exit_context.ExitReason
            != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess
        {
            return Err(Error::new(EINVAL));
        }

        // Do nothing, we assume TSC is always invariant
        let success = matches!(id, HV_X64_MSR_TSC_INVARIANT_CONTROL);

        if !success {
            return self.inject_gp_fault();
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvX64RegisterRip];

        let values = vec![WHV_REGISTER_VALUE { Reg64: rip }];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    fn inject_gp_fault(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] = [WHV_REGISTER_NAME_WHvRegisterPendingEvent];

        let mut event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: Default::default(),
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &mut event as *mut WHV_REGISTER_VALUE,
            )
        })?;

        // safe because the kernel just filled in the ExceptionEvent union field above
        if unsafe { event.ExceptionEvent.__bindgen_anon_1.EventPending() } != 0 {
            error!("Unable to inject gp fault because pending exception exists");
            return Err(Error::new(EINVAL));
        }

        // safe because the kernel just filled in the ExceptionEvent union field above
        let mut pending_exception = unsafe { event.ExceptionEvent.__bindgen_anon_1 };

        pending_exception.set_EventPending(1);
        // GP faults set error code
        pending_exception.set_DeliverErrorCode(1);
        // GP fault error code is 0 unless the fault is segment related
        pending_exception.ErrorCode = 0;
        // This must be set to WHvX64PendingEventException
        pending_exception
            .set_EventType(WHV_X64_PENDING_EVENT_TYPE_WHvX64PendingEventException as u32);
        // GP fault vector is 13
        const GP_VECTOR: u32 = 13;
        pending_exception.set_Vector(GP_VECTOR);

        let event = WHV_REGISTER_VALUE {
            ExceptionEvent: WHV_X64_PENDING_EXCEPTION_EVENT {
                __bindgen_anon_1: pending_exception,
            },
        };

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &event as *const WHV_REGISTER_VALUE,
            )
        })
    }
}

impl Vcpu for WhpxVcpu {
    /// Makes a shallow clone of this `Vcpu`.
    fn try_clone(&self) -> Result<Self> {
        Ok(WhpxVcpu {
            index: self.index,
            safe_virtual_processor: self.safe_virtual_processor.clone(),
            vm_partition: self.vm_partition.clone(),
            last_exit_context: self.last_exit_context.clone(),
            instruction_emulator: self.instruction_emulator.clone(),
            tsc_frequency: self.tsc_frequency,
            apic_frequency: self.apic_frequency,
        })
    }

    fn as_vcpu(&self) -> &dyn Vcpu {
        self
    }

    /// Returns the vcpu id.
    fn id(&self) -> usize {
        self.index.try_into().unwrap()
    }

    /// Exits the vcpu immediately if `exit` is true.
    fn set_immediate_exit(&self, exit: bool) {
        if exit {
            // safe because we own this whpx virtual processor index, and assume the vm partition
            // is still valid
            unsafe {
                WHvCancelRunVirtualProcessor(self.vm_partition.partition, self.index, 0);
            }
        }
    }

    /// Signals to the hypervisor that this guest is being paused by userspace. On some
    /// hypervisors, this is used to control the pvclock. On WHPX, we handle it separately with
    /// virtio-pvclock, so the correct implementation here is to do nothing.
    fn on_suspend(&self) -> Result<()> {
        Ok(())
    }

    /// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the
    /// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any.
    unsafe fn enable_raw_capability(&self, _cap: u32, _args: &[u64; 4]) -> Result<()> {
        // Whpx does not support raw capability on the vcpu.
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`.
    ///
    /// Once called, it will determine whether an MMIO read or MMIO write was the reason for the
    /// MMIO exit, call `handle_fn` with the respective IoOperation to perform the read or write,
    /// and set the return data in the vcpu so that the vcpu can resume running.
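    ///
    /// # Example
    ///
    /// An illustrative sketch of a `handle_fn`; the all-ones read value is an assumption for the
    /// sketch, not what any particular device returns:
    /// ```ignore
    /// let mut handle_fn = |params: IoParams| -> Result<()> {
    ///     match params.operation {
    ///         IoOperation::Read(data) => data.fill(0xff),
    ///         IoOperation::Write(data) => println!("mmio write {:#x}: {:x?}", params.address, data),
    ///     }
    ///     Ok(())
    /// };
    /// vcpu.handle_mmio(&mut handle_fn)?;
    /// ```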
    fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Result<()>) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: Some(handle_fn),
            handle_io: None,
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryMmioEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.MemoryAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            self.inject_gp_fault()?;
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Io`.
    ///
    /// Once called, it will determine whether an io in or io out was the reason for the io exit,
    /// call `handle_fn` with the respective IoOperation to perform the io in or io out,
    /// and set the return data in the vcpu so that the vcpu can resume running.
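    ///
    /// # Example
    ///
    /// An illustrative sketch of a `handle_fn` for port I/O (the zero-fill read is an assumption
    /// for the sketch):
    /// ```ignore
    /// let mut handle_fn = |params: IoParams| match params.operation {
    ///     IoOperation::Read(data) => data.fill(0),
    ///     IoOperation::Write(data) => println!("pio write {:#x}: {:x?}", params.address, data),
    /// };
    /// vcpu.handle_io(&mut handle_fn)?;
    /// ```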
    fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams)) -> Result<()> {
        let mut status: WHV_EMULATOR_STATUS = Default::default();
        let mut ctx = InstructionEmulatorContext {
            vm_partition: self.vm_partition.clone(),
            index: self.index,
            handle_mmio: None,
            handle_io: Some(handle_fn),
        };
        // safe as long as all callbacks occur before this fn returns.
        check_whpx!(unsafe {
            WHvEmulatorTryIoEmulation(
                self.instruction_emulator.handle,
                &mut ctx as *mut _ as *mut c_void,
                &self.last_exit_context.VpContext,
                &self.last_exit_context.__bindgen_anon_1.IoPortAccess,
                &mut status,
            )
        })?;
        // safe because we trust the kernel to fill in the union field properly.
        let success = unsafe { status.__bindgen_anon_1.EmulationSuccessful() > 0 };
        if success {
            Ok(())
        } else {
            // safe because we trust the kernel to fill in the union field properly.
            Err(Error::new(unsafe { status.AsUINT32 }))
        }
    }

    #[allow(non_upper_case_globals)]
    fn run(&mut self) -> Result<VcpuExit> {
        // safe because we own this whpx virtual processor index, and assume the vm partition is
        // still valid
        let exit_context_ptr = Arc::as_ptr(&self.last_exit_context);
        check_whpx!(unsafe {
            WHvRunVirtualProcessor(
                self.vm_partition.partition,
                self.index,
                exit_context_ptr as *mut WHV_RUN_VP_EXIT_CONTEXT as *mut c_void,
                size_of::<WHV_RUN_VP_EXIT_CONTEXT>() as u32,
            )
        })?;

        match self.last_exit_context.ExitReason {
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonMemoryAccess => Ok(VcpuExit::Mmio),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64IoPortAccess => Ok(VcpuExit::Io),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnrecoverableException => {
                Ok(VcpuExit::UnrecoverableException)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonInvalidVpRegisterValue => {
                Ok(VcpuExit::InvalidVpRegister)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonUnsupportedFeature => {
                Ok(VcpuExit::UnsupportedFeature)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64InterruptWindow => {
                Ok(VcpuExit::IrqWindowOpen)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Halt => Ok(VcpuExit::Hlt),
            // additional exits that are configurable
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicEoi => {
                // safe because we trust the kernel to fill in the union field properly.
                let vector = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .ApicEoi
                        .InterruptVector as u8
                };
                Ok(VcpuExit::IoapicEoi { vector })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64MsrAccess => {
                // Safe because we know this was an MSR access exit.
                let id = unsafe { self.last_exit_context.__bindgen_anon_1.MsrAccess.MsrNumber };

                // Safe because we know this was an MSR access exit
                let is_write = unsafe {
                    self.last_exit_context
                        .__bindgen_anon_1
                        .MsrAccess
                        .AccessInfo
                        .__bindgen_anon_1
                        .IsWrite()
                        == 1
                };
                if is_write {
                    // Safe because we know this was an MSR access exit
                    let value = unsafe {
                        // WRMSR writes the contents of registers EDX:EAX into the 64-bit model
                        // specific register
                        (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rdx << 32)
                            | (self.last_exit_context.__bindgen_anon_1.MsrAccess.Rax & 0xffffffff)
                    };
                    self.handle_msr_write(id, value)?;
                } else {
                    self.handle_msr_read(id)?;
                }
                Ok(VcpuExit::MsrAccess)
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid => {
                // Safe because we know this was a CPUID exit.
                let entry = unsafe {
                    CpuIdEntry {
                        function: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rax as u32,
                        index: self.last_exit_context.__bindgen_anon_1.CpuidAccess.Rcx as u32,
                        flags: 0,
                        cpuid: CpuidResult {
                            eax: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRax as u32,
                            ebx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRbx as u32,
                            ecx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRcx as u32,
                            edx: self
                                .last_exit_context
                                .__bindgen_anon_1
                                .CpuidAccess
                                .DefaultResultRdx as u32,
                        },
                    }
                };
                Ok(VcpuExit::Cpuid { entry })
            }
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonException => Ok(VcpuExit::Exception),
            // undocumented exit calls from the header file, WinHvPlatformDefs.h.
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Rdtsc => Ok(VcpuExit::RdTsc),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicSmiTrap => Ok(VcpuExit::ApicSmiTrap),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonHypercall => Ok(VcpuExit::Hypercall),
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64ApicInitSipiTrap => {
                Ok(VcpuExit::ApicInitSipiTrap)
            }
            // exit caused by host cancellation through WHvCancelRunVirtualProcessor
            WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonCanceled => Ok(VcpuExit::Canceled),
            r => panic!("unknown exit reason: {}", r),
        }
    }
}

impl VcpuX86_64 for WhpxVcpu {
    /// Sets or clears the flag that requests the VCPU to exit when it becomes possible to inject
    /// interrupts into the guest.
    fn set_interrupt_window_requested(&self, requested: bool) {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvX64RegisterDeliverabilityNotifications];
        let mut notifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER__bindgen_ty_1 =
            Default::default();
        notifications.set_InterruptNotification(if requested { 1 } else { 0 });
        let notify_register = WHV_REGISTER_VALUE {
            DeliverabilityNotifications: WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER {
                __bindgen_anon_1: notifications,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &notify_register as *const WHV_REGISTER_VALUE,
            )
        })
        .unwrap();
    }

    /// Checks if we can inject an interrupt into the VCPU.
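    ///
    /// `IF` is bit 9 of RFLAGS, hence the `0x200` mask used below; for example (illustrative
    /// RFLAGS values):
    /// ```ignore
    /// const IF_MASK: u64 = 1 << 9; // 0x200
    /// assert_ne!(0x202u64 & IF_MASK, 0); // interrupts enabled
    /// assert_eq!(0x002u64 & IF_MASK, 0); // interrupts disabled
    /// ```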
    fn ready_for_interrupt(&self) -> bool {
        // safe because InterruptionPending bit is always valid in ExecutionState struct
        let pending = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptionPending()
        };
        // safe because InterruptShadow bit is always valid in ExecutionState struct
        let shadow = unsafe {
            self.last_exit_context
                .VpContext
                .ExecutionState
                .__bindgen_anon_1
                .InterruptShadow()
        };

        let eflags = self.last_exit_context.VpContext.Rflags;
        const IF_MASK: u64 = 0x00000200;

        // can't inject an interrupt if the InterruptShadow or InterruptionPending bits are set,
        // or if the IF flag is clear
        shadow == 0 && pending == 0 && (eflags & IF_MASK) != 0
    }

    /// Injects interrupt vector `irq` into the VCPU.
    fn interrupt(&self, irq: u8) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingInterrupt as u32);
        pending_interrupt.set_InterruptionVector(irq.into());
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Injects a non-maskable interrupt into the VCPU.
    fn inject_nmi(&self) -> Result<()> {
        const REG_NAMES: [WHV_REGISTER_NAME; 1] =
            [WHV_REGISTER_NAME_WHvRegisterPendingInterruption];
        let mut pending_interrupt: WHV_X64_PENDING_INTERRUPTION_REGISTER__bindgen_ty_1 =
            Default::default();
        pending_interrupt.set_InterruptionPending(1);
        pending_interrupt
            .set_InterruptionType(WHV_X64_PENDING_INTERRUPTION_TYPE_WHvX64PendingNmi as u32);
        const NMI_VECTOR: u32 = 2; // 2 is the NMI vector.
        pending_interrupt.set_InterruptionVector(NMI_VECTOR);
        let interrupt = WHV_REGISTER_VALUE {
            PendingInterruption: WHV_X64_PENDING_INTERRUPTION_REGISTER {
                __bindgen_anon_1: pending_interrupt,
            },
        };
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                &interrupt as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Gets the VCPU general purpose registers.
    fn get_regs(&self) -> Result<Regs> {
        let mut whpx_regs: WhpxRegs = Default::default();
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_mut_ptr(),
            )
        })?;
        Ok(Regs::from(&whpx_regs))
    }

    /// Sets the VCPU general purpose registers.
    fn set_regs(&self, regs: &Regs) -> Result<()> {
        let whpx_regs = WhpxRegs::from(regs);
        let reg_names = WhpxRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU special registers.
    fn get_sregs(&self) -> Result<Sregs> {
        let mut whpx_sregs: WhpxSregs = Default::default();
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_mut_ptr(),
            )
        })?;
        Ok(Sregs::from(&whpx_sregs))
    }

    /// Sets the VCPU special registers.
    fn set_sregs(&self, sregs: &Sregs) -> Result<()> {
        let whpx_sregs = WhpxSregs::from(sregs);
        let reg_names = WhpxSregs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_sregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU FPU registers.
    fn get_fpu(&self) -> Result<Fpu> {
        let mut whpx_fpu: WhpxFpu = Default::default();
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_mut_ptr(),
            )
        })?;
        Ok(Fpu::from(&whpx_fpu))
    }

    /// Sets the VCPU FPU registers.
    fn set_fpu(&self, fpu: &Fpu) -> Result<()> {
        let whpx_fpu = WhpxFpu::from(fpu);
        let reg_names = WhpxFpu::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_fpu.as_ptr(),
            )
        })
    }

    /// Gets the VCPU XSAVE state.
    fn get_xsave(&self) -> Result<Xsave> {
        let mut empty_buffer = [0u8; 1];
        let mut needed_buf_size: u32 = 0;

        // Find out how much space is needed for the XSAVE state.
        // SAFETY: the buffer pointer and the size out-pointer are valid for the duration of the
        // FFI call; a zero-length buffer is expected to fail with WHV_E_INSUFFICIENT_BUFFER.
        let res = unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                empty_buffer.as_mut_ptr() as *mut _,
                0,
                &mut needed_buf_size,
            )
        };
        if res != WHV_E_INSUFFICIENT_BUFFER.0 {
            // This should always work, so if it doesn't, we'll return unsupported.
            error!("failed to get size of vcpu xsave");
            return Err(Error::new(EIO));
        }

        let mut xsave = Xsave::new(needed_buf_size as usize);
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so writes are bounded within the buffer.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_mut_ptr(),
                xsave.len() as u32,
                &mut needed_buf_size,
            )
        })?;
        Ok(xsave)
    }

    /// Sets the VCPU XSAVE state.
    fn set_xsave(&self, xsave: &Xsave) -> Result<()> {
        // SAFETY: the xsave buffer is valid for the duration of the FFI call, and we pass its
        // length in bytes so reads are bounded within the buffer.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorXsaveState(
                self.vm_partition.partition,
                self.index,
                xsave.as_ptr(),
                xsave.len() as u32,
            )
        })
    }

    fn get_interrupt_state(&self) -> Result<serde_json::Value> {
        let mut whpx_interrupt_regs: WhpxInterruptRegs = Default::default();
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_mut_ptr(),
            )
        })?;

        serde_json::to_value(whpx_interrupt_regs.into_serializable()).map_err(|e| {
            error!("failed to serialize interrupt state: {:?}", e);
            Error::new(EIO)
        })
    }

    fn set_interrupt_state(&self, data: serde_json::Value) -> Result<()> {
        let whpx_interrupt_regs =
            WhpxInterruptRegs::from_serializable(serde_json::from_value(data).map_err(|e| {
                error!("failed to deserialize interrupt state: {:?}", e);
                Error::new(EIO)
            })?);
        let reg_names = WhpxInterruptRegs::get_register_names();
        // SAFETY: we have enough space for all the registers & the memory lives for the duration
        // of the FFI call.
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_interrupt_regs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU debug registers.
    fn get_debugregs(&self) -> Result<DebugRegs> {
        let mut whpx_debugregs: WhpxDebugRegs = Default::default();
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_mut_ptr(),
            )
        })?;
        Ok(DebugRegs::from(&whpx_debugregs))
    }

    /// Sets the VCPU debug registers.
    fn set_debugregs(&self, debugregs: &DebugRegs) -> Result<()> {
        let whpx_debugregs = WhpxDebugRegs::from(debugregs);
        let reg_names = WhpxDebugRegs::get_register_names();
        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                reg_names as *const WHV_REGISTER_NAME,
                reg_names.len() as u32,
                whpx_debugregs.as_ptr(),
            )
        })
    }

    /// Gets the VCPU extended control registers.
    fn get_xcrs(&self) -> Result<BTreeMap<u32, u64>> {
        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let mut reg_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for the register value
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &mut reg_value,
            )
        })?;

        // safe because the union value, Reg64, is safe to pull out assuming
        // the kernel filled in the xcrs properly.
        let xcr0 = unsafe { reg_value.Reg64 };

        // whpx only supports xcr0
        let xcrs = BTreeMap::from([(0, xcr0)]);
        Ok(xcrs)
    }

    /// Sets a VCPU extended control register.
    fn set_xcr(&self, xcr_index: u32, value: u64) -> Result<()> {
        if xcr_index != 0 {
            // invalid xcr register provided
            return Err(Error::new(EINVAL));
        }

        const REG_NAME: WHV_REGISTER_NAME = WHV_REGISTER_NAME_WHvX64RegisterXCr0;
        let reg_value = WHV_REGISTER_VALUE { Reg64: value };
        // safe because we have enough space for the register value
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAME,
                /* RegisterCount */ 1,
                &reg_value,
            )
        })
    }

    /// Gets the value of a single model-specific register.
    fn get_msr(&self, msr_index: u32) -> Result<u64> {
        let msr_name = get_msr_name(msr_index).ok_or(Error::new(ENOENT))?;
        let mut msr_value = WHV_REGISTER_VALUE::default();
        // safe because we have enough space for the register value
        check_whpx!(unsafe {
            WHvGetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &msr_name,
                /* RegisterCount */ 1,
                &mut msr_value,
            )
        })?;

        // safe because Reg64 will be a valid union value
        let value = unsafe { msr_value.Reg64 };
        Ok(value)
    }

    fn get_all_msrs(&self) -> Result<BTreeMap<u32, u64>> {
        // Note that some members of VALID_MSRS cannot be fetched from WHPX with
        // WHvGetVirtualProcessorRegisters per the HTLFS, so we enumerate all of the
        // permitted MSRs here.
        //
        // We intentionally exclude WHvRegisterPendingInterruption and
        // WHvRegisterInterruptState because they are included in
        // get_interrupt_state.
        //
        // We intentionally exclude MSR_TSC because in snapshotting it is
        // handled by the generic x86_64 VCPU snapshot/restore. Non-snapshot
        // consumers should use get/set_tsc_adjust to access the adjust register
        // if needed.
        const MSRS_TO_SAVE: &[u32] = &[
            MSR_EFER,
            MSR_KERNEL_GS_BASE,
            MSR_APIC_BASE,
            MSR_SYSENTER_CS,
            MSR_SYSENTER_EIP,
            MSR_SYSENTER_ESP,
            MSR_STAR,
            MSR_LSTAR,
            MSR_CSTAR,
            MSR_SFMASK,
        ];

        let registers = MSRS_TO_SAVE
            .iter()
            .map(|msr_index| {
                let value = self.get_msr(*msr_index)?;
                Ok((*msr_index, value))
            })
            .collect::<Result<BTreeMap<u32, u64>>>()?;

        Ok(registers)
    }

    /// Sets the value of a single model-specific register.
    fn set_msr(&self, msr_index: u32, value: u64) -> Result<()> {
        match get_msr_name(msr_index) {
            Some(msr_name) => {
                let msr_value = WHV_REGISTER_VALUE { Reg64: value };
                // safe because we have enough space for the register value
                check_whpx!(unsafe {
                    WHvSetVirtualProcessorRegisters(
                        self.vm_partition.partition,
                        self.index,
                        &msr_name,
                        /* RegisterCount */ 1,
                        &msr_value,
                    )
                })
            }
            None => {
                warn!("msr 0x{msr_index:X} write unsupported by WHPX, dropping");
                Ok(())
            }
        }
    }

    /// Sets up the data returned by the CPUID instruction.
    /// For WHPX, this is not valid on the vcpu, and needs to be set up on the vm.
    fn set_cpuid(&self, _cpuid: &CpuId) -> Result<()> {
        Err(Error::new(ENXIO))
    }

    /// This function should be called after `Vcpu::run` returns `VcpuExit::Cpuid`, and `entry`
    /// should represent the result of emulating the CPUID instruction. The `handle_cpuid`
    /// function will then set the appropriate registers on the vcpu.
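    ///
    /// # Example
    ///
    /// An illustrative sketch of the run-loop usage; overriding `eax` with `0` is an arbitrary
    /// stand-in for real CPUID emulation:
    /// ```ignore
    /// if let VcpuExit::Cpuid { mut entry } = vcpu.run()? {
    ///     entry.cpuid.eax = 0;
    ///     vcpu.handle_cpuid(&entry)?;
    /// }
    /// ```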
    fn handle_cpuid(&mut self, entry: &CpuIdEntry) -> Result<()> {
        // Verify that we're only being called in a situation where the last exit reason was
        // ExitReasonX64Cpuid
        if self.last_exit_context.ExitReason != WHV_RUN_VP_EXIT_REASON_WHvRunVpExitReasonX64Cpuid {
            return Err(Error::new(EINVAL));
        }

        // Get the next rip from the exit context
        let rip = self.last_exit_context.VpContext.Rip
            + self.last_exit_context.VpContext.InstructionLength() as u64;

        const REG_NAMES: [WHV_REGISTER_NAME; 5] = [
            WHV_REGISTER_NAME_WHvX64RegisterRip,
            WHV_REGISTER_NAME_WHvX64RegisterRax,
            WHV_REGISTER_NAME_WHvX64RegisterRbx,
            WHV_REGISTER_NAME_WHvX64RegisterRcx,
            WHV_REGISTER_NAME_WHvX64RegisterRdx,
        ];

        let values = vec![
            WHV_REGISTER_VALUE { Reg64: rip },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.eax as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ebx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.ecx as u64,
            },
            WHV_REGISTER_VALUE {
                Reg64: entry.cpuid.edx as u64,
            },
        ];

        // safe because we have enough space for all the registers
        check_whpx!(unsafe {
            WHvSetVirtualProcessorRegisters(
                self.vm_partition.partition,
                self.index,
                &REG_NAMES as *const WHV_REGISTER_NAME,
                REG_NAMES.len() as u32,
                values.as_ptr() as *const WHV_REGISTER_VALUE,
            )
        })
    }

    /// Sets up debug registers and configures the vcpu for handling guest debug events.
    fn set_guest_debug(&self, _addrs: &[GuestAddress], _enable_singlestep: bool) -> Result<()> {
        // TODO(b/173807302): Implement this
        Err(Error::new(ENOENT))
    }

    fn restore_timekeeping(&self, host_tsc_reference_moment: u64, tsc_offset: u64) -> Result<()> {
        // Set the guest TSC such that it has the same TSC_OFFSET as it did at
        // the moment it was snapshotted. This is required for virtio-pvclock
        // to function correctly. (virtio-pvclock assumes the offset is fixed,
        // and adjusts CLOCK_BOOTTIME accordingly. It also hides the TSC jump
        // from CLOCK_MONOTONIC by setting the timebase.)
        self.set_tsc_value(host_tsc_reference_moment.wrapping_add(tsc_offset))
    }
}

fn get_msr_name(msr_index: u32) -> Option<WHV_REGISTER_NAME> {
    VALID_MSRS.get(&msr_index).copied()
}

// run calls are tested with the integration tests since the full vcpu needs to be set up for it.
#[cfg(test)]
mod tests {
    use vm_memory::GuestAddress;
    use vm_memory::GuestMemory;

    use super::*;
    use crate::VmX86_64;

    fn new_vm(cpu_count: usize, mem: GuestMemory) -> WhpxVm {
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let local_apic_supported = Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
            .expect("failed to get whpx features");
        WhpxVm::new(
            &whpx,
            cpu_count,
            mem,
            CpuId::new(0),
            local_apic_supported,
            None,
        )
        .expect("failed to create whpx vm")
    }

    #[test]
    fn try_clone() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        let _vcpu_clone = vcpu.try_clone().expect("failed to clone whpx vcpu");
    }

    #[test]
    fn index() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 2;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let mut vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        let vcpu0: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu0.index, 0);
        vcpu = vm.create_vcpu(1).expect("failed to create vcpu");
        let vcpu1: &WhpxVcpu = vcpu.downcast_ref().expect("Expected a WhpxVcpu");
        assert_eq!(vcpu1.index, 1);
    }

    #[test]
    fn get_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.get_regs().expect("failed to get regs");
    }

    #[test]
    fn set_regs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut regs = vcpu.get_regs().expect("failed to get regs");
        let new_val = regs.rax + 2;
        regs.rax = new_val;

        vcpu.set_regs(&regs).expect("failed to set regs");
        let new_regs = vcpu.get_regs().expect("failed to get regs");
        assert_eq!(new_regs.rax, new_val);
    }

    #[test]
    fn debugregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut dregs = vcpu.get_debugregs().unwrap();
        dregs.dr7 += 13;
        vcpu.set_debugregs(&dregs).unwrap();
        let dregs2 = vcpu.get_debugregs().unwrap();
        assert_eq!(dregs.dr7, dregs2.dr7);
    }

    #[test]
    fn sregs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().unwrap();
        sregs.cs.base += 7;
        vcpu.set_sregs(&sregs).unwrap();
        let sregs2 = vcpu.get_sregs().unwrap();
        assert_eq!(sregs.cs.base, sregs2.cs.base);
    }

    #[test]
    fn fpu() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut fpu = vcpu.get_fpu().unwrap();
        fpu.fpr[0].significand += 3;
        vcpu.set_fpu(&fpu).unwrap();
        let fpu2 = vcpu.get_fpu().unwrap();
        assert_eq!(fpu.fpr, fpu2.fpr);
    }

    #[test]
    fn xcrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let whpx = Whpx::new().expect("failed to instantiate whpx");
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");
        // check xsave support
        if !whpx.check_capability(HypervisorCap::Xcrs) {
            return;
        }

        vcpu.set_xcr(0, 1).unwrap();
        let xcrs = vcpu.get_xcrs().unwrap();
        let xcr0 = xcrs.get(&0).unwrap();
        assert_eq!(*xcr0, 1);
    }

    #[test]
    fn set_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        vcpu.set_msr(MSR_KERNEL_GS_BASE, 42).unwrap();

        let gs_base = vcpu.get_msr(MSR_KERNEL_GS_BASE).unwrap();
        assert_eq!(gs_base, 42);
    }

    #[test]
    fn get_msr() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // This one should succeed
        let _value = vcpu.get_msr(MSR_TSC).unwrap();

        // This one will fail to fetch
        vcpu.get_msr(MSR_TSC + 1)
            .expect_err("invalid MSR index should fail");
    }

    #[test]
    fn set_efer() {
        if !Whpx::is_enabled() {
            return;
        }
        // EFER Bits
        const EFER_SCE: u64 = 0x00000001;
        const EFER_LME: u64 = 0x00000100;
        const EFER_LMA: u64 = 0x00000400;
        const X86_CR0_PE: u64 = 0x1;
        const X86_CR0_PG: u64 = 0x80000000;
        const X86_CR4_PAE: u64 = 0x20;

        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let mut sregs = vcpu.get_sregs().expect("failed to get sregs");
        // Initial value should be 0
        assert_eq!(sregs.efer, 0);

        // Enable and activate long mode
        sregs.cr0 |= X86_CR0_PE; // enable protected mode
        sregs.cr0 |= X86_CR0_PG; // enable paging
        sregs.cr4 |= X86_CR4_PAE; // enable physical address extension
        sregs.efer = EFER_LMA | EFER_LME;
        vcpu.set_sregs(&sregs).expect("failed to set sregs");

        // Verify that the setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_LMA | EFER_LME);
        assert_eq!(sregs.cr0 & X86_CR0_PE, X86_CR0_PE);
        assert_eq!(sregs.cr0 & X86_CR0_PG, X86_CR0_PG);
        assert_eq!(sregs.cr4 & X86_CR4_PAE, X86_CR4_PAE);

        let efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(efer, EFER_LMA | EFER_LME);

        // Enable SCE via set_msr
        vcpu.set_msr(MSR_EFER, efer | EFER_SCE)
            .expect("failed to set msr");

        // Verify that the setting stuck
        let sregs = vcpu.get_sregs().expect("failed to get sregs");
        assert_eq!(sregs.efer, EFER_SCE | EFER_LME | EFER_LMA);
        let new_efer = vcpu.get_msr(MSR_EFER).expect("failed to get msr");
        assert_eq!(new_efer, EFER_SCE | EFER_LME | EFER_LMA);
    }

    #[test]
    fn get_and_set_xsave_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // XSAVE is essentially opaque for our purposes. We just want to make sure our syscalls
        // succeed.
        let xsave = vcpu.get_xsave().unwrap();
        vcpu.set_xsave(&xsave).unwrap();
    }

    #[test]
    fn get_and_set_interrupt_state_smoke() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        // For the sake of snapshotting, interrupt state is essentially opaque. We just want to
        // make sure our syscalls succeed.
        let interrupt_state = vcpu.get_interrupt_state().unwrap();
        vcpu.set_interrupt_state(interrupt_state).unwrap();
    }

    #[test]
    fn get_all_msrs() {
        if !Whpx::is_enabled() {
            return;
        }
        let cpu_count = 1;
        let mem =
            GuestMemory::new(&[(GuestAddress(0), 0x1000)]).expect("failed to create guest memory");
        let vm = new_vm(cpu_count, mem);
        let vcpu = vm.create_vcpu(0).expect("failed to create vcpu");

        let all_msrs = vcpu.get_all_msrs().unwrap();

        // Our MSR buffer is initialized to zeros. The APIC base will be non-zero, so by
        // asserting on it we know the MSR fetch actually returned data.
        let apic_base = all_msrs.get(&MSR_APIC_BASE).unwrap();
        assert_ne!(*apic_base, 0);
    }
}