// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
9 #include <fcntl.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/ioctl.h>
14
15 #include "test_util.h"
16
17 #include "kvm_util.h"
18 #include "processor.h"
19 #include "vmx.h"
20 #include "svm_util.h"
21
22 #define L2_GUEST_STACK_SIZE 256
23
/*
 * L2 guest body for the SVM variant of the test.
 *
 * Reports stage 4, exits to L1 via vmcall(), reports stage 6 once it is
 * running again, and finally exits to L1 one last time.
 */
void svm_l2_guest_code(void)
{
	/* First checkpoint reported from within L2. */
	GUEST_SYNC(4);

	/* Hand control back to L1. */
	vmcall();

	/* Second checkpoint reported from within L2. */
	GUEST_SYNC(6);

	/* Finished: exit to L1, which does not run L2 again. */
	vmcall();
}
33
svm_l1_guest_code(struct svm_test_data * svm)34 static void svm_l1_guest_code(struct svm_test_data *svm)
35 {
36 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
37 struct vmcb *vmcb = svm->vmcb;
38
39 GUEST_ASSERT(svm->vmcb_gpa);
40 /* Prepare for L2 execution. */
41 generic_svm_setup(svm, svm_l2_guest_code,
42 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
43
44 GUEST_SYNC(3);
45 run_guest(vmcb, svm->vmcb_gpa);
46 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
47 GUEST_SYNC(5);
48 vmcb->save.rip += 3;
49 run_guest(vmcb, svm->vmcb_gpa);
50 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
51 GUEST_SYNC(7);
52 }
53
vmx_l2_guest_code(void)54 void vmx_l2_guest_code(void)
55 {
56 GUEST_SYNC(6);
57
58 /* Exit to L1 */
59 vmcall();
60
61 /* L1 has now set up a shadow VMCS for us. */
62 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
63 GUEST_SYNC(10);
64 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
65 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
66 GUEST_SYNC(11);
67 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
68 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
69 GUEST_SYNC(12);
70
71 /* Done, exit to L1 and never come back. */
72 vmcall();
73 }
74
vmx_l1_guest_code(struct vmx_pages * vmx_pages)75 static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
76 {
77 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
78
79 GUEST_ASSERT(vmx_pages->vmcs_gpa);
80 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
81 GUEST_SYNC(3);
82 GUEST_ASSERT(load_vmcs(vmx_pages));
83 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
84
85 GUEST_SYNC(4);
86 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
87
88 prepare_vmcs(vmx_pages, vmx_l2_guest_code,
89 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
90
91 GUEST_SYNC(5);
92 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
93 GUEST_ASSERT(!vmlaunch());
94 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
95 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
96
97 /* Check that the launched state is preserved. */
98 GUEST_ASSERT(vmlaunch());
99
100 GUEST_ASSERT(!vmresume());
101 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
102
103 GUEST_SYNC(7);
104 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
105
106 GUEST_ASSERT(!vmresume());
107 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
108
109 vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
110
111 vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
112 vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
113
114 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
115 GUEST_ASSERT(vmlaunch());
116 GUEST_SYNC(8);
117 GUEST_ASSERT(vmlaunch());
118 GUEST_ASSERT(vmresume());
119
120 vmwrite(GUEST_RIP, 0xc0ffee);
121 GUEST_SYNC(9);
122 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
123
124 GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
125 GUEST_ASSERT(!vmresume());
126 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
127
128 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
129 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
130 GUEST_ASSERT(vmlaunch());
131 GUEST_ASSERT(vmresume());
132 GUEST_SYNC(13);
133 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
134 GUEST_ASSERT(vmlaunch());
135 GUEST_ASSERT(vmresume());
136 }
137
guest_code(void * arg)138 static void __attribute__((__flatten__)) guest_code(void *arg)
139 {
140 GUEST_SYNC(1);
141
142 if (this_cpu_has(X86_FEATURE_XSAVE)) {
143 uint64_t supported_xcr0 = this_cpu_supported_xcr0();
144 uint8_t buffer[4096];
145
146 memset(buffer, 0xcc, sizeof(buffer));
147
148 /*
149 * Modify state for all supported xfeatures to take them out of
150 * their "init" state, i.e. to make them show up in XSTATE_BV.
151 *
152 * Note off-by-default features, e.g. AMX, are out of scope for
153 * this particular testcase as they have a different ABI.
154 */
155 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
156 asm volatile ("fincstp");
157
158 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
159 asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
160
161 if (supported_xcr0 & XFEATURE_MASK_YMM)
162 asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
163
164 if (supported_xcr0 & XFEATURE_MASK_AVX512) {
165 asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
166 asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
167 asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
168 }
169
170 if (this_cpu_has(X86_FEATURE_MPX)) {
171 uint64_t bounds[2] = { 10, 0xffffffffull };
172 uint64_t output[2] = { };
173
174 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
175 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
176
177 /*
178 * Don't bother trying to get BNDCSR into the INUSE
179 * state. MSR_IA32_BNDCFGS doesn't count as it isn't
180 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
181 * modified by XRSTOR. Stuffing XSTATE_BV in the host
182 * is simpler than doing XRSTOR here in the guest.
183 *
184 * However, temporarily enable MPX in BNDCFGS so that
185 * BNDMOV actually loads BND1. If MPX isn't *fully*
186 * enabled, all MPX instructions are treated as NOPs.
187 *
188 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
189 * mnemonics/registers has been removed from gcc and
190 * clang (and was never fully supported by clang).
191 */
192 wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
193 asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
194 /*
195 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
196 * that BND1 actually got loaded.
197 */
198 asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
199 wrmsr(MSR_IA32_BNDCFGS, 0);
200
201 GUEST_ASSERT_EQ(bounds[0], output[0]);
202 GUEST_ASSERT_EQ(bounds[1], output[1]);
203 }
204 if (this_cpu_has(X86_FEATURE_PKU)) {
205 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
206 set_cr4(get_cr4() | X86_CR4_PKE);
207 GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
208
209 wrpkru(-1u);
210 }
211 }
212
213 GUEST_SYNC(2);
214
215 if (arg) {
216 if (this_cpu_has(X86_FEATURE_SVM))
217 svm_l1_guest_code(arg);
218 else
219 vmx_l1_guest_code(arg);
220 }
221
222 GUEST_DONE();
223 }
224
main(int argc,char * argv[])225 int main(int argc, char *argv[])
226 {
227 uint64_t *xstate_bv, saved_xstate_bv;
228 vm_vaddr_t nested_gva = 0;
229 struct kvm_cpuid2 empty_cpuid = {};
230 struct kvm_regs regs1, regs2;
231 struct kvm_vcpu *vcpu, *vcpuN;
232 struct kvm_vm *vm;
233 struct kvm_x86_state *state;
234 struct ucall uc;
235 int stage;
236
237 /* Create VM */
238 vm = vm_create_with_one_vcpu(&vcpu, guest_code);
239
240 vcpu_regs_get(vcpu, ®s1);
241
242 if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
243 if (kvm_cpu_has(X86_FEATURE_SVM))
244 vcpu_alloc_svm(vm, &nested_gva);
245 else if (kvm_cpu_has(X86_FEATURE_VMX))
246 vcpu_alloc_vmx(vm, &nested_gva);
247 }
248
249 if (!nested_gva)
250 pr_info("will skip nested state checks\n");
251
252 vcpu_args_set(vcpu, 1, nested_gva);
253
254 for (stage = 1;; stage++) {
255 vcpu_run(vcpu);
256 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
257
258 switch (get_ucall(vcpu, &uc)) {
259 case UCALL_ABORT:
260 REPORT_GUEST_ASSERT(uc);
261 /* NOT REACHED */
262 case UCALL_SYNC:
263 break;
264 case UCALL_DONE:
265 goto done;
266 default:
267 TEST_FAIL("Unknown ucall %lu", uc.cmd);
268 }
269
270 /* UCALL_SYNC is handled here. */
271 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
272 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
273 stage, (ulong)uc.args[1]);
274
275 state = vcpu_save_state(vcpu);
276 memset(®s1, 0, sizeof(regs1));
277 vcpu_regs_get(vcpu, ®s1);
278
279 kvm_vm_release(vm);
280
281 /* Restore state in a new VM. */
282 vcpu = vm_recreate_with_one_vcpu(vm);
283 vcpu_load_state(vcpu, state);
284
285 /*
286 * Restore XSAVE state in a dummy vCPU, first without doing
287 * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
288 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
289 * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
290 * load only XSAVE state, MSRs in particular have a much more
291 * convoluted ABI.
292 *
293 * Load two versions of XSAVE state: one with the actual guest
294 * XSAVE state, and one with all supported features forced "on"
295 * in xstate_bv, e.g. to ensure that KVM allows loading all
296 * supported features, even if something goes awry in saving
297 * the original snapshot.
298 */
299 xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
300 saved_xstate_bv = *xstate_bv;
301
302 vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
303 vcpu_xsave_set(vcpuN, state->xsave);
304 *xstate_bv = kvm_cpu_supported_xcr0();
305 vcpu_xsave_set(vcpuN, state->xsave);
306
307 vcpu_init_cpuid(vcpuN, &empty_cpuid);
308 vcpu_xsave_set(vcpuN, state->xsave);
309 *xstate_bv = saved_xstate_bv;
310 vcpu_xsave_set(vcpuN, state->xsave);
311
312 kvm_x86_state_cleanup(state);
313
314 memset(®s2, 0, sizeof(regs2));
315 vcpu_regs_get(vcpu, ®s2);
316 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)),
317 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
318 (ulong) regs2.rdi, (ulong) regs2.rsi);
319 }
320
321 done:
322 kvm_vm_free(vm);
323 }
324