1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
 * Usage: to be run via nx_huge_pages_test.sh, which does the necessary
4  * environment setup and teardown
5  *
6  * Copyright (C) 2022, Google LLC.
7  */
8 #include <fcntl.h>
9 #include <stdint.h>
10 #include <time.h>
11 
12 #include <test_util.h>
13 #include "kvm_util.h"
14 #include "processor.h"
15 
/* Slot number handed to vm_userspace_mem_region_add() for the test region. */
#define HPAGE_SLOT		10

/* Guest physical base of the test region; 4G prevents collision w/ slot 0. */
#define HPAGE_GPA		(4UL << 30)

/* The GVA is arbitrary, so simply reuse the GPA. */
#define HPAGE_GVA		HPAGE_GPA

/* A 2MiB huge page covers 512 pages of 4KiB each. */
#define PAGES_PER_2MB_HUGE_PAGE 512

/* The test region is three 2MiB huge pages long. */
#define HPAGE_SLOT_NPAGES	(3 * PAGES_PER_2MB_HUGE_PAGE)

/*
 * nx_huge_pages_test.sh passes this value on the command line; its absence
 * is an easy way to warn that the test is being run without the script.
 */
#define MAGIC_TOKEN 887563923

/*
 * x86 opcode of the return instruction.  The test region is filled with it
 * so that a call into hugepage-backed memory returns immediately.
 */
#define RETURN_OPCODE 0xC3
33 
34 /* Call the specified memory address. */
static void guest_do_CALL(uint64_t target)
{
	/* Reinterpret the raw address as a function taking and returning nothing. */
	void (*entry)(void) = (void (*)(void))target;

	entry();
}
39 
40 /*
41  * Exit the VM after each memory access so that the userspace component of the
42  * test can make assertions about the pages backing the VM.
43  *
 * See the comments in run_test() below for an explanation of how each access
 * should affect the backing mappings.
46  */
guest_code(void)47 void guest_code(void)
48 {
49 	uint64_t hpage_1 = HPAGE_GVA;
50 	uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
51 	uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
52 
53 	READ_ONCE(*(uint64_t *)hpage_1);
54 	GUEST_SYNC(1);
55 
56 	READ_ONCE(*(uint64_t *)hpage_2);
57 	GUEST_SYNC(2);
58 
59 	guest_do_CALL(hpage_1);
60 	GUEST_SYNC(3);
61 
62 	guest_do_CALL(hpage_3);
63 	GUEST_SYNC(4);
64 
65 	READ_ONCE(*(uint64_t *)hpage_1);
66 	GUEST_SYNC(5);
67 
68 	READ_ONCE(*(uint64_t *)hpage_3);
69 	GUEST_SYNC(6);
70 }
71 
/*
 * Assert that the VM's "pages_2m" stat currently equals @expected_pages_2m.
 */
static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
{
	const int actual_pages_2m = vm_get_stat(vm, "pages_2m");

	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
		    "Unexpected 2m page count. Expected %d, got %d",
		    expected_pages_2m, actual_pages_2m);
}
82 
/*
 * Assert that the VM's "nx_lpage_splits" stat currently equals
 * @expected_splits.
 */
static void check_split_count(struct kvm_vm *vm, int expected_splits)
{
	const int actual_splits = vm_get_stat(vm, "nx_lpage_splits");

	TEST_ASSERT(actual_splits == expected_splits,
		    "Unexpected NX huge page split count. Expected %d, got %d",
		    expected_splits, actual_splits);
}
93 
/*
 * Sleep for five times @reclaim_period_ms (milliseconds) so that the recovery
 * thread has had time to run.
 *
 * A non-positive period results in no sleep at all.  If a signal interrupts
 * the sleep, it is resumed for the remaining time rather than returning
 * early, as a shortened wait would make the subsequent checks unreliable.
 */
static void wait_for_reclaim(int reclaim_period_ms)
{
	long long reclaim_wait_ms;
	struct timespec ts;
	struct timespec rem;

	if (reclaim_period_ms <= 0)
		return;

	/* Widen before multiplying so that a large period cannot overflow int. */
	reclaim_wait_ms = (long long)reclaim_period_ms * 5;
	ts.tv_sec = reclaim_wait_ms / 1000;
	ts.tv_nsec = (reclaim_wait_ms % 1000) * 1000000;

	/* On EINTR nanosleep() reports the time left; go back to sleep for it. */
	while (nanosleep(&ts, &rem) == -1 && errno == EINTR)
		ts = rem;
}
104 
run_test(int reclaim_period_ms,bool disable_nx_huge_pages,bool reboot_permissions)105 void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
106 	      bool reboot_permissions)
107 {
108 	struct kvm_vcpu *vcpu;
109 	struct kvm_vm *vm;
110 	uint64_t nr_bytes;
111 	void *hva;
112 	int r;
113 
114 	vm = vm_create(1);
115 
116 	if (disable_nx_huge_pages) {
117 		r = __vm_disable_nx_huge_pages(vm);
118 		if (reboot_permissions) {
119 			TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
120 		} else {
121 			TEST_ASSERT(r == -1 && errno == EPERM,
122 				    "This process should not have permission to disable NX huge pages");
123 			return;
124 		}
125 	}
126 
127 	vcpu = vm_vcpu_add(vm, 0, guest_code);
128 
129 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
130 				    HPAGE_GPA, HPAGE_SLOT,
131 				    HPAGE_SLOT_NPAGES, 0);
132 
133 	nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
134 
135 	/*
136 	 * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
137 	 * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
138 	 * whenever KVM is shadowing the guest page tables).
139 	 *
140 	 * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
141 	 * pages irrespective of the guest page size, so map with 4KiB pages
142 	 * to test that that is the case.
143 	 */
144 	if (kvm_is_tdp_enabled())
145 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
146 	else
147 		virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
148 
149 	hva = addr_gpa2hva(vm, HPAGE_GPA);
150 	memset(hva, RETURN_OPCODE, nr_bytes);
151 
152 	check_2m_page_count(vm, 0);
153 	check_split_count(vm, 0);
154 
155 	/*
156 	 * The guest code will first read from the first hugepage, resulting
157 	 * in a huge page mapping being created.
158 	 */
159 	vcpu_run(vcpu);
160 	check_2m_page_count(vm, 1);
161 	check_split_count(vm, 0);
162 
163 	/*
164 	 * Then the guest code will read from the second hugepage, resulting
165 	 * in another huge page mapping being created.
166 	 */
167 	vcpu_run(vcpu);
168 	check_2m_page_count(vm, 2);
169 	check_split_count(vm, 0);
170 
171 	/*
172 	 * Next, the guest will execute from the first huge page, causing it
173 	 * to be remapped at 4k.
174 	 *
175 	 * If NX huge pages are disabled, this should have no effect.
176 	 */
177 	vcpu_run(vcpu);
178 	check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
179 	check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
180 
181 	/*
182 	 * Executing from the third huge page (previously unaccessed) will
183 	 * cause part to be mapped at 4k.
184 	 *
185 	 * If NX huge pages are disabled, it should be mapped at 2M.
186 	 */
187 	vcpu_run(vcpu);
188 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
189 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
190 
191 	/* Reading from the first huge page again should have no effect. */
192 	vcpu_run(vcpu);
193 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
194 	check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
195 
196 	/* Give recovery thread time to run. */
197 	wait_for_reclaim(reclaim_period_ms);
198 
199 	/*
200 	 * Now that the reclaimer has run, all the split pages should be gone.
201 	 *
202 	 * If NX huge pages are disabled, the relaimer will not run, so
203 	 * nothing should change from here on.
204 	 */
205 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
206 	check_split_count(vm, 0);
207 
208 	/*
209 	 * The 4k mapping on hpage 3 should have been removed, so check that
210 	 * reading from it causes a huge page mapping to be installed.
211 	 */
212 	vcpu_run(vcpu);
213 	check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
214 	check_split_count(vm, 0);
215 
216 	kvm_vm_free(vm);
217 }
218 
/*
 * Print the command line usage, with @name as the program name, and exit
 * with status 0.  Does not return.
 */
static void help(char *name)
{
	puts("");
	/* Every option accepted by main(), including -r, belongs in the synopsis. */
	printf("usage: %s [-h] [-p period_ms] [-t token] [-r]\n", name);
	puts("");
	printf(" -p: The NX reclaim period in milliseconds.\n");
	printf(" -t: The magic token to indicate environment setup is done.\n");
	printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
	puts("");
	exit(0);
}
230 
main(int argc,char ** argv)231 int main(int argc, char **argv)
232 {
233 	int reclaim_period_ms = 0, token = 0, opt;
234 	bool reboot_permissions = false;
235 
236 	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
237 		switch (opt) {
238 		case 'p':
239 			reclaim_period_ms = atoi_positive("Reclaim period", optarg);
240 			break;
241 		case 't':
242 			token = atoi_paranoid(optarg);
243 			break;
244 		case 'r':
245 			reboot_permissions = true;
246 			break;
247 		case 'h':
248 		default:
249 			help(argv[0]);
250 			break;
251 		}
252 	}
253 
254 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
255 
256 	__TEST_REQUIRE(token == MAGIC_TOKEN,
257 		       "This test must be run with the magic token via '-t %d'.\n"
258 		       "Running via nx_huge_pages_test.sh, which also handles "
259 		       "environment setup, is strongly recommended.", MAGIC_TOKEN);
260 
261 	run_test(reclaim_period_ms, false, reboot_permissions);
262 	run_test(reclaim_period_ms, true, reboot_permissions);
263 
264 	return 0;
265 }
266 
267