1 /*
2 * Copyright (c) 2019 Alexey Dobriyan <[email protected]>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16 /*
17 * Fork and exec tiny 1 page executable which precisely controls its VM.
18 * Test /proc/$PID/maps
19 * Test /proc/$PID/smaps
20 * Test /proc/$PID/smaps_rollup
21 * Test /proc/$PID/statm
22 *
23 * FIXME require CONFIG_TMPFS which can be disabled
24 * FIXME test other values from "smaps"
25 * FIXME support other archs
26 */
27 #undef NDEBUG
28 #include <assert.h>
29 #include <errno.h>
30 #include <sched.h>
31 #include <signal.h>
32 #include <stdbool.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <sys/mount.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41 #include <fcntl.h>
42 #include <unistd.h>
43 #include <sys/syscall.h>
44 #include <sys/uio.h>
45 #include <linux/kdev_t.h>
46 #include <sys/time.h>
47 #include <sys/resource.h>
48
49 #include "../kselftest.h"
50
/*
 * Raw execveat(2) entry point.
 *
 * Keep this definition above the x86_64 "#define syscall 0x0f, 0x05" opcode
 * macro further down in this file: at this point "syscall" must still name
 * the libc function.
 *
 * Returns whatever syscall() returns.
 */
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
	long rv;

	rv = syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
	return rv;
}
55
make_private_tmp(void)56 static void make_private_tmp(void)
57 {
58 if (unshare(CLONE_NEWNS) == -1) {
59 if (errno == ENOSYS || errno == EPERM) {
60 exit(4);
61 }
62 exit(1);
63 }
64 if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
65 exit(1);
66 }
67 if (mount(NULL, "/tmp", "tmpfs", 0, NULL) == -1) {
68 exit(1);
69 }
70 }
71
/* PID of the exec'ed payload process; stays -1 until main() forks it. */
static pid_t pid = -1;

/*
 * atexit() hook registered by main(): terminate the payload process.
 *
 * Does nothing while no child has been forked (pid == -1) and in the forked
 * child itself (pid == 0).
 */
static void ate(void)
{
	if (pid <= 0) {
		return;
	}
	kill(pid, SIGTERM);
}
79
/*
 * ELF64 file header, laid out exactly as it is written to the executable
 * image by make_exe().
 */
struct elf64_hdr {
	uint8_t		e_ident[16];	/* magic, class, data encoding, version, ABI */
	uint16_t	e_type;		/* object file type */
	uint16_t	e_machine;	/* target architecture */
	uint32_t	e_version;	/* object file version */
	uint64_t	e_entry;	/* entry point virtual address */
	uint64_t	e_phoff;	/* file offset of the program header table */
	uint64_t	e_shoff;	/* file offset of the section header table */
	uint32_t	e_flags;	/* processor-specific flags */
	uint16_t	e_ehsize;	/* size of this header */
	uint16_t	e_phentsize;	/* size of one program header */
	uint16_t	e_phnum;	/* number of program headers */
	uint16_t	e_shentsize;	/* size of one section header */
	uint16_t	e_shnum;	/* number of section headers */
	uint16_t	e_shstrndx;	/* section name string table index */
};
96
/*
 * ELF64 program header; make_exe() emits exactly one of these right after
 * the file header.
 */
struct elf64_phdr {
	uint32_t	p_type;		/* segment type */
	uint32_t	p_flags;	/* segment permission flags */
	uint64_t	p_offset;	/* file offset of the segment */
	uint64_t	p_vaddr;	/* virtual address of the segment */
	uint64_t	p_paddr;	/* physical address */
	uint64_t	p_filesz;	/* bytes of the segment present in the file */
	uint64_t	p_memsz;	/* bytes of the segment in memory */
	uint64_t	p_align;	/* segment alignment */
};
107
108 #ifdef __x86_64__
/* Geometry of the single mapping the test executable ends up with. */
#define PAGE_SIZE	4096
#define VADDR		(1UL << 32)
/* Column at which main() places the pathname in the expected maps line. */
#define MAPS_OFFSET	73

/*
 * Little-endian byte lists of an immediate operand.
 * These expand to comma-separated initializer elements, so they are usable
 * only inside a byte array initializer.
 */
#define imm32_bytes(x)	\
	(x)&0xff, ((x)>>8)&0xff, ((x)>>16)&0xff, ((x)>>24)&0xff
#define imm64_bytes(x)	\
	imm32_bytes(x),	\
	((x)>>32)&0xff, ((x)>>40)&0xff, ((x)>>48)&0xff, ((x)>>56)&0xff

/*
 * Hand-assembled instructions.
 * From here on "syscall" is the two opcode bytes, no longer the libc function.
 */
#define syscall		0x0f, 0x05
#define mov_rdi(x)	0x48, 0xbf, imm64_bytes(x)
#define mov_rsi(x)	0x48, 0xbe, imm64_bytes(x)
#define mov_eax(x)	0xb8, imm32_bytes(x)
126
127 static const uint8_t payload[] = {
128 /* Casually unmap stack, vDSO and everything else. */
129 /* munmap */
130 mov_rdi(VADDR + 4096),
131 mov_rsi((1ULL << 47) - 4096 - VADDR - 4096),
132 mov_eax(11),
133 syscall,
134
135 /* Ping parent. */
136 /* write(0, &c, 1); */
137 0x31, 0xff, /* xor edi, edi */
138 0x48, 0x8d, 0x35, 0x00, 0x00, 0x00, 0x00, /* lea rsi, [rip] */
139 0xba, 0x01, 0x00, 0x00, 0x00, /* mov edx, 1 */
140 mov_eax(1),
141 syscall,
142
143 /* 1: pause(); */
144 mov_eax(34),
145 syscall,
146
147 0xeb, 0xf7, /* jmp 1b */
148 };
149
make_exe(const uint8_t * payload,size_t len)150 static int make_exe(const uint8_t *payload, size_t len)
151 {
152 struct elf64_hdr h;
153 struct elf64_phdr ph;
154
155 struct iovec iov[3] = {
156 {&h, sizeof(struct elf64_hdr)},
157 {&ph, sizeof(struct elf64_phdr)},
158 {(void *)payload, len},
159 };
160 int fd, fd1;
161 char buf[64];
162
163 memset(&h, 0, sizeof(h));
164 h.e_ident[0] = 0x7f;
165 h.e_ident[1] = 'E';
166 h.e_ident[2] = 'L';
167 h.e_ident[3] = 'F';
168 h.e_ident[4] = 2;
169 h.e_ident[5] = 1;
170 h.e_ident[6] = 1;
171 h.e_ident[7] = 0;
172 h.e_type = 2;
173 h.e_machine = 0x3e;
174 h.e_version = 1;
175 h.e_entry = VADDR + sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr);
176 h.e_phoff = sizeof(struct elf64_hdr);
177 h.e_shoff = 0;
178 h.e_flags = 0;
179 h.e_ehsize = sizeof(struct elf64_hdr);
180 h.e_phentsize = sizeof(struct elf64_phdr);
181 h.e_phnum = 1;
182 h.e_shentsize = 0;
183 h.e_shnum = 0;
184 h.e_shstrndx = 0;
185
186 memset(&ph, 0, sizeof(ph));
187 ph.p_type = 1;
188 ph.p_flags = (1<<2)|1;
189 ph.p_offset = 0;
190 ph.p_vaddr = VADDR;
191 ph.p_paddr = 0;
192 ph.p_filesz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
193 ph.p_memsz = sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len;
194 ph.p_align = 4096;
195
196 fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_EXCL|O_TMPFILE, 0700);
197 if (fd == -1) {
198 exit(1);
199 }
200
201 if (writev(fd, iov, 3) != sizeof(struct elf64_hdr) + sizeof(struct elf64_phdr) + len) {
202 exit(1);
203 }
204
205 /* Avoid ETXTBSY on exec. */
206 snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
207 fd1 = open(buf, O_RDONLY|O_CLOEXEC);
208 close(fd);
209
210 return fd1;
211 }
212 #endif
213
/*
 * State of the vsyscall page:
 *
 *	0: vsyscall VMA doesn't exist	vsyscall=none
 *	1: vsyscall VMA is --xp		vsyscall=xonly
 *	2: vsyscall VMA is r-xp		vsyscall=emulate
 */
static volatile int g_vsyscall;
/* Expected /proc/$PID/maps line for the state above (one of the strings below). */
static const char *str_vsyscall;

/*
 * Expected maps lines.  The name column starts at offset 73, the same offset
 * used for the pathname of the first maps line, so the gap between the inode
 * field and "[vsyscall]" is 18 spaces, not one.
 */
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0"
"                  "
"[vsyscall]\n";
static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0"
"                  "
"[vsyscall]\n";
227
228 #ifdef __x86_64__
/*
 * SIGSEGV handler installed in the probe child forked by vsyscall().
 *
 * A fault means the access being tried is not permitted, so the child leaves
 * immediately and reports the last state recorded in g_vsyscall as its exit
 * status.  None of the handler arguments are needed.
 */
static void sigaction_SIGSEGV(int sig, siginfo_t *info, void *ucontext)
{
	(void)sig;
	(void)info;
	(void)ucontext;

	_exit(g_vsyscall);
}
233
234 /*
235 * vsyscall page can't be unmapped, probe it directly.
236 */
vsyscall(void)237 static void vsyscall(void)
238 {
239 pid_t pid;
240 int wstatus;
241
242 pid = fork();
243 if (pid < 0) {
244 fprintf(stderr, "fork, errno %d\n", errno);
245 exit(1);
246 }
247 if (pid == 0) {
248 struct rlimit rlim = {0, 0};
249 (void)setrlimit(RLIMIT_CORE, &rlim);
250
251 /* Hide "segfault at ffffffffff600000" messages. */
252 struct sigaction act;
253 memset(&act, 0, sizeof(struct sigaction));
254 act.sa_flags = SA_SIGINFO;
255 act.sa_sigaction = sigaction_SIGSEGV;
256 (void)sigaction(SIGSEGV, &act, NULL);
257
258 g_vsyscall = 0;
259 /* gettimeofday(NULL, NULL); */
260 asm volatile (
261 "call %P0"
262 :
263 : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
264 : "rax", "rcx", "r11"
265 );
266
267 g_vsyscall = 1;
268 *(volatile int *)0xffffffffff600000UL;
269
270 g_vsyscall = 2;
271 exit(g_vsyscall);
272 }
273 waitpid(pid, &wstatus, 0);
274 if (WIFEXITED(wstatus)) {
275 g_vsyscall = WEXITSTATUS(wstatus);
276 } else {
277 fprintf(stderr, "error: wstatus %08x\n", wstatus);
278 exit(1);
279 }
280 }
281
main(void)282 int main(void)
283 {
284 int pipefd[2];
285 int exec_fd;
286
287 vsyscall();
288 switch (g_vsyscall) {
289 case 0:
290 str_vsyscall = str_vsyscall_0;
291 break;
292 case 1:
293 str_vsyscall = str_vsyscall_1;
294 break;
295 case 2:
296 str_vsyscall = str_vsyscall_2;
297 break;
298 default:
299 abort();
300 }
301
302 atexit(ate);
303
304 make_private_tmp();
305
306 /* Reserve fd 0 for 1-byte pipe ping from child. */
307 close(0);
308 if (open("/", O_RDONLY|O_DIRECTORY|O_PATH) != 0) {
309 return 1;
310 }
311
312 exec_fd = make_exe(payload, sizeof(payload));
313
314 if (pipe(pipefd) == -1) {
315 return 1;
316 }
317 if (dup2(pipefd[1], 0) != 0) {
318 return 1;
319 }
320
321 pid = fork();
322 if (pid == -1) {
323 return 1;
324 }
325 if (pid == 0) {
326 sys_execveat(exec_fd, "", NULL, NULL, AT_EMPTY_PATH);
327 return 1;
328 }
329
330 char _;
331 if (read(pipefd[0], &_, 1) != 1) {
332 return 1;
333 }
334
335 struct stat st;
336 if (fstat(exec_fd, &st) == -1) {
337 return 1;
338 }
339
340 /* Generate "head -n1 /proc/$PID/maps" */
341 char buf0[256];
342 memset(buf0, ' ', sizeof(buf0));
343 int len = snprintf(buf0, sizeof(buf0),
344 "%08lx-%08lx r-xp 00000000 %02lx:%02lx %llu",
345 VADDR, VADDR + PAGE_SIZE,
346 MAJOR(st.st_dev), MINOR(st.st_dev),
347 (unsigned long long)st.st_ino);
348 buf0[len] = ' ';
349 snprintf(buf0 + MAPS_OFFSET, sizeof(buf0) - MAPS_OFFSET,
350 "/tmp/#%llu (deleted)\n", (unsigned long long)st.st_ino);
351
352 /* Test /proc/$PID/maps */
353 {
354 const size_t len = strlen(buf0) + strlen(str_vsyscall);
355 char buf[256];
356 ssize_t rv;
357 int fd;
358
359 snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
360 fd = open(buf, O_RDONLY);
361 if (fd == -1) {
362 return 1;
363 }
364 rv = read(fd, buf, sizeof(buf));
365 assert(rv == len);
366 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
367 if (g_vsyscall > 0) {
368 assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
369 }
370 }
371
372 /* Test /proc/$PID/smaps */
373 {
374 char buf[4096];
375 ssize_t rv;
376 int fd;
377
378 snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
379 fd = open(buf, O_RDONLY);
380 if (fd == -1) {
381 return 1;
382 }
383 rv = read(fd, buf, sizeof(buf));
384 assert(0 <= rv && rv <= sizeof(buf));
385
386 assert(rv >= strlen(buf0));
387 assert(memcmp(buf, buf0, strlen(buf0)) == 0);
388
389 #define RSS1 "Rss: 4 kB\n"
390 #define RSS2 "Rss: 0 kB\n"
391 #define PSS1 "Pss: 4 kB\n"
392 #define PSS2 "Pss: 0 kB\n"
393 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
394 memmem(buf, rv, RSS2, strlen(RSS2)));
395 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
396 memmem(buf, rv, PSS2, strlen(PSS2)));
397
398 static const char *S[] = {
399 "Size: 4 kB\n",
400 "KernelPageSize: 4 kB\n",
401 "MMUPageSize: 4 kB\n",
402 "Anonymous: 0 kB\n",
403 "AnonHugePages: 0 kB\n",
404 "Shared_Hugetlb: 0 kB\n",
405 "Private_Hugetlb: 0 kB\n",
406 "Locked: 0 kB\n",
407 };
408 int i;
409
410 for (i = 0; i < ARRAY_SIZE(S); i++) {
411 assert(memmem(buf, rv, S[i], strlen(S[i])));
412 }
413
414 if (g_vsyscall > 0) {
415 assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
416 }
417 }
418
419 /* Test /proc/$PID/smaps_rollup */
420 {
421 char bufr[256];
422 memset(bufr, ' ', sizeof(bufr));
423 len = snprintf(bufr, sizeof(bufr),
424 "%08lx-%08lx ---p 00000000 00:00 0",
425 VADDR, VADDR + PAGE_SIZE);
426 bufr[len] = ' ';
427 snprintf(bufr + MAPS_OFFSET, sizeof(bufr) - MAPS_OFFSET,
428 "[rollup]\n");
429
430 char buf[1024];
431 ssize_t rv;
432 int fd;
433
434 snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
435 fd = open(buf, O_RDONLY);
436 if (fd == -1) {
437 return 1;
438 }
439 rv = read(fd, buf, sizeof(buf));
440 assert(0 <= rv && rv <= sizeof(buf));
441
442 assert(rv >= strlen(bufr));
443 assert(memcmp(buf, bufr, strlen(bufr)) == 0);
444
445 assert(memmem(buf, rv, RSS1, strlen(RSS1)) ||
446 memmem(buf, rv, RSS2, strlen(RSS2)));
447 assert(memmem(buf, rv, PSS1, strlen(PSS1)) ||
448 memmem(buf, rv, PSS2, strlen(PSS2)));
449
450 static const char *S[] = {
451 "Anonymous: 0 kB\n",
452 "AnonHugePages: 0 kB\n",
453 "Shared_Hugetlb: 0 kB\n",
454 "Private_Hugetlb: 0 kB\n",
455 "Locked: 0 kB\n",
456 };
457 int i;
458
459 for (i = 0; i < ARRAY_SIZE(S); i++) {
460 assert(memmem(buf, rv, S[i], strlen(S[i])));
461 }
462 }
463
464 /* Test /proc/$PID/statm */
465 {
466 char buf[64];
467 ssize_t rv;
468 int fd;
469
470 snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
471 fd = open(buf, O_RDONLY);
472 if (fd == -1) {
473 return 1;
474 }
475 rv = read(fd, buf, sizeof(buf));
476 assert(rv == 7 * 2);
477
478 assert(buf[0] == '1'); /* ->total_vm */
479 assert(buf[1] == ' ');
480 assert(buf[2] == '0' || buf[2] == '1'); /* rss */
481 assert(buf[3] == ' ');
482 assert(buf[4] == '0' || buf[2] == '1'); /* file rss */
483 assert(buf[5] == ' ');
484 assert(buf[6] == '1'); /* ELF executable segments */
485 assert(buf[7] == ' ');
486 assert(buf[8] == '0');
487 assert(buf[9] == ' ');
488 assert(buf[10] == '0'); /* ->data_vm + ->stack_vm */
489 assert(buf[11] == ' ');
490 assert(buf[12] == '0');
491 assert(buf[13] == '\n');
492 }
493
494 return 0;
495 }
496 #else
/*
 * Fallback for every architecture other than x86_64: nothing is tested here
 * because the payload exists only as x86_64 machine code.
 */
int main(void)
{
	/* Same status make_private_tmp() uses when unshare() is unavailable. */
	const int status = 4;

	return status;
}
501 #endif
502