xref: /aosp_15_r20/external/liburing/test/io_uring_register.c (revision 25da2bea747f3a93b4c30fd9708b0618ef55a0e6)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * io_uring_register.c
4  *
5  * Description: Unit tests for the io_uring_register system call.
6  *
7  * Copyright 2019, Red Hat, Inc.
8  * Author: Jeff Moyer <[email protected]>
9  */
10 #include <stdio.h>
11 #include <fcntl.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <errno.h>
16 #include <sys/sysinfo.h>
17 #include <poll.h>
18 #include <assert.h>
19 #include <sys/uio.h>
20 #include <sys/mman.h>
21 #include <linux/mman.h>
22 #include <sys/time.h>
23 #include <sys/resource.h>
24 #include <limits.h>
25 
26 #include "helpers.h"
27 #include "liburing.h"
28 #include "../src/syscall.h"
29 
static int pagesize;		/* system page size, set from getpagesize() in main() */
static rlim_t mlock_limit;	/* RLIMIT_MEMLOCK soft limit captured at startup */
static int devnull;		/* fd for /dev/null, opened in main() */
33 
#if !defined(CONFIG_HAVE_MEMFD_CREATE)
#include <sys/syscall.h>
#include <linux/memfd.h>

/* Fallback wrapper for C libraries that do not provide memfd_create(2). */
static int memfd_create(const char *name, unsigned int flags)
{
	return (int)syscall(SYS_memfd_create, name, flags);
}
#endif
43 
44 
expect_fail(int fd,unsigned int opcode,void * arg,unsigned int nr_args,int error)45 static int expect_fail(int fd, unsigned int opcode, void *arg,
46 	    unsigned int nr_args, int error)
47 {
48 	int ret;
49 
50 	ret = __sys_io_uring_register(fd, opcode, arg, nr_args);
51 	if (ret != -1) {
52 		int ret2 = 0;
53 
54 		fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
55 		if (opcode == IORING_REGISTER_BUFFERS) {
56 			ret2 = __sys_io_uring_register(fd,
57 					IORING_UNREGISTER_BUFFERS, 0, 0);
58 		} else if (opcode == IORING_REGISTER_FILES) {
59 			ret2 = __sys_io_uring_register(fd,
60 					IORING_UNREGISTER_FILES, 0, 0);
61 		}
62 		if (ret2) {
63 			fprintf(stderr, "internal error: failed to unregister\n");
64 			exit(1);
65 		}
66 		return 1;
67 	}
68 
69 	if (errno != error) {
70 		fprintf(stderr, "expected %d, got %d\n", error, errno);
71 		return 1;
72 	}
73 	return 0;
74 }
75 
/*
 * Create an io_uring instance with the given number of entries, aborting
 * the whole test program if setup fails.  Returns the ring fd.
 */
static int new_io_uring(int entries, struct io_uring_params *p)
{
	int ring_fd = __sys_io_uring_setup(entries, p);

	if (ring_fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return ring_fd;
}
87 
/* Full size of a UINT_MAX-entry fd table.  NOTE(review): currently unused;
 * test_max_fds() open-codes UINT_MAX * sizeof(int) instead. */
#define MAXFDS (UINT_MAX * sizeof(int))
89 
/*
 * Create an anonymous temporary file of the requested size and map it
 * MAP_SHARED.  The file is unlinked and its descriptor closed before
 * returning, so the mapping is the only remaining reference.
 * Returns the mapped address, or NULL on failure.
 */
static void *map_filebacked(size_t size)
{
	char template[32] = "io_uring_register-test-XXXXXXXX";
	void *mem = NULL;
	int fd;

	fd = mkstemp(template);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(template);

	if (ftruncate(fd, size) == 0) {
		mem = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED,
			   fd, 0);
		if (mem == MAP_FAILED) {
			perror("mmap");
			mem = NULL;
		}
	} else {
		perror("ftruncate");
	}

	/* mapping (if any) stays valid after the fd is closed */
	close(fd);
	return mem;
}
120 
121 /*
122  * NOTE: this is now limited by SCM_MAX_FD (253).  Keep the code for now,
123  * but probably should augment it to test 253 and 254, specifically.
124  */
/*
 * Register as many files as the kernel will accept, starting from a full
 * UINT_MAX-entry table and halving the count on failure.  To build the
 * table without gigabytes of RAM, one 128MiB file full of /dev/null's fd
 * is mapped repeatedly (MAP_FIXED) over a single large reservation.
 * Returns 0 if some registration size succeeded, 1 otherwise.
 */
static int test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as; /* file descriptor address space */
	int fdtable_fd; /* fd for the file that will be mapped over and over */
	int io_fd; /* the valid fd for I/O -- /dev/null */
	int *fds; /* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, mmap anonymous the full size.  That will guarantee the
	 * mapping will fit in the memory area selected by mmap.  Then,
	 * over-write that mapping using a file-backed mapping, 128MiB at
	 * a time using MAP_FIXED.
	 */
	fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		/* not enough address space: skip the test, don't fail it */
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}
	/* overlay the first 128MiB of the reservation with the table file */
	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* map the file through the rest of the address space */
	nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		fds = &fds[nr_fds]; /* advance fds by 128MiB */
		fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
		if (fds == MAP_FAILED) {
			/* NOTE(review): fds lies beyond fd_as here, so
			 * fd_as - fds is negative before the cast; the
			 * printed "offset" looks inverted — verify the
			 * intended operand order. */
			fprintf(stderr, "mmap failed at offset %lu\n",
			       (unsigned long)((char *)fd_as - (char *)fds));
			exit(1);
		}
	}

	/* Now fd_as points to the file descriptor array. */
	/*
	 * We may not be able to map all of these files.  Let's back off
	 * until success.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES,
						fd_as, nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		status = 0;	/* at least one size was accepted */
		ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES,
						0, 0);
		if (ret < 0) {
			/* NOTE(review): this save/restore of errno is a
			 * no-op; perror() already reads errno directly. */
			ret = errno;
			errno = ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, UINT_MAX * sizeof(int));
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", UINT_MAX * sizeof(int));
		exit(1);
	}

	return status;
}
228 
test_memlock_exceeded(int fd)229 static int test_memlock_exceeded(int fd)
230 {
231 	int ret;
232 	void *buf;
233 	struct iovec iov;
234 
235 	/* if limit is larger than 2gb, just skip this test */
236 	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
237 		return 0;
238 
239 	iov.iov_len = mlock_limit * 2;
240 	buf = t_malloc(iov.iov_len);
241 	iov.iov_base = buf;
242 
243 	while (iov.iov_len) {
244 		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
245 		if (ret < 0) {
246 			if (errno == ENOMEM) {
247 				iov.iov_len /= 2;
248 				continue;
249 			}
250 			if (errno == EFAULT) {
251 				free(buf);
252 				return 0;
253 			}
254 			fprintf(stderr, "expected success or EFAULT, got %d\n", errno);
255 			free(buf);
256 			return 1;
257 		}
258 		ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
259 						NULL, 0);
260 		if (ret != 0) {
261 			fprintf(stderr, "error: unregister failed with %d\n", errno);
262 			free(buf);
263 			return 1;
264 		}
265 		break;
266 	}
267 	if (!iov.iov_len)
268 		printf("Unable to register buffers.  Check memlock rlimit.\n");
269 
270 	free(buf);
271 	return 0;
272 }
273 
test_iovec_nr(int fd)274 static int test_iovec_nr(int fd)
275 {
276 	int i, ret, status = 0;
277 	unsigned int nr = 1000000;
278 	struct iovec *iovs;
279 	void *buf;
280 
281 	iovs = malloc(nr * sizeof(struct iovec));
282 	if (!iovs) {
283 		fprintf(stdout, "can't allocate iovecs, skip\n");
284 		return 0;
285 	}
286 	buf = t_malloc(pagesize);
287 
288 	for (i = 0; i < nr; i++) {
289 		iovs[i].iov_base = buf;
290 		iovs[i].iov_len = pagesize;
291 	}
292 
293 	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL);
294 
295 	/* reduce to UIO_MAXIOV */
296 	nr = UIO_MAXIOV;
297 	ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
298 	if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
299 		fprintf(stderr, "can't register large iovec for regular users, skip\n");
300 	} else if (ret != 0) {
301 		fprintf(stderr, "expected success, got %d\n", errno);
302 		status = 1;
303 	} else {
304 		__sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
305 	}
306 	free(buf);
307 	free(iovs);
308 	return status;
309 }
310 
/*
 * IORING_REGISTER_BUFFERS size tests: io_uring caps a single registered
 * buffer at 1GiB; iov_len itself is only bounded by its type.  Exercise
 * NULL base, zero length, length past the mapping, huge pages, and
 * file-backed buffers.  Returns 0 on success, non-zero on failure.
 */
static int test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page.  Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM)
				printf("Unable to test registering of a huge "
				       "page.  Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.");
			else {
				fprintf(stderr, "expected success, got %d\n", errno);
				status = 1;
			}
		} else {
			ret = __sys_io_uring_register(fd,
					IORING_UNREGISTER_BUFFERS, 0, 0);
			if (ret < 0) {
				perror("io_uring_unregister");
				status = 1;
			}
		}
	}
	/* NOTE(review): if the huge-page mmap failed, iov still holds the
	 * previous (already fully unmapped) 2-page range, so this munmap
	 * re-unmaps freed pages.  munmap() succeeds on unmapped page-aligned
	 * ranges, but the reliance looks accidental — verify intent. */
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	/* NOTE(review): if map_filebacked() fails, buf is NULL and the
	 * expect_fail() below sees EFAULT rather than EOPNOTSUPP; status
	 * is already 1 by then, so the test still reports failure. */
	buf = map_filebacked(2*1024*1024);
	if (!buf)
		status = 1;
	iov.iov_base = buf;
	iov.iov_len = 2*1024*1024;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP);
	munmap(buf, 2*1024*1024);

	/* bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get.  NOTE: this requires
	 * running the test as non-root. */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}
398 
ioring_poll(struct io_uring * ring,int fd,int fixed)399 static int ioring_poll(struct io_uring *ring, int fd, int fixed)
400 {
401 	int ret;
402 	struct io_uring_sqe *sqe;
403 	struct io_uring_cqe *cqe;
404 
405 	sqe = io_uring_get_sqe(ring);
406 	memset(sqe, 0, sizeof(*sqe));
407 	sqe->opcode = IORING_OP_POLL_ADD;
408 	if (fixed)
409 		sqe->flags = IOSQE_FIXED_FILE;
410 	sqe->fd = fd;
411 	sqe->poll_events = POLLIN|POLLOUT;
412 
413 	ret = io_uring_submit(ring);
414 	if (ret != 1) {
415 		fprintf(stderr, "failed to submit poll sqe: %d.\n", errno);
416 		return 1;
417 	}
418 
419 	ret = io_uring_wait_cqe(ring, &cqe);
420 	if (ret < 0) {
421 		fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
422 		return 1;
423 	}
424 	ret = 0;
425 	if (cqe->res != POLLOUT) {
426 		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
427 		       POLLOUT, cqe->res);
428 		ret = 1;
429 	}
430 
431 	io_uring_cqe_seen(ring, cqe);
432 	return ret;
433 }
434 
test_poll_ringfd(void)435 static int test_poll_ringfd(void)
436 {
437 	int status = 0;
438 	int ret;
439 	int fd;
440 	struct io_uring ring;
441 
442 	ret = io_uring_queue_init(1, &ring, 0);
443 	if (ret) {
444 		perror("io_uring_queue_init");
445 		return 1;
446 	}
447 	fd = ring.ring_fd;
448 
449 	/* try polling the ring fd */
450 	status = ioring_poll(&ring, fd, 0);
451 
452 	/*
453 	 * now register the ring fd, and try the poll again.  This should
454 	 * fail, because the kernel does not allow registering of the
455 	 * ring_fd.
456 	 */
457 	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF);
458 
459 	/* tear down queue */
460 	io_uring_queue_exit(&ring);
461 
462 	return status;
463 }
464 
test_shmem(void)465 static int test_shmem(void)
466 {
467 	const char pattern = 0xEA;
468 	const int len = 4096;
469 	struct io_uring_sqe *sqe;
470 	struct io_uring_cqe *cqe;
471 	struct io_uring ring;
472 	struct iovec iov;
473 	int memfd, ret, i;
474 	char *mem;
475 	int pipefd[2] = {-1, -1};
476 
477 	ret = io_uring_queue_init(8, &ring, 0);
478 	if (ret)
479 		return 1;
480 
481 	if (pipe(pipefd)) {
482 		perror("pipe");
483 		return 1;
484 	}
485 	memfd = memfd_create("uring-shmem-test", 0);
486 	if (memfd < 0) {
487 		fprintf(stderr, "memfd_create() failed %i\n", -errno);
488 		return 1;
489 	}
490 	if (ftruncate(memfd, len)) {
491 		fprintf(stderr, "can't truncate memfd\n");
492 		return 1;
493 	}
494 	mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
495 	if (!mem) {
496 		fprintf(stderr, "mmap failed\n");
497 		return 1;
498 	}
499 	for (i = 0; i < len; i++)
500 		mem[i] = pattern;
501 
502 	iov.iov_base = mem;
503 	iov.iov_len = len;
504 	ret = io_uring_register_buffers(&ring, &iov, 1);
505 	if (ret) {
506 		if (ret == -EOPNOTSUPP) {
507 			fprintf(stdout, "memfd registration isn't supported, "
508 					"skip\n");
509 			goto out;
510 		}
511 
512 		fprintf(stderr, "buffer reg failed: %d\n", ret);
513 		return 1;
514 	}
515 
516 	/* check that we can read and write from/to shmem reg buffer */
517 	sqe = io_uring_get_sqe(&ring);
518 	io_uring_prep_write_fixed(sqe, pipefd[1], mem, 512, 0, 0);
519 	sqe->user_data = 1;
520 
521 	ret = io_uring_submit(&ring);
522 	if (ret != 1) {
523 		fprintf(stderr, "submit write failed\n");
524 		return 1;
525 	}
526 	ret = io_uring_wait_cqe(&ring, &cqe);
527 	if (ret < 0 || cqe->user_data != 1 || cqe->res != 512) {
528 		fprintf(stderr, "reading from shmem failed\n");
529 		return 1;
530 	}
531 	io_uring_cqe_seen(&ring, cqe);
532 
533 	/* clean it, should be populated with the pattern back from the pipe */
534 	memset(mem, 0, 512);
535 	sqe = io_uring_get_sqe(&ring);
536 	io_uring_prep_read_fixed(sqe, pipefd[0], mem, 512, 0, 0);
537 	sqe->user_data = 2;
538 
539 	ret = io_uring_submit(&ring);
540 	if (ret != 1) {
541 		fprintf(stderr, "submit write failed\n");
542 		return 1;
543 	}
544 	ret = io_uring_wait_cqe(&ring, &cqe);
545 	if (ret < 0 || cqe->user_data != 2 || cqe->res != 512) {
546 		fprintf(stderr, "reading from shmem failed\n");
547 		return 1;
548 	}
549 	io_uring_cqe_seen(&ring, cqe);
550 
551 	for (i = 0; i < 512; i++) {
552 		if (mem[i] != pattern) {
553 			fprintf(stderr, "data integrity fail\n");
554 			return 1;
555 		}
556 	}
557 
558 	ret = io_uring_unregister_buffers(&ring);
559 	if (ret) {
560 		fprintf(stderr, "buffer unreg failed: %d\n", ret);
561 		return 1;
562 	}
563 out:
564 	io_uring_queue_exit(&ring);
565 	close(pipefd[0]);
566 	close(pipefd[1]);
567 	munmap(mem, len);
568 	close(memfd);
569 	return 0;
570 }
571 
main(int argc,char ** argv)572 int main(int argc, char **argv)
573 {
574 	int fd, ret;
575 	unsigned int status = 0;
576 	struct io_uring_params p;
577 	struct rlimit rlim;
578 
579 	if (argc > 1)
580 		return 0;
581 
582 	/* setup globals */
583 	pagesize = getpagesize();
584 	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
585 	if (ret < 0) {
586 		perror("getrlimit");
587 		return 1;
588 	}
589 	mlock_limit = rlim.rlim_cur;
590 	devnull = open("/dev/null", O_RDWR);
591 	if (devnull < 0) {
592 		perror("open /dev/null");
593 		exit(1);
594 	}
595 
596 	/* invalid fd */
597 	status |= expect_fail(-1, 0, NULL, 0, EBADF);
598 	/* valid fd that is not an io_uring fd */
599 	status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP);
600 
601 	/* invalid opcode */
602 	memset(&p, 0, sizeof(p));
603 	fd = new_io_uring(1, &p);
604 	ret = expect_fail(fd, ~0U, NULL, 0, EINVAL);
605 	if (ret) {
606 		/* if this succeeds, tear down the io_uring instance
607 		 * and start clean for the next test. */
608 		close(fd);
609 		fd = new_io_uring(1, &p);
610 	}
611 
612 	/* IORING_REGISTER_BUFFERS */
613 	status |= test_iovec_size(fd);
614 	status |= test_iovec_nr(fd);
615 	/* IORING_REGISTER_FILES */
616 	status |= test_max_fds(fd);
617 	close(fd);
618 	/* uring poll on the uring fd */
619 	status |= test_poll_ringfd();
620 
621 	if (status)
622 		fprintf(stderr, "FAIL\n");
623 
624 	ret = test_shmem();
625 	if (ret) {
626 		fprintf(stderr, "test_shmem() failed\n");
627 		status |= 1;
628 	}
629 
630 	return status;
631 }
632