/* SPDX-License-Identifier: MIT */
/*
 * io_uring_register.c
 *
 * Description: Unit tests for the io_uring_register system call.
 *
 * Copyright 2019, Red Hat, Inc.
 * Author: Jeff Moyer <[email protected]>
 */
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <poll.h>
#include <assert.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>

#include "helpers.h"
#include "liburing.h"
#include "../src/syscall.h"

static int pagesize;
static rlim_t mlock_limit;
static int devnull;

#if !defined(CONFIG_HAVE_MEMFD_CREATE)
#include <sys/syscall.h>
#include <linux/memfd.h>

static int memfd_create(const char *name, unsigned int flags)
{
	return (int)syscall(SYS_memfd_create, name, flags);
}
#endif

static int expect_fail(int fd, unsigned int opcode, void *arg,
		       unsigned int nr_args, int error)
{
	int ret;

	ret = __sys_io_uring_register(fd, opcode, arg, nr_args);
	if (ret != -1) {
		int ret2 = 0;

		fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
		if (opcode == IORING_REGISTER_BUFFERS) {
			ret2 = __sys_io_uring_register(fd,
					IORING_UNREGISTER_BUFFERS, 0, 0);
		} else if (opcode == IORING_REGISTER_FILES) {
			ret2 = __sys_io_uring_register(fd,
					IORING_UNREGISTER_FILES, 0, 0);
		}
		if (ret2) {
			fprintf(stderr, "internal error: failed to unregister\n");
			exit(1);
		}
		return 1;
	}

	if (errno != error) {
		fprintf(stderr, "expected %d, got %d\n", error, errno);
		return 1;
	}
	return 0;
}

static int new_io_uring(int entries, struct io_uring_params *p)
{
	int fd;

	fd = __sys_io_uring_setup(entries, p);
	if (fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return fd;
}

#define MAXFDS (UINT_MAX * sizeof(int))

static void *map_filebacked(size_t size)
{
	int fd, ret;
	void *addr;
	char template[32] = "io_uring_register-test-XXXXXXXX";

	fd = mkstemp(template);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(template);

	ret = ftruncate(fd, size);
	if (ret < 0) {
		perror("ftruncate");
		close(fd);
		return NULL;
	}

	addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return NULL;
	}

	close(fd);
	return addr;
}

/*
 * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now,
 * but it should probably be augmented to test 253 and 254, specifically;
 * a sketch of such a test follows this function below.
 */
static int test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as;		/* file descriptor address space */
	int fdtable_fd;		/* fd for the file that will be mapped over and over */
	int io_fd;		/* the valid fd for I/O -- /dev/null */
	int *fds;		/* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, mmap anonymous the full size. That will guarantee the
	 * mapping will fit in the memory area selected by mmap. Then,
	 * over-write that mapping using a file-backed mapping, 128MiB at
	 * a time using MAP_FIXED.
	 */
	fd_as = mmap(NULL, MAXFDS, PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}
	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* map the file through the rest of the address space */
	nr_maps = MAXFDS / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		void *addr = &fds[nr_fds];	/* advance fds by 128MiB */

		fds = mmap(addr, 128*1024*1024, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
		if (fds == MAP_FAILED) {
			fprintf(stderr, "mmap failed at offset %lu\n",
				(unsigned long)((char *)addr - (char *)fd_as));
			exit(1);
		}
	}

	/* Now fd_as points to the file descriptor array. */
	/*
	 * We may not be able to register all of these files. Back off
	 * until the registration succeeds.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES,
					      fd_as, nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		status = 0;
		ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES,
					      0, 0);
		if (ret < 0) {
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, MAXFDS);
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", MAXFDS);
		exit(1);
	}

	return status;
}

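/*
 * A minimal sketch of the 253/254 boundary test suggested above. It
 * assumes a kernel that chunks the SCM accounting at SCM_MAX_FD (253)
 * per skb, so registering both 253 and 254 copies of a valid fd is
 * expected to succeed. The helper name and that expectation are
 * assumptions; the function is illustrative and not wired into main().
 */
static int test_scm_max_fd_boundary(int uring_fd)
{
	int fds[254];
	unsigned int nr;
	int i, ret, status = 0;

	for (i = 0; i < 254; i++)
		fds[i] = devnull;

	for (nr = 253; nr <= 254; nr++) {
		ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES,
					      fds, nr);
		if (ret != 0) {
			fprintf(stderr, "register of %u files failed: %d\n",
				nr, errno);
			status = 1;
			continue;
		}
		ret = __sys_io_uring_register(uring_fd,
					      IORING_UNREGISTER_FILES, 0, 0);
		if (ret != 0) {
			fprintf(stderr, "unregister of %u files failed: %d\n",
				nr, errno);
			return 1;
		}
	}
	return status;
}
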
static int test_memlock_exceeded(int fd)
{
	int ret;
	void *buf;
	struct iovec iov;

	/* if limit is larger than 2gb, just skip this test */
	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
		return 0;

	iov.iov_len = mlock_limit * 2;
	buf = t_malloc(iov.iov_len);
	iov.iov_base = buf;

	while (iov.iov_len) {
		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM) {
				iov.iov_len /= 2;
				continue;
			}
			if (errno == EFAULT) {
				free(buf);
				return 0;
			}
			fprintf(stderr, "expected success or EFAULT, got %d\n", errno);
			free(buf);
			return 1;
		}
		ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
					      NULL, 0);
		if (ret != 0) {
			fprintf(stderr, "error: unregister failed with %d\n", errno);
			free(buf);
			return 1;
		}
		break;
	}
	if (!iov.iov_len)
		printf("Unable to register buffers. Check memlock rlimit.\n");

	free(buf);
	return 0;
}

static int test_iovec_nr(int fd)
{
	int i, ret, status = 0;
	unsigned int nr = 1000000;
	struct iovec *iovs;
	void *buf;

	iovs = malloc(nr * sizeof(struct iovec));
	if (!iovs) {
		fprintf(stdout, "can't allocate iovecs, skip\n");
		return 0;
	}
	buf = t_malloc(pagesize);

	for (i = 0; i < nr; i++) {
		iovs[i].iov_base = buf;
		iovs[i].iov_len = pagesize;
	}

	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL);

	/* reduce to UIO_MAXIOV */
	nr = UIO_MAXIOV;
	ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
	if (ret && (errno == ENOMEM || errno == EPERM) && geteuid()) {
		fprintf(stderr, "can't register large iovec for regular users, skip\n");
	} else if (ret != 0) {
		fprintf(stderr, "expected success, got %d\n", errno);
		status = 1;
	} else {
		__sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
	}
	free(buf);
	free(iovs);
	return status;
}

/*
 * The io_uring buffer size limit is 1G; the iov_len limit is ~0UL, I
 * think. A sketch probing the 1G limit follows this function below.
 */
static int test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page. Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM)
				printf("Unable to test registering of a huge "
				       "page. Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.\n");
			else {
				fprintf(stderr, "expected success, got %d\n", errno);
				status = 1;
			}
		} else {
			ret = __sys_io_uring_register(fd,
					IORING_UNREGISTER_BUFFERS, 0, 0);
			if (ret < 0) {
				perror("io_uring_unregister");
				status = 1;
			}
		}
	}
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	buf = map_filebacked(2*1024*1024);
	if (!buf) {
		status = 1;
	} else {
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1,
				      EOPNOTSUPP);
		munmap(buf, 2*1024*1024);
	}

	/*
	 * Bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get. NOTE: this requires
	 * running the test as non-root.
	 */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}

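/*
 * A hedged sketch of a probe at the 1G limit noted above. It assumes the
 * kernel rejects a single iovec longer than 1GiB (historically with
 * EFAULT), and it skips quietly if such a large anonymous mapping cannot
 * be created. The helper name and the exact errno are assumptions; the
 * function is not wired into main().
 */
static int test_iovec_1g_limit_sketch(int fd)
{
	size_t len = 1024UL * 1024 * 1024 + pagesize;	/* just over 1GiB */
	struct iovec iov;
	void *buf;
	int status;

	buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 0;	/* not enough memory/address space; skip */

	iov.iov_base = buf;
	iov.iov_len = len;
	/* assumed to fail; EFAULT is what current kernels appear to return */
	status = expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
	munmap(buf, len);
	return status;
}
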
static int ioring_poll(struct io_uring *ring, int fd, int fixed)
{
	int ret;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	if (fixed)
		sqe->flags = IOSQE_FIXED_FILE;
	sqe->fd = fd;
	sqe->poll_events = POLLIN|POLLOUT;

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "failed to submit poll sqe: %d.\n", errno);
		return 1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
		return 1;
	}
	ret = 0;
	if (cqe->res != POLLOUT) {
		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
			POLLOUT, cqe->res);
		ret = 1;
	}

	io_uring_cqe_seen(ring, cqe);
	return ret;
}

static int test_poll_ringfd(void)
{
	int status = 0;
	int ret;
	int fd;
	struct io_uring ring;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret) {
		perror("io_uring_queue_init");
		return 1;
	}
	fd = ring.ring_fd;

	/* try polling the ring fd */
	status = ioring_poll(&ring, fd, 0);

	/*
	 * now register the ring fd, and try the poll again. This should
	 * fail, because the kernel does not allow registering of the
	 * ring_fd.
	 */
	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF);

	/* tear down queue */
	io_uring_queue_exit(&ring);

	return status;
}

static int test_shmem(void)
{
	const char pattern = 0xEA;
	const int len = 4096;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;
	struct iovec iov;
	int memfd, ret, i;
	char *mem;
	int pipefd[2] = {-1, -1};

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret)
		return 1;

	if (pipe(pipefd)) {
		perror("pipe");
		return 1;
	}
	memfd = memfd_create("uring-shmem-test", 0);
	if (memfd < 0) {
		fprintf(stderr, "memfd_create() failed %i\n", -errno);
		return 1;
	}
	if (ftruncate(memfd, len)) {
		fprintf(stderr, "can't truncate memfd\n");
		return 1;
	}
	mem = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, memfd, 0);
	if (mem == MAP_FAILED) {
		fprintf(stderr, "mmap failed\n");
		return 1;
	}
	for (i = 0; i < len; i++)
		mem[i] = pattern;

	iov.iov_base = mem;
	iov.iov_len = len;
	ret = io_uring_register_buffers(&ring, &iov, 1);
	if (ret) {
		if (ret == -EOPNOTSUPP) {
			fprintf(stdout, "memfd registration isn't supported, "
					"skip\n");
			goto out;
		}

		fprintf(stderr, "buffer reg failed: %d\n", ret);
		return 1;
	}

	/* check that we can read and write from/to the shmem reg buffer */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_write_fixed(sqe, pipefd[1], mem, 512, 0, 0);
	sqe->user_data = 1;

	ret = io_uring_submit(&ring);
	if (ret != 1) {
		fprintf(stderr, "submit write failed\n");
		return 1;
	}
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0 || cqe->user_data != 1 || cqe->res != 512) {
		fprintf(stderr, "writing from shmem failed\n");
		return 1;
	}
	io_uring_cqe_seen(&ring, cqe);

	/* clear it; it should be repopulated with the pattern from the pipe */
	memset(mem, 0, 512);
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read_fixed(sqe, pipefd[0], mem, 512, 0, 0);
	sqe->user_data = 2;

	ret = io_uring_submit(&ring);
	if (ret != 1) {
		fprintf(stderr, "submit read failed\n");
		return 1;
	}
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (ret < 0 || cqe->user_data != 2 || cqe->res != 512) {
		fprintf(stderr, "reading back into shmem failed\n");
		return 1;
	}
	io_uring_cqe_seen(&ring, cqe);

	for (i = 0; i < 512; i++) {
		if (mem[i] != pattern) {
			fprintf(stderr, "data integrity fail\n");
			return 1;
		}
	}

	ret = io_uring_unregister_buffers(&ring);
	if (ret) {
		fprintf(stderr, "buffer unreg failed: %d\n", ret);
		return 1;
	}
out:
	io_uring_queue_exit(&ring);
	close(pipefd[0]);
	close(pipefd[1]);
	munmap(mem, len);
	close(memfd);
	return 0;
}

int main(int argc, char **argv)
{
	int fd, ret;
	unsigned int status = 0;
	struct io_uring_params p;
	struct rlimit rlim;

	if (argc > 1)
		return 0;

	/* setup globals */
	pagesize = getpagesize();
	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
	if (ret < 0) {
		perror("getrlimit");
		return 1;
	}
	mlock_limit = rlim.rlim_cur;
	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0) {
		perror("open /dev/null");
		exit(1);
	}

	/* invalid fd */
	status |= expect_fail(-1, 0, NULL, 0, EBADF);
	/* valid fd that is not an io_uring fd */
	status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP);

	/* invalid opcode */
	memset(&p, 0, sizeof(p));
	fd = new_io_uring(1, &p);
	ret = expect_fail(fd, ~0U, NULL, 0, EINVAL);
	if (ret) {
		status |= ret;
		/*
		 * If the invalid register succeeded, tear down the io_uring
		 * instance and start clean for the next test.
		 */
		close(fd);
		fd = new_io_uring(1, &p);
	}

	/* IORING_REGISTER_BUFFERS */
	status |= test_iovec_size(fd);
	status |= test_iovec_nr(fd);
	/* IORING_REGISTER_FILES */
	status |= test_max_fds(fd);
	close(fd);
	/* uring poll on the uring fd */
	status |= test_poll_ringfd();

	if (status)
		fprintf(stderr, "FAIL\n");

	ret = test_shmem();
	if (ret) {
		fprintf(stderr, "test_shmem() failed\n");
		status |= 1;
	}

	return status;
}