1 /*
2 * Copyright 2020 Google LLC
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <poll.h>
9 #include <sys/mman.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <xf86drm.h>
14
15 #ifdef MAJOR_IN_MKDEV
16 #include <sys/mkdev.h>
17 #endif
18 #ifdef MAJOR_IN_SYSMACROS
19 #include <sys/sysmacros.h>
20 #endif
21
22 #include "drm-uapi/virtgpu_drm.h"
23 #include "util/sparse_array.h"
24 #define VIRGL_RENDERER_UNSTABLE_APIS
25 #include "virtio-gpu/virglrenderer_hw.h"
26
27 #include "vn_renderer_internal.h"
28
29 #ifndef VIRTGPU_PARAM_GUEST_VRAM
30 /* All guest allocations happen via virtgpu dedicated heap. */
31 #define VIRTGPU_PARAM_GUEST_VRAM 9
32 #endif
33
34 #ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
35 #define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
36 #endif
37
38 /* XXX comment these out to really use kernel uapi */
39 #define SIMULATE_BO_SIZE_FIX 1
40 #define SIMULATE_SYNCOBJ 1
41 #define SIMULATE_SUBMIT 1
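/* What each knob simulates (as used below in this file):
 * - SIMULATE_BO_SIZE_FIX aligns blob sizes to 4096 before blob creation
 * - SIMULATE_SYNCOBJ emulates timeline drm_syncobjs in userspace on top of
 *   sync_file fds returned by execbuffer
 * - SIMULATE_SUBMIT implements submission batch-by-batch via
 *   DRM_IOCTL_VIRTGPU_EXECBUFFER
 */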
42
43 #define VIRTGPU_PCI_VENDOR_ID 0x1af4
44 #define VIRTGPU_PCI_DEVICE_ID 0x1050
45
46 struct virtgpu;
47
48 struct virtgpu_shmem {
49 struct vn_renderer_shmem base;
50 uint32_t gem_handle;
51 };
52
53 struct virtgpu_bo {
54 struct vn_renderer_bo base;
55 uint32_t gem_handle;
56 uint32_t blob_flags;
57 };
58
59 struct virtgpu_sync {
60 struct vn_renderer_sync base;
61
62 /*
63 * drm_syncobj is in one of these states
64 *
65 * - value N: drm_syncobj has a signaled fence chain with seqno N
66 * - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
67 * (which may point to another unsignaled fence chain with
68 * seqno between N and M, and so on)
69 *
70 * TODO Do we want to use binary drm_syncobjs? They would be
71 *
72 * - value 0: drm_syncobj has no fence
73 * - value 1: drm_syncobj has a signaled fence with seqno 0
74 *
75 * They are cheaper but require special care.
76 */
77 uint32_t syncobj_handle;
78 };
79
80 struct virtgpu {
81 struct vn_renderer base;
82
83 struct vn_instance *instance;
84
85 int fd;
86
87 bool has_primary;
88 int primary_major;
89 int primary_minor;
90 int render_major;
91 int render_minor;
92
93 int bustype;
94 drmPciBusInfo pci_bus_info;
95
96 uint32_t max_timeline_count;
97
98 struct {
99 enum virgl_renderer_capset id;
100 uint32_t version;
101 struct virgl_renderer_capset_venus data;
102 } capset;
103
104 uint32_t shmem_blob_mem;
105 uint32_t bo_blob_mem;
106
107 /* note that we index by gem_handle instead of res_id because res_id is
108 * monotonically increasing by default (see virtio_gpu_resource_id_get)
109 * while gem handles get recycled, which keeps the sparse arrays bounded
110 */
111 struct util_sparse_array shmem_array;
112 struct util_sparse_array bo_array;
113
114 mtx_t dma_buf_import_mutex;
115
116 struct vn_renderer_shmem_cache shmem_cache;
117
118 bool supports_cross_device;
119 };
120
121 #ifdef SIMULATE_SYNCOBJ
122
123 #include "util/hash_table.h"
124 #include "util/u_idalloc.h"
125
126 static struct {
127 mtx_t mutex;
128 struct hash_table *syncobjs;
129 struct util_idalloc ida;
130
131 int signaled_fd;
132 } sim;
133
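/* emulated timeline syncobj: point is the last known signaled value;
 * pending_fd, when valid, is a sync_file whose signaling advances point to
 * pending_point; pending_cpu marks fences from the CPU ring, whose infinite
 * waits get capped (see sim_syncobj_update_point_locked)
 */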
134 struct sim_syncobj {
135 mtx_t mutex;
136 uint64_t point;
137
138 int pending_fd;
139 uint64_t pending_point;
140 bool pending_cpu;
141 };
142
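/* Allocate an emulated syncobj. On first use, this also creates the global
 * table and obtains a sync_file from an empty execbuffer on the CPU ring
 * (ring_idx 0) to serve as sim.signaled_fd. Returns a non-zero, 1-based
 * handle on success and 0 on failure.
 */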
143 static uint32_t
144 sim_syncobj_create(struct virtgpu *gpu, bool signaled)
145 {
146 struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
147 if (!syncobj)
148 return 0;
149
150 mtx_init(&syncobj->mutex, mtx_plain);
151 syncobj->pending_fd = -1;
152
153 mtx_lock(&sim.mutex);
154
155 /* initialize lazily */
156 if (!sim.syncobjs) {
157 sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
158 if (!sim.syncobjs) {
159 mtx_unlock(&sim.mutex);
160 return 0;
161 }
162
163 util_idalloc_init(&sim.ida, 32);
164
165 struct drm_virtgpu_execbuffer args = {
166 .flags = VIRTGPU_EXECBUF_RING_IDX | VIRTGPU_EXECBUF_FENCE_FD_OUT,
167 .ring_idx = 0, /* CPU ring */
168 };
169 int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
170 if (ret || args.fence_fd < 0) {
171 _mesa_hash_table_destroy(sim.syncobjs, NULL);
172 sim.syncobjs = NULL;
173 mtx_unlock(&sim.mutex);
174 return 0;
175 }
176
177 sim.signaled_fd = args.fence_fd;
178 }
179
180 const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
181 _mesa_hash_table_insert(sim.syncobjs,
182 (const void *)(uintptr_t)syncobj_handle, syncobj);
183
184 mtx_unlock(&sim.mutex);
185
186 return syncobj_handle;
187 }
188
189 static void
190 sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
191 {
192 struct sim_syncobj *syncobj = NULL;
193
194 mtx_lock(&sim.mutex);
195
196 struct hash_entry *entry = _mesa_hash_table_search(
197 sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
198 if (entry) {
199 syncobj = entry->data;
200 _mesa_hash_table_remove(sim.syncobjs, entry);
201 util_idalloc_free(&sim.ida, syncobj_handle - 1);
202 }
203
204 mtx_unlock(&sim.mutex);
205
206 if (syncobj) {
207 if (syncobj->pending_fd >= 0)
208 close(syncobj->pending_fd);
209 mtx_destroy(&syncobj->mutex);
210 free(syncobj);
211 }
212 }
213
214 static VkResult
215 sim_syncobj_poll(int fd, int poll_timeout)
216 {
217 struct pollfd pollfd = {
218 .fd = fd,
219 .events = POLLIN,
220 };
221 int ret;
222 do {
223 ret = poll(&pollfd, 1, poll_timeout);
224 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
225
226 if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
227 return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
228 : VK_ERROR_DEVICE_LOST;
229 }
230
231 return ret ? VK_SUCCESS : VK_TIMEOUT;
232 }
233
234 static void
235 sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
236 {
237 syncobj->point = point;
238
239 if (syncobj->pending_fd >= 0) {
240 close(syncobj->pending_fd);
241 syncobj->pending_fd = -1;
242 syncobj->pending_point = point;
243 }
244 }
245
246 static void
247 sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
248 {
249 if (syncobj->pending_fd >= 0) {
250 VkResult result;
251 if (syncobj->pending_cpu) {
252 if (poll_timeout == -1) {
253 const int max_cpu_timeout = 2000;
254 poll_timeout = max_cpu_timeout;
255 result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
256 if (result == VK_TIMEOUT) {
257 vn_log(NULL, "cpu sync timed out after %dms; ignoring",
258 poll_timeout);
259 result = VK_SUCCESS;
260 }
261 } else {
262 result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
263 }
264 } else {
265 result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
266 }
267 if (result == VK_SUCCESS) {
268 close(syncobj->pending_fd);
269 syncobj->pending_fd = -1;
270 syncobj->point = syncobj->pending_point;
271 }
272 }
273 }
274
275 static struct sim_syncobj *
276 sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
277 {
278 struct sim_syncobj *syncobj = NULL;
279
280 mtx_lock(&sim.mutex);
281 struct hash_entry *entry = _mesa_hash_table_search(
282 sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
283 if (entry)
284 syncobj = entry->data;
285 mtx_unlock(&sim.mutex);
286
287 return syncobj;
288 }
289
290 static int
291 sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
292 {
293 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
294 if (!syncobj)
295 return -1;
296
297 mtx_lock(&syncobj->mutex);
298 sim_syncobj_set_point_locked(syncobj, 0);
299 mtx_unlock(&syncobj->mutex);
300
301 return 0;
302 }
303
304 static int
305 sim_syncobj_query(struct virtgpu *gpu,
306 uint32_t syncobj_handle,
307 uint64_t *point)
308 {
309 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
310 if (!syncobj)
311 return -1;
312
313 mtx_lock(&syncobj->mutex);
314 sim_syncobj_update_point_locked(syncobj, 0);
315 *point = syncobj->point;
316 mtx_unlock(&syncobj->mutex);
317
318 return 0;
319 }
320
321 static int
322 sim_syncobj_signal(struct virtgpu *gpu,
323 uint32_t syncobj_handle,
324 uint64_t point)
325 {
326 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
327 if (!syncobj)
328 return -1;
329
330 mtx_lock(&syncobj->mutex);
331 sim_syncobj_set_point_locked(syncobj, point);
332 mtx_unlock(&syncobj->mutex);
333
334 return 0;
335 }
336
337 static int
338 sim_syncobj_submit(struct virtgpu *gpu,
339 uint32_t syncobj_handle,
340 int sync_fd,
341 uint64_t point,
342 bool cpu)
343 {
344 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
345 if (!syncobj)
346 return -1;
347
348 int pending_fd = dup(sync_fd);
349 if (pending_fd < 0) {
350 vn_log(gpu->instance, "failed to dup sync fd");
351 return -1;
352 }
353
354 mtx_lock(&syncobj->mutex);
355
356 if (syncobj->pending_fd >= 0) {
357 mtx_unlock(&syncobj->mutex);
358
359 /* TODO */
360 vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
361 close(pending_fd);
362 return -1;
363 }
364 if (syncobj->point >= point)
365 vn_log(gpu->instance, "non-monotonic signaling");
366
367 syncobj->pending_fd = pending_fd;
368 syncobj->pending_point = point;
369 syncobj->pending_cpu = cpu;
370
371 mtx_unlock(&syncobj->mutex);
372
373 return 0;
374 }
375
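/* Convert a timeout in nanoseconds to a poll() timeout in milliseconds,
 * rounding up. Values that overflow or exceed INT_MAX map to -1 (wait
 * forever); 0 stays 0 (non-blocking).
 */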
376 static int
377 timeout_to_poll_timeout(uint64_t timeout)
378 {
379 const uint64_t ns_per_ms = 1000000;
380 const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
381 if (!ms && timeout)
382 return -1;
383 return ms <= INT_MAX ? ms : -1;
384 }
385
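/* Emulated wait: polls each syncobj's pending sync_file in turn. Returns -1
 * with errno set to ETIME on timeout; wait_avail is not supported.
 */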
386 static int
387 sim_syncobj_wait(struct virtgpu *gpu,
388 const struct vn_renderer_wait *wait,
389 bool wait_avail)
390 {
391 if (wait_avail)
392 return -1;
393
394 const int poll_timeout = timeout_to_poll_timeout(wait->timeout);
395
396 /* TODO poll all fds at the same time */
397 for (uint32_t i = 0; i < wait->sync_count; i++) {
398 struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
399 const uint64_t point = wait->sync_values[i];
400
401 struct sim_syncobj *syncobj =
402 sim_syncobj_lookup(gpu, sync->syncobj_handle);
403 if (!syncobj)
404 return -1;
405
406 mtx_lock(&syncobj->mutex);
407
408 if (syncobj->point < point)
409 sim_syncobj_update_point_locked(syncobj, poll_timeout);
410
411 if (syncobj->point < point) {
412 if (wait->wait_any && i < wait->sync_count - 1 &&
413 syncobj->pending_fd < 0) {
414 mtx_unlock(&syncobj->mutex);
415 continue;
416 }
417 errno = ETIME;
418 mtx_unlock(&syncobj->mutex);
419 return -1;
420 }
421
422 mtx_unlock(&syncobj->mutex);
423
424 if (wait->wait_any)
425 break;
426
427 /* TODO adjust poll_timeout */
428 }
429
430 return 0;
431 }
432
433 static int
434 sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
435 {
436 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
437 if (!syncobj)
438 return -1;
439
440 int fd = -1;
441 mtx_lock(&syncobj->mutex);
442 if (syncobj->pending_fd >= 0)
443 fd = dup(syncobj->pending_fd);
444 else
445 fd = dup(sim.signaled_fd);
446 mtx_unlock(&syncobj->mutex);
447
448 return fd;
449 }
450
451 static uint32_t
452 sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
453 {
454 struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
455 if (!syncobj)
456 return 0;
457
458 if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
459 return 0;
460
461 return syncobj_handle;
462 }
463
464 #endif /* SIMULATE_SYNCOBJ */
465
466 #ifdef SIMULATE_SUBMIT
467
468 static int
469 sim_submit_signal_syncs(struct virtgpu *gpu,
470 int sync_fd,
471 struct vn_renderer_sync *const *syncs,
472 const uint64_t *sync_values,
473 uint32_t sync_count,
474 bool cpu)
475 {
476 for (uint32_t i = 0; i < sync_count; i++) {
477 struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
478 const uint64_t pending_point = sync_values[i];
479
480 #ifdef SIMULATE_SYNCOBJ
481 int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
482 pending_point, cpu);
483 if (ret)
484 return ret;
485 #else
486 /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
487 * DRM_IOCTL_SYNCOBJ_TRANSFER
488 */
489 return -1;
490 #endif
491 }
492
493 return 0;
494 }
495
496 static uint32_t *
497 sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
498 uint32_t bo_count)
499 {
500 uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
501 if (!gem_handles)
502 return NULL;
503
504 for (uint32_t i = 0; i < bo_count; i++) {
505 struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
506 gem_handles[i] = bo->gem_handle;
507 }
508
509 return gem_handles;
510 }
511
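/* Emulated submission: executes each batch with DRM_IOCTL_VIRTGPU_EXECBUFFER
 * on its ring and, when the batch carries syncs, forwards the returned fence
 * fd to the emulated syncobjs before closing it.
 */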
512 static int
513 sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
514 {
515 /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
516 uint32_t *gem_handles = NULL;
517 if (submit->bo_count) {
518 gem_handles =
519 sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
520 if (!gem_handles)
521 return -1;
522 }
523
524 assert(submit->batch_count);
525
526 int ret = 0;
527 for (uint32_t i = 0; i < submit->batch_count; i++) {
528 const struct vn_renderer_submit_batch *batch = &submit->batches[i];
529
530 struct drm_virtgpu_execbuffer args = {
531 .flags = VIRTGPU_EXECBUF_RING_IDX |
532 (batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0),
533 .size = batch->cs_size,
534 .command = (uintptr_t)batch->cs_data,
535 .bo_handles = (uintptr_t)gem_handles,
536 .num_bo_handles = submit->bo_count,
537 .ring_idx = batch->ring_idx,
538 };
539
540 ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
541 if (ret) {
542 vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
543 break;
544 }
545
546 if (batch->sync_count) {
547 ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
548 batch->sync_values, batch->sync_count,
549 batch->ring_idx == 0);
550 close(args.fence_fd);
551 if (ret)
552 break;
553 }
554 }
555
556 free(gem_handles);
557 return ret;
558 }
559
560 #endif /* SIMULATE_SUBMIT */
561
562 static int
563 virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
564 {
565 return drmIoctl(gpu->fd, request, args);
566 }
567
568 static uint64_t
569 virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
570 {
571 /* val must be zeroed because kernel only writes the lower 32 bits */
572 uint64_t val = 0;
573 struct drm_virtgpu_getparam args = {
574 .param = param,
575 .value = (uintptr_t)&val,
576 };
577
578 const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
579 return ret ? 0 : val;
580 }
581
582 static int
583 virtgpu_ioctl_get_caps(struct virtgpu *gpu,
584 enum virgl_renderer_capset id,
585 uint32_t version,
586 void *capset,
587 size_t capset_size)
588 {
589 struct drm_virtgpu_get_caps args = {
590 .cap_set_id = id,
591 .cap_set_ver = version,
592 .addr = (uintptr_t)capset,
593 .size = capset_size,
594 };
595
596 return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
597 }
598
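/* Create the virtgpu context with the requested capset, 64 timeline rings,
 * and an empty poll-rings mask (no DRM events on fence signaling).
 */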
599 static int
600 virtgpu_ioctl_context_init(struct virtgpu *gpu,
601 enum virgl_renderer_capset capset_id)
602 {
603 struct drm_virtgpu_context_set_param ctx_set_params[3] = {
604 {
605 .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
606 .value = capset_id,
607 },
608 {
609 .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
610 .value = 64,
611 },
612 {
613 .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
614 .value = 0, /* don't generate drm_events on fence signaling */
615 },
616 };
617
618 struct drm_virtgpu_context_init args = {
619 .num_params = ARRAY_SIZE(ctx_set_params),
620 .ctx_set_params = (uintptr_t)&ctx_set_params,
621 };
622
623 return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
624 }
625
626 static uint32_t
627 virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
628 uint32_t blob_mem,
629 uint32_t blob_flags,
630 size_t blob_size,
631 uint64_t blob_id,
632 uint32_t *res_id)
633 {
634 #ifdef SIMULATE_BO_SIZE_FIX
635 blob_size = align64(blob_size, 4096);
636 #endif
637
638 struct drm_virtgpu_resource_create_blob args = {
639 .blob_mem = blob_mem,
640 .blob_flags = blob_flags,
641 .size = blob_size,
642 .blob_id = blob_id,
643 };
644
645 if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
646 return 0;
647
648 *res_id = args.res_handle;
649 return args.bo_handle;
650 }
651
652 static int
653 virtgpu_ioctl_resource_info(struct virtgpu *gpu,
654 uint32_t gem_handle,
655 struct drm_virtgpu_resource_info *info)
656 {
657 *info = (struct drm_virtgpu_resource_info){
658 .bo_handle = gem_handle,
659 };
660
661 return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
662 }
663
664 static void
665 virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
666 {
667 struct drm_gem_close args = {
668 .handle = gem_handle,
669 };
670
671 ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
672 assert(!ret);
673 }
674
675 static int
676 virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
677 uint32_t gem_handle,
678 bool mappable)
679 {
680 struct drm_prime_handle args = {
681 .handle = gem_handle,
682 .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
683 };
684
685 const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
686 return ret ? -1 : args.fd;
687 }
688
689 static uint32_t
690 virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
691 {
692 struct drm_prime_handle args = {
693 .fd = fd,
694 };
695
696 const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
697 return ret ? 0 : args.handle;
698 }
699
700 static void *
701 virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
702 {
703 struct drm_virtgpu_map args = {
704 .handle = gem_handle,
705 };
706
707 if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
708 return NULL;
709
710 void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
711 args.offset);
712 if (ptr == MAP_FAILED)
713 return NULL;
714
715 return ptr;
716 }
717
718 static uint32_t
719 virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
720 {
721 #ifdef SIMULATE_SYNCOBJ
722 return sim_syncobj_create(gpu, signaled);
723 #endif
724
725 struct drm_syncobj_create args = {
726 .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
727 };
728
729 const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
730 return ret ? 0 : args.handle;
731 }
732
733 static void
734 virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
735 {
736 #ifdef SIMULATE_SYNCOBJ
737 sim_syncobj_destroy(gpu, syncobj_handle);
738 return;
739 #endif
740
741 struct drm_syncobj_destroy args = {
742 .handle = syncobj_handle,
743 };
744
745 ASSERTED const int ret =
746 virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
747 assert(!ret);
748 }
749
750 static int
751 virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
752 uint32_t syncobj_handle,
753 bool sync_file)
754 {
755 #ifdef SIMULATE_SYNCOBJ
756 return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
757 #endif
758
759 struct drm_syncobj_handle args = {
760 .handle = syncobj_handle,
761 .flags =
762 sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
763 };
764
765 int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
766 if (ret)
767 return -1;
768
769 return args.fd;
770 }
771
772 static uint32_t
773 virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
774 int fd,
775 uint32_t syncobj_handle)
776 {
777 #ifdef SIMULATE_SYNCOBJ
778 return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
779 #endif
780
781 struct drm_syncobj_handle args = {
782 .handle = syncobj_handle,
783 .flags =
784 syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
785 .fd = fd,
786 };
787
788 int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
789 if (ret)
790 return 0;
791
792 return args.handle;
793 }
794
795 static int
796 virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
797 {
798 #ifdef SIMULATE_SYNCOBJ
799 return sim_syncobj_reset(gpu, syncobj_handle);
800 #endif
801
802 struct drm_syncobj_array args = {
803 .handles = (uintptr_t)&syncobj_handle,
804 .count_handles = 1,
805 };
806
807 return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
808 }
809
810 static int
811 virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
812 uint32_t syncobj_handle,
813 uint64_t *point)
814 {
815 #ifdef SIMULATE_SYNCOBJ
816 return sim_syncobj_query(gpu, syncobj_handle, point);
817 #endif
818
819 struct drm_syncobj_timeline_array args = {
820 .handles = (uintptr_t)&syncobj_handle,
821 .points = (uintptr_t)point,
822 .count_handles = 1,
823 };
824
825 return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
826 }
827
828 static int
829 virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
830 uint32_t syncobj_handle,
831 uint64_t point)
832 {
833 #ifdef SIMULATE_SYNCOBJ
834 return sim_syncobj_signal(gpu, syncobj_handle, point);
835 #endif
836
837 struct drm_syncobj_timeline_array args = {
838 .handles = (uintptr_t)&syncobj_handle,
839 .points = (uintptr_t)&point,
840 .count_handles = 1,
841 };
842
843 return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
844 }
845
846 static int
847 virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
848 const struct vn_renderer_wait *wait,
849 bool wait_avail)
850 {
851 #ifdef SIMULATE_SYNCOBJ
852 return sim_syncobj_wait(gpu, wait, wait_avail);
853 #endif
854
855 /* always enable wait-before-submit */
856 uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
857 if (!wait->wait_any)
858 flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
859 /* wait for the fences to materialize instead of waiting for them to signal */
860 if (wait_avail)
861 flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;
862
863 /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
864 uint32_t *syncobj_handles =
865 malloc(sizeof(*syncobj_handles) * wait->sync_count);
866 if (!syncobj_handles)
867 return -1;
868 for (uint32_t i = 0; i < wait->sync_count; i++) {
869 struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
870 syncobj_handles[i] = sync->syncobj_handle;
871 }
872
873 struct drm_syncobj_timeline_wait args = {
874 .handles = (uintptr_t)syncobj_handles,
875 .points = (uintptr_t)wait->sync_values,
876 .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
877 .count_handles = wait->sync_count,
878 .flags = flags,
879 };
880
881 const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);
882
883 free(syncobj_handles);
884
885 return ret;
886 }
887
888 static int
889 virtgpu_ioctl_submit(struct virtgpu *gpu,
890 const struct vn_renderer_submit *submit)
891 {
892 #ifdef SIMULATE_SUBMIT
893 return sim_submit(gpu, submit);
894 #endif
895 return -1;
896 }
897
898 static VkResult
899 virtgpu_sync_write(struct vn_renderer *renderer,
900 struct vn_renderer_sync *_sync,
901 uint64_t val)
902 {
903 struct virtgpu *gpu = (struct virtgpu *)renderer;
904 struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
905
906 const int ret =
907 virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);
908
909 return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
910 }
911
912 static VkResult
913 virtgpu_sync_read(struct vn_renderer *renderer,
914 struct vn_renderer_sync *_sync,
915 uint64_t *val)
916 {
917 struct virtgpu *gpu = (struct virtgpu *)renderer;
918 struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
919
920 const int ret =
921 virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);
922
923 return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
924 }
925
926 static VkResult
927 virtgpu_sync_reset(struct vn_renderer *renderer,
928 struct vn_renderer_sync *_sync,
929 uint64_t initial_val)
930 {
931 struct virtgpu *gpu = (struct virtgpu *)renderer;
932 struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
933
934 int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
935 if (!ret) {
936 ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
937 initial_val);
938 }
939
940 return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
941 }
942
943 static int
944 virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
945 struct vn_renderer_sync *_sync,
946 bool sync_file)
947 {
948 struct virtgpu *gpu = (struct virtgpu *)renderer;
949 struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
950
951 return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
952 sync_file);
953 }
954
955 static void
956 virtgpu_sync_destroy(struct vn_renderer *renderer,
957 struct vn_renderer_sync *_sync)
958 {
959 struct virtgpu *gpu = (struct virtgpu *)renderer;
960 struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
961
962 virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);
963
964 free(sync);
965 }
966
967 static VkResult
968 virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
969 int fd,
970 bool sync_file,
971 struct vn_renderer_sync **out_sync)
972 {
973 struct virtgpu *gpu = (struct virtgpu *)renderer;
974
975 uint32_t syncobj_handle;
976 if (sync_file) {
977 syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
978 if (!syncobj_handle)
979 return VK_ERROR_OUT_OF_HOST_MEMORY;
980 if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
981 virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
982 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
983 }
984 } else {
985 syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
986 if (!syncobj_handle)
987 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
988 }
989
990 struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
991 if (!sync) {
992 virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
993 return VK_ERROR_OUT_OF_HOST_MEMORY;
994 }
995
996 sync->syncobj_handle = syncobj_handle;
997 sync->base.sync_id = 0; /* TODO */
998
999 *out_sync = &sync->base;
1000
1001 return VK_SUCCESS;
1002 }
1003
1004 static VkResult
1005 virtgpu_sync_create(struct vn_renderer *renderer,
1006 uint64_t initial_val,
1007 uint32_t flags,
1008 struct vn_renderer_sync **out_sync)
1009 {
1010 struct virtgpu *gpu = (struct virtgpu *)renderer;
1011
1012 /* TODO */
1013 if (flags & VN_RENDERER_SYNC_SHAREABLE)
1014 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1015
1016 /* always false because we don't use binary drm_syncobjs */
1017 const bool signaled = false;
1018 const uint32_t syncobj_handle =
1019 virtgpu_ioctl_syncobj_create(gpu, signaled);
1020 if (!syncobj_handle)
1021 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1022
1023 /* add a signaled fence chain with seqno initial_val */
1024 const int ret =
1025 virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
1026 if (ret) {
1027 virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1028 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1029 }
1030
1031 struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
1032 if (!sync) {
1033 virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1034 return VK_ERROR_OUT_OF_HOST_MEMORY;
1035 }
1036
1037 sync->syncobj_handle = syncobj_handle;
1038 /* we will have a sync_id when shareable is true and virtio-gpu associates
1039 * a host sync object with guest drm_syncobj
1040 */
1041 sync->base.sync_id = 0;
1042
1043 *out_sync = &sync->base;
1044
1045 return VK_SUCCESS;
1046 }
1047
1048 static void
1049 virtgpu_bo_invalidate(struct vn_renderer *renderer,
1050 struct vn_renderer_bo *bo,
1051 VkDeviceSize offset,
1052 VkDeviceSize size)
1053 {
1054 /* nop because kernel makes every mapping coherent */
1055 }
1056
1057 static void
1058 virtgpu_bo_flush(struct vn_renderer *renderer,
1059 struct vn_renderer_bo *bo,
1060 VkDeviceSize offset,
1061 VkDeviceSize size)
1062 {
1063 /* nop because kernel makes every mapping coherent */
1064 }
1065
1066 static void *
1067 virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1068 {
1069 struct virtgpu *gpu = (struct virtgpu *)renderer;
1070 struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1071 const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1072
1073 /* not thread-safe, but that is fine */
1074 if (!bo->base.mmap_ptr && mappable) {
1075 bo->base.mmap_ptr =
1076 virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
1077 }
1078
1079 return bo->base.mmap_ptr;
1080 }
1081
1082 static int
1083 virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
1084 struct vn_renderer_bo *_bo)
1085 {
1086 struct virtgpu *gpu = (struct virtgpu *)renderer;
1087 struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1088 const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1089 const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1090
1091 return shareable
1092 ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
1093 : -1;
1094 }
1095
1096 static bool
1097 virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1098 {
1099 struct virtgpu *gpu = (struct virtgpu *)renderer;
1100 struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1101
1102 mtx_lock(&gpu->dma_buf_import_mutex);
1103
1104 /* Check the refcount again after the import lock is grabbed. Yes, we use
1105 * the double-checked locking anti-pattern.
1106 */
1107 if (vn_refcount_is_valid(&bo->base.refcount)) {
1108 mtx_unlock(&gpu->dma_buf_import_mutex);
1109 return false;
1110 }
1111
1112 if (bo->base.mmap_ptr)
1113 munmap(bo->base.mmap_ptr, bo->base.mmap_size);
1114
1115 /* Set gem_handle to 0 to mark the bo invalid. This must happen before
1116 * closing the gem handle; otherwise the same gem handle could be reused
1117 * by a newly created bo, whose tracked gem handle would then be
1118 * unexpectedly zeroed out.
1119 */
1120 const uint32_t gem_handle = bo->gem_handle;
1121 bo->gem_handle = 0;
1122 virtgpu_ioctl_gem_close(gpu, gem_handle);
1123
1124 mtx_unlock(&gpu->dma_buf_import_mutex);
1125
1126 return true;
1127 }
1128
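/* Map memory properties and external handle types to blob flags:
 * HOST_VISIBLE implies MAPPABLE, any external handle implies SHAREABLE, and
 * dma-buf export additionally implies CROSS_DEVICE when supported.
 */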
1129 static uint32_t
1130 virtgpu_bo_blob_flags(struct virtgpu *gpu,
1131 VkMemoryPropertyFlags flags,
1132 VkExternalMemoryHandleTypeFlags external_handles)
1133 {
1134 uint32_t blob_flags = 0;
1135 if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
1136 blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1137 if (external_handles)
1138 blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1139 if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) {
1140 if (gpu->supports_cross_device)
1141 blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;
1142 }
1143
1144 return blob_flags;
1145 }
1146
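/* Import a dma-buf. The fd is converted to a gem handle under
 * dma_buf_import_mutex; if the handle maps to a bo we already track, only
 * its refcount is bumped after validating size and blob flags, otherwise a
 * new tracked bo is initialized from the resource info.
 */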
1147 static VkResult
1148 virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
1149 VkDeviceSize size,
1150 int fd,
1151 VkMemoryPropertyFlags flags,
1152 struct vn_renderer_bo **out_bo)
1153 {
1154 struct virtgpu *gpu = (struct virtgpu *)renderer;
1155 struct drm_virtgpu_resource_info info;
1156 uint32_t gem_handle = 0;
1157 struct virtgpu_bo *bo = NULL;
1158
1159 mtx_lock(&gpu->dma_buf_import_mutex);
1160
1161 gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
1162 if (!gem_handle)
1163 goto fail;
1164 bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1165
1166 if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
1167 goto fail;
1168
1169 /* Upon import, blob_flags is not passed to the kernel and is only for
1170 * internal use. Set it to what works best for us.
1171 * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags
1172 * - classic 3d: SHAREABLE only for export and to fail the map
1173 */
1174 uint32_t blob_flags = VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1175 size_t mmap_size = 0;
1176 if (info.blob_mem) {
1177 /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
1178 if (info.blob_mem != gpu->bo_blob_mem)
1179 goto fail;
1180
1181 blob_flags |= virtgpu_bo_blob_flags(gpu, flags, 0);
1182
1183 /* mmap_size is only used when mappable */
1184 mmap_size = 0;
1185 if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
1186 if (info.size < size)
1187 goto fail;
1188
1189 mmap_size = size;
1190 }
1191 }
1192
1193 /* we check bo->gem_handle instead of bo->refcount because bo->refcount
1194 * may merely have been memset to 0 and thus technically be uninitialized
1195 */
1196 if (bo->gem_handle == gem_handle) {
1197 if (bo->base.mmap_size < mmap_size)
1198 goto fail;
1199 if (blob_flags & ~bo->blob_flags)
1200 goto fail;
1201
1202 /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
1203 * temporarily before virtgpu_bo_destroy grabs the lock
1204 */
1205 vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
1206 } else {
1207 *bo = (struct virtgpu_bo){
1208 .base = {
1209 .refcount = VN_REFCOUNT_INIT(1),
1210 .res_id = info.res_handle,
1211 .mmap_size = mmap_size,
1212 },
1213 .gem_handle = gem_handle,
1214 .blob_flags = blob_flags,
1215 };
1216 }
1217
1218 mtx_unlock(&gpu->dma_buf_import_mutex);
1219
1220 *out_bo = &bo->base;
1221
1222 return VK_SUCCESS;
1223
1224 fail:
1225 if (gem_handle && bo->gem_handle != gem_handle)
1226 virtgpu_ioctl_gem_close(gpu, gem_handle);
1227 mtx_unlock(&gpu->dma_buf_import_mutex);
1228 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1229 }
1230
1231 static VkResult
1232 virtgpu_bo_create_from_device_memory(
1233 struct vn_renderer *renderer,
1234 VkDeviceSize size,
1235 vn_object_id mem_id,
1236 VkMemoryPropertyFlags flags,
1237 VkExternalMemoryHandleTypeFlags external_handles,
1238 struct vn_renderer_bo **out_bo)
1239 {
1240 struct virtgpu *gpu = (struct virtgpu *)renderer;
1241 const uint32_t blob_flags = virtgpu_bo_blob_flags(gpu, flags, external_handles);
1242
1243 uint32_t res_id;
1244 uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1245 gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
1246 if (!gem_handle)
1247 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1248
1249 struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1250 *bo = (struct virtgpu_bo){
1251 .base = {
1252 .refcount = VN_REFCOUNT_INIT(1),
1253 .res_id = res_id,
1254 .mmap_size = size,
1255 },
1256 .gem_handle = gem_handle,
1257 .blob_flags = blob_flags,
1258 };
1259
1260 *out_bo = &bo->base;
1261
1262 return VK_SUCCESS;
1263 }
1264
1265 static void
1266 virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
1267 struct vn_renderer_shmem *_shmem)
1268 {
1269 struct virtgpu *gpu = (struct virtgpu *)renderer;
1270 struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;
1271
1272 munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
1273 virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
1274 }
1275
1276 static void
1277 virtgpu_shmem_destroy(struct vn_renderer *renderer,
1278 struct vn_renderer_shmem *shmem)
1279 {
1280 struct virtgpu *gpu = (struct virtgpu *)renderer;
1281
1282 if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
1283 return;
1284
1285 virtgpu_shmem_destroy_now(&gpu->base, shmem);
1286 }
1287
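/* Try the shmem cache first; on a miss, create a mappable blob with
 * blob_id 0 (host shmem, see virtgpu_init_shmem_blob_mem) and map it.
 */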
1288 static struct vn_renderer_shmem *
1289 virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
1290 {
1291 struct virtgpu *gpu = (struct virtgpu *)renderer;
1292
1293 struct vn_renderer_shmem *cached_shmem =
1294 vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
1295 if (cached_shmem) {
1296 cached_shmem->refcount = VN_REFCOUNT_INIT(1);
1297 return cached_shmem;
1298 }
1299
1300 uint32_t res_id;
1301 uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1302 gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
1303 &res_id);
1304 if (!gem_handle)
1305 return NULL;
1306
1307 void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
1308 if (!ptr) {
1309 virtgpu_ioctl_gem_close(gpu, gem_handle);
1310 return NULL;
1311 }
1312
1313 struct virtgpu_shmem *shmem =
1314 util_sparse_array_get(&gpu->shmem_array, gem_handle);
1315 *shmem = (struct virtgpu_shmem){
1316 .base = {
1317 .refcount = VN_REFCOUNT_INIT(1),
1318 .res_id = res_id,
1319 .mmap_size = size,
1320 .mmap_ptr = ptr,
1321 },
1322 .gem_handle = gem_handle,
1323 };
1324
1325 return &shmem->base;
1326 }
1327
1328 static VkResult
1329 virtgpu_wait(struct vn_renderer *renderer,
1330 const struct vn_renderer_wait *wait)
1331 {
1332 struct virtgpu *gpu = (struct virtgpu *)renderer;
1333
1334 const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
1335 if (ret && errno != ETIME)
1336 return VK_ERROR_DEVICE_LOST;
1337
1338 return ret ? VK_TIMEOUT : VK_SUCCESS;
1339 }
1340
1341 static VkResult
1342 virtgpu_submit(struct vn_renderer *renderer,
1343 const struct vn_renderer_submit *submit)
1344 {
1345 struct virtgpu *gpu = (struct virtgpu *)renderer;
1346
1347 const int ret = virtgpu_ioctl_submit(gpu, submit);
1348 return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
1349 }
1350
1351 static void
1352 virtgpu_init_renderer_info(struct virtgpu *gpu)
1353 {
1354 struct vn_renderer_info *info = &gpu->base.info;
1355
1356 info->drm.props = (VkPhysicalDeviceDrmPropertiesEXT){
1357 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT,
1358 .hasPrimary = gpu->has_primary,
1359 .hasRender = true,
1360 .primaryMajor = gpu->primary_major,
1361 .primaryMinor = gpu->primary_minor,
1362 .renderMajor = gpu->render_major,
1363 .renderMinor = gpu->render_minor,
1364 };
1365
1366 info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
1367 info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;
1368
1369 if (gpu->bustype == DRM_BUS_PCI) {
1370 info->pci.has_bus_info = true;
1371 info->pci.props = (VkPhysicalDevicePCIBusInfoPropertiesEXT){
1372 .sType =
1373 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT,
1374 .pciDomain = gpu->pci_bus_info.domain,
1375 .pciBus = gpu->pci_bus_info.bus,
1376 .pciDevice = gpu->pci_bus_info.dev,
1377 .pciFunction = gpu->pci_bus_info.func,
1378 };
1379 }
1380
1381 info->has_dma_buf_import = true;
1382 /* TODO switch from emulation to drm_syncobj */
1383 info->has_external_sync = true;
1384
1385 info->has_implicit_fencing = false;
1386
1387 const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
1388 info->wire_format_version = capset->wire_format_version;
1389 info->vk_xml_version = capset->vk_xml_version;
1390 info->vk_ext_command_serialization_spec_version =
1391 capset->vk_ext_command_serialization_spec_version;
1392 info->vk_mesa_venus_protocol_spec_version =
1393 capset->vk_mesa_venus_protocol_spec_version;
1394 assert(capset->supports_blob_id_0);
1395
1396 /* ensure vk_extension_mask is large enough to hold all capset masks */
1397 STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
1398 sizeof(capset->vk_extension_mask1));
1399 memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
1400 sizeof(capset->vk_extension_mask1));
1401
1402 assert(capset->allow_vk_wait_syncs);
1403
1404 assert(capset->supports_multiple_timelines);
1405 info->max_timeline_count = gpu->max_timeline_count;
1406
1407 if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
1408 info->has_guest_vram = true;
1409
1410 /* Use guest blob allocations from the dedicated heap (host-visible memory) */
1411 if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_HOST3D && capset->use_guest_vram)
1412 info->has_guest_vram = true;
1413 }
1414
1415 static void
1416 virtgpu_destroy(struct vn_renderer *renderer,
1417 const VkAllocationCallbacks *alloc)
1418 {
1419 struct virtgpu *gpu = (struct virtgpu *)renderer;
1420
1421 vn_renderer_shmem_cache_fini(&gpu->shmem_cache);
1422
1423 if (gpu->fd >= 0)
1424 close(gpu->fd);
1425
1426 mtx_destroy(&gpu->dma_buf_import_mutex);
1427
1428 util_sparse_array_finish(&gpu->shmem_array);
1429 util_sparse_array_finish(&gpu->bo_array);
1430
1431 vk_free(alloc, gpu);
1432 }
1433
1434 static inline void
1435 virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu)
1436 {
1437 /* VIRTGPU_BLOB_MEM_GUEST blobs are allocated from guest system memory.
1438 * They are logically contiguous in the guest but are sglists (iovecs) in
1439 * the host, which makes them slower to process there. With host process
1440 * isolation, it also becomes impossible for the host to access the
1441 * sglists directly.
1442 *
1443 * While there are ideas (and shipped code in some cases) such as creating
1444 * udmabufs from sglists, or having a dedicated guest heap, it seems the
1445 * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D. That is, when the
1446 * renderer sees a request to export a blob where
1447 *
1448 * - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
1449 * - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
1450 * - blob_id is 0
1451 *
1452 * it allocates a host shmem.
1453 *
1454 * supports_blob_id_0 is enforced by the mandated render server config.
1455 */
1456 assert(gpu->capset.data.supports_blob_id_0);
1457 gpu->shmem_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1458 }
1459
1460 static VkResult
1461 virtgpu_init_context(struct virtgpu *gpu)
1462 {
1463 assert(!gpu->capset.version);
1464 const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
1465 if (ret) {
1466 if (VN_DEBUG(INIT)) {
1467 vn_log(gpu->instance, "failed to initialize context: %s",
1468 strerror(errno));
1469 }
1470 return VK_ERROR_INITIALIZATION_FAILED;
1471 }
1472
1473 return VK_SUCCESS;
1474 }
1475
1476 static VkResult
1477 virtgpu_init_capset(struct virtgpu *gpu)
1478 {
1479 gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
1480 gpu->capset.version = 0;
1481
1482 const int ret =
1483 virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
1484 &gpu->capset.data, sizeof(gpu->capset.data));
1485 if (ret) {
1486 if (VN_DEBUG(INIT)) {
1487 vn_log(gpu->instance, "failed to get venus v%d capset: %s",
1488 gpu->capset.version, strerror(errno));
1489 }
1490 return VK_ERROR_INITIALIZATION_FAILED;
1491 }
1492
1493 return VK_SUCCESS;
1494 }
1495
1496 static VkResult
1497 virtgpu_init_params(struct virtgpu *gpu)
1498 {
1499 const uint64_t required_params[] = {
1500 VIRTGPU_PARAM_3D_FEATURES, VIRTGPU_PARAM_CAPSET_QUERY_FIX,
1501 VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CONTEXT_INIT,
1502 };
1503 uint64_t val;
1504 for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
1505 val = virtgpu_ioctl_getparam(gpu, required_params[i]);
1506 if (!val) {
1507 if (VN_DEBUG(INIT)) {
1508 vn_log(gpu->instance, "required kernel param %d is missing",
1509 (int)required_params[i]);
1510 }
1511 return VK_ERROR_INITIALIZATION_FAILED;
1512 }
1513 }
1514
1515 val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
1516 if (val) {
1517 gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1518 } else {
1519 val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
1520 if (val) {
1521 gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
1522 }
1523 }
1524
1525 if (!val) {
1526 vn_log(gpu->instance,
1527 "one of required kernel params (%d or %d) is missing",
1528 (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
1529 return VK_ERROR_INITIALIZATION_FAILED;
1530 }
1531
1532 /* The cross-device feature is optional. It enables sharing dma-bufs
1533 * with other virtio devices, such as virtio-wl or virtio-video used
1534 * by ChromeOS VMs. QEMU does not support cross-device sharing.
1535 */
1536 val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_CROSS_DEVICE);
1537 if (val)
1538 gpu->supports_cross_device = true;
1539
1540 /* implied by CONTEXT_INIT uapi */
1541 gpu->max_timeline_count = 64;
1542
1543 return VK_SUCCESS;
1544 }
1545
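/* Accept a virtio-gpu device on the PCI bus (vendor 0x1af4, device 0x1050)
 * or on the platform bus, require a render node, and verify the driver is
 * virtio_gpu with major version 0 before adopting the fd.
 */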
1546 static VkResult
1547 virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
1548 {
1549 bool supported_bus = false;
1550
1551 switch (dev->bustype) {
1552 case DRM_BUS_PCI:
1553 if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
1554 dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
1555 supported_bus = true;
1556 break;
1557 case DRM_BUS_PLATFORM:
1558 supported_bus = true;
1559 break;
1560 default:
1561 break;
1562 }
1563
1564 if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
1565 if (VN_DEBUG(INIT)) {
1566 const char *name = "unknown";
1567 for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
1568 if (dev->available_nodes & (1 << i)) {
1569 name = dev->nodes[i];
1570 break;
1571 }
1572 }
1573 vn_log(gpu->instance, "skipping DRM device %s", name);
1574 }
1575 return VK_ERROR_INITIALIZATION_FAILED;
1576 }
1577
1578 const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
1579 const char *node_path = dev->nodes[DRM_NODE_RENDER];
1580
1581 int fd = open(node_path, O_RDWR | O_CLOEXEC);
1582 if (fd < 0) {
1583 if (VN_DEBUG(INIT))
1584 vn_log(gpu->instance, "failed to open %s", node_path);
1585 return VK_ERROR_INITIALIZATION_FAILED;
1586 }
1587
1588 drmVersionPtr version = drmGetVersion(fd);
1589 if (!version || strcmp(version->name, "virtio_gpu") ||
1590 version->version_major != 0) {
1591 if (VN_DEBUG(INIT)) {
1592 if (version) {
1593 vn_log(gpu->instance, "unknown DRM driver %s version %d",
1594 version->name, version->version_major);
1595 } else {
1596 vn_log(gpu->instance, "failed to get DRM driver version");
1597 }
1598 }
1599 if (version)
1600 drmFreeVersion(version);
1601 close(fd);
1602 return VK_ERROR_INITIALIZATION_FAILED;
1603 }
1604
1605 gpu->fd = fd;
1606
1607 struct stat st;
1608 if (stat(primary_path, &st) == 0) {
1609 gpu->has_primary = true;
1610 gpu->primary_major = major(st.st_rdev);
1611 gpu->primary_minor = minor(st.st_rdev);
1612 } else {
1613 gpu->has_primary = false;
1614 gpu->primary_major = 0;
1615 gpu->primary_minor = 0;
1616 }
1617 stat(node_path, &st);
1618 gpu->render_major = major(st.st_rdev);
1619 gpu->render_minor = minor(st.st_rdev);
1620
1621 gpu->bustype = dev->bustype;
1622 if (dev->bustype == DRM_BUS_PCI)
1623 gpu->pci_bus_info = *dev->businfo.pci;
1624
1625 drmFreeVersion(version);
1626
1627 if (VN_DEBUG(INIT))
1628 vn_log(gpu->instance, "using DRM device %s", node_path);
1629
1630 return VK_SUCCESS;
1631 }
1632
1633 static VkResult
1634 virtgpu_open(struct virtgpu *gpu)
1635 {
1636 drmDevicePtr devs[8];
1637 int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
1638 if (count < 0) {
1639 if (VN_DEBUG(INIT))
1640 vn_log(gpu->instance, "failed to enumerate DRM devices");
1641 return VK_ERROR_INITIALIZATION_FAILED;
1642 }
1643
1644 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1645 for (int i = 0; i < count; i++) {
1646 result = virtgpu_open_device(gpu, devs[i]);
1647 if (result == VK_SUCCESS)
1648 break;
1649 }
1650
1651 drmFreeDevices(devs, count);
1652
1653 return result;
1654 }
1655
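/* Bring up the renderer: open the device, check kernel params, fetch the
 * venus capset, and initialize the context; then pick the shmem blob mem,
 * set up the shmem cache and renderer info, and fill in the renderer,
 * shmem, bo, and sync op tables.
 */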
1656 static VkResult
1657 virtgpu_init(struct virtgpu *gpu)
1658 {
1659 util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
1660 1024);
1661 util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);
1662
1663 mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);
1664
1665 VkResult result = virtgpu_open(gpu);
1666 if (result == VK_SUCCESS)
1667 result = virtgpu_init_params(gpu);
1668 if (result == VK_SUCCESS)
1669 result = virtgpu_init_capset(gpu);
1670 if (result == VK_SUCCESS)
1671 result = virtgpu_init_context(gpu);
1672 if (result != VK_SUCCESS)
1673 return result;
1674
1675 virtgpu_init_shmem_blob_mem(gpu);
1676
1677 vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
1678 virtgpu_shmem_destroy_now);
1679
1680 virtgpu_init_renderer_info(gpu);
1681
1682 gpu->base.ops.destroy = virtgpu_destroy;
1683 gpu->base.ops.submit = virtgpu_submit;
1684 gpu->base.ops.wait = virtgpu_wait;
1685
1686 gpu->base.shmem_ops.create = virtgpu_shmem_create;
1687 gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;
1688
1689 gpu->base.bo_ops.create_from_device_memory =
1690 virtgpu_bo_create_from_device_memory;
1691 gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
1692 gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
1693 gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
1694 gpu->base.bo_ops.map = virtgpu_bo_map;
1695 gpu->base.bo_ops.flush = virtgpu_bo_flush;
1696 gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;
1697
1698 gpu->base.sync_ops.create = virtgpu_sync_create;
1699 gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
1700 gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
1701 gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
1702 gpu->base.sync_ops.reset = virtgpu_sync_reset;
1703 gpu->base.sync_ops.read = virtgpu_sync_read;
1704 gpu->base.sync_ops.write = virtgpu_sync_write;
1705
1706 return VK_SUCCESS;
1707 }
1708
1709 VkResult
1710 vn_renderer_create_virtgpu(struct vn_instance *instance,
1711 const VkAllocationCallbacks *alloc,
1712 struct vn_renderer **renderer)
1713 {
1714 struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
1715 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1716 if (!gpu)
1717 return VK_ERROR_OUT_OF_HOST_MEMORY;
1718
1719 gpu->instance = instance;
1720 gpu->fd = -1;
1721
1722 VkResult result = virtgpu_init(gpu);
1723 if (result != VK_SUCCESS) {
1724 virtgpu_destroy(&gpu->base, alloc);
1725 return result;
1726 }
1727
1728 *renderer = &gpu->base;
1729
1730 return VK_SUCCESS;
1731 }
1732