1 /*
2  * Copyright 2020 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <poll.h>
9 #include <sys/mman.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 #include <unistd.h>
13 #include <xf86drm.h>
14 
15 #ifdef MAJOR_IN_MKDEV
16 #include <sys/mkdev.h>
17 #endif
18 #ifdef MAJOR_IN_SYSMACROS
19 #include <sys/sysmacros.h>
20 #endif
21 
22 #include "drm-uapi/virtgpu_drm.h"
23 #include "util/sparse_array.h"
24 #define VIRGL_RENDERER_UNSTABLE_APIS
25 #include "virtio-gpu/virglrenderer_hw.h"
26 
27 #include "vn_renderer_internal.h"
28 
29 #ifndef VIRTGPU_PARAM_GUEST_VRAM
30 /* All guest allocations happen via the virtgpu dedicated heap. */
31 #define VIRTGPU_PARAM_GUEST_VRAM 9
32 #endif
33 
34 #ifndef VIRTGPU_BLOB_MEM_GUEST_VRAM
35 #define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004
36 #endif
37 
38 /* XXX comment these out to use the real kernel uapi */
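/*
 * What each knob simulates:
 *  - SIMULATE_BO_SIZE_FIX: align blob sizes up to 4096 before
 *    DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB (see
 *    virtgpu_ioctl_resource_create_blob)
 *  - SIMULATE_SYNCOBJ: emulate timeline drm_syncobjs in userspace with
 *    sync-file fds (see the sim_syncobj_* helpers)
 *  - SIMULATE_SUBMIT: build submissions on DRM_IOCTL_VIRTGPU_EXECBUFFER with
 *    per-batch out-fences (see sim_submit)
 */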
39 #define SIMULATE_BO_SIZE_FIX 1
40 #define SIMULATE_SYNCOBJ     1
41 #define SIMULATE_SUBMIT      1
42 
43 #define VIRTGPU_PCI_VENDOR_ID 0x1af4
44 #define VIRTGPU_PCI_DEVICE_ID 0x1050
45 
46 struct virtgpu;
47 
48 struct virtgpu_shmem {
49    struct vn_renderer_shmem base;
50    uint32_t gem_handle;
51 };
52 
53 struct virtgpu_bo {
54    struct vn_renderer_bo base;
55    uint32_t gem_handle;
56    uint32_t blob_flags;
57 };
58 
59 struct virtgpu_sync {
60    struct vn_renderer_sync base;
61 
62    /*
63     * drm_syncobj is in one of these states
64     *
65     *  - value N:      drm_syncobj has a signaled fence chain with seqno N
66     *  - pending N->M: drm_syncobj has an unsignaled fence chain with seqno M
67     *                  (which may point to another unsignaled fence chain with
68     *                   seqno between N and M, and so on)
69     *
70     * TODO Do we want to use binary drm_syncobjs?  They would be
71     *
72     *  - value 0: drm_syncobj has no fence
73     *  - value 1: drm_syncobj has a signaled fence with seqno 0
74     *
75     * They are cheaper but require special care.
76     */
77    uint32_t syncobj_handle;
78 };
79 
80 struct virtgpu {
81    struct vn_renderer base;
82 
83    struct vn_instance *instance;
84 
85    int fd;
86 
87    bool has_primary;
88    int primary_major;
89    int primary_minor;
90    int render_major;
91    int render_minor;
92 
93    int bustype;
94    drmPciBusInfo pci_bus_info;
95 
96    uint32_t max_timeline_count;
97 
98    struct {
99       enum virgl_renderer_capset id;
100       uint32_t version;
101       struct virgl_renderer_capset_venus data;
102    } capset;
103 
104    uint32_t shmem_blob_mem;
105    uint32_t bo_blob_mem;
106 
107    /* note that we index by gem_handle instead of res_id because res_id is
108     * monotonically increasing by default (see virtio_gpu_resource_id_get),
109     * so the sparse arrays would keep growing if indexed by it
110     */
111    struct util_sparse_array shmem_array;
112    struct util_sparse_array bo_array;
113 
114    mtx_t dma_buf_import_mutex;
115 
116    struct vn_renderer_shmem_cache shmem_cache;
117 
118    bool supports_cross_device;
119 };
120 
121 #ifdef SIMULATE_SYNCOBJ
122 
123 #include "util/hash_table.h"
124 #include "util/u_idalloc.h"
125 
126 static struct {
127    mtx_t mutex;
128    struct hash_table *syncobjs;
129    struct util_idalloc ida;
130 
131    int signaled_fd;
132 } sim;
133 
134 struct sim_syncobj {
135    mtx_t mutex;
136    uint64_t point;
137 
138    int pending_fd;
139    uint64_t pending_point;
140    bool pending_cpu;
141 };
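/*
 * Emulation state: "sim" maps fake syncobj handles (idalloc index + 1) to
 * sim_syncobj objects and keeps a pre-signaled sync-file fd for exports.
 * Each sim_syncobj tracks the last signaled timeline point plus at most one
 * pending sync-file fd and the point it will signal; signals bump the point
 * directly, submits attach a new fence fd, and waits/queries poll that fd to
 * advance the point.
 */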
142 
143 static uint32_t
144 sim_syncobj_create(struct virtgpu *gpu, bool signaled)
145 {
146    struct sim_syncobj *syncobj = calloc(1, sizeof(*syncobj));
147    if (!syncobj)
148       return 0;
149 
150    mtx_init(&syncobj->mutex, mtx_plain);
151    syncobj->pending_fd = -1;
152 
153    mtx_lock(&sim.mutex);
154 
155    /* initialize lazily */
156    if (!sim.syncobjs) {
157       sim.syncobjs = _mesa_pointer_hash_table_create(NULL);
158       if (!sim.syncobjs) {
159          mtx_unlock(&sim.mutex);
160          return 0;
161       }
162 
163       util_idalloc_init(&sim.ida, 32);
164 
165       struct drm_virtgpu_execbuffer args = {
166          .flags = VIRTGPU_EXECBUF_RING_IDX | VIRTGPU_EXECBUF_FENCE_FD_OUT,
167          .ring_idx = 0, /* CPU ring */
168       };
169       int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
170       if (ret || args.fence_fd < 0) {
171          _mesa_hash_table_destroy(sim.syncobjs, NULL);
172          sim.syncobjs = NULL;
173          mtx_unlock(&sim.mutex);
174          return 0;
175       }
176 
177       sim.signaled_fd = args.fence_fd;
178    }
179 
180    const unsigned syncobj_handle = util_idalloc_alloc(&sim.ida) + 1;
181    _mesa_hash_table_insert(sim.syncobjs,
182                            (const void *)(uintptr_t)syncobj_handle, syncobj);
183 
184    mtx_unlock(&sim.mutex);
185 
186    return syncobj_handle;
187 }
188 
189 static void
190 sim_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
191 {
192    struct sim_syncobj *syncobj = NULL;
193 
194    mtx_lock(&sim.mutex);
195 
196    struct hash_entry *entry = _mesa_hash_table_search(
197       sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
198    if (entry) {
199       syncobj = entry->data;
200       _mesa_hash_table_remove(sim.syncobjs, entry);
201       util_idalloc_free(&sim.ida, syncobj_handle - 1);
202    }
203 
204    mtx_unlock(&sim.mutex);
205 
206    if (syncobj) {
207       if (syncobj->pending_fd >= 0)
208          close(syncobj->pending_fd);
209       mtx_destroy(&syncobj->mutex);
210       free(syncobj);
211    }
212 }
213 
214 static VkResult
215 sim_syncobj_poll(int fd, int poll_timeout)
216 {
217    struct pollfd pollfd = {
218       .fd = fd,
219       .events = POLLIN,
220    };
221    int ret;
222    do {
223       ret = poll(&pollfd, 1, poll_timeout);
224    } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
225 
226    if (ret < 0 || (ret > 0 && !(pollfd.revents & POLLIN))) {
227       return (ret < 0 && errno == ENOMEM) ? VK_ERROR_OUT_OF_HOST_MEMORY
228                                           : VK_ERROR_DEVICE_LOST;
229    }
230 
231    return ret ? VK_SUCCESS : VK_TIMEOUT;
232 }
233 
234 static void
235 sim_syncobj_set_point_locked(struct sim_syncobj *syncobj, uint64_t point)
236 {
237    syncobj->point = point;
238 
239    if (syncobj->pending_fd >= 0) {
240       close(syncobj->pending_fd);
241       syncobj->pending_fd = -1;
242       syncobj->pending_point = point;
243    }
244 }
245 
246 static void
247 sim_syncobj_update_point_locked(struct sim_syncobj *syncobj, int poll_timeout)
248 {
249    if (syncobj->pending_fd >= 0) {
250       VkResult result;
251       if (syncobj->pending_cpu) {
252          if (poll_timeout == -1) {
253             const int max_cpu_timeout = 2000;
254             poll_timeout = max_cpu_timeout;
255             result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
256             if (result == VK_TIMEOUT) {
257                vn_log(NULL, "cpu sync timed out after %dms; ignoring",
258                       poll_timeout);
259                result = VK_SUCCESS;
260             }
261          } else {
262             result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
263          }
264       } else {
265          result = sim_syncobj_poll(syncobj->pending_fd, poll_timeout);
266       }
267       if (result == VK_SUCCESS) {
268          close(syncobj->pending_fd);
269          syncobj->pending_fd = -1;
270          syncobj->point = syncobj->pending_point;
271       }
272    }
273 }
274 
275 static struct sim_syncobj *
276 sim_syncobj_lookup(struct virtgpu *gpu, uint32_t syncobj_handle)
277 {
278    struct sim_syncobj *syncobj = NULL;
279 
280    mtx_lock(&sim.mutex);
281    struct hash_entry *entry = _mesa_hash_table_search(
282       sim.syncobjs, (const void *)(uintptr_t)syncobj_handle);
283    if (entry)
284       syncobj = entry->data;
285    mtx_unlock(&sim.mutex);
286 
287    return syncobj;
288 }
289 
290 static int
291 sim_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
292 {
293    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
294    if (!syncobj)
295       return -1;
296 
297    mtx_lock(&syncobj->mutex);
298    sim_syncobj_set_point_locked(syncobj, 0);
299    mtx_unlock(&syncobj->mutex);
300 
301    return 0;
302 }
303 
304 static int
305 sim_syncobj_query(struct virtgpu *gpu,
306                   uint32_t syncobj_handle,
307                   uint64_t *point)
308 {
309    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
310    if (!syncobj)
311       return -1;
312 
313    mtx_lock(&syncobj->mutex);
314    sim_syncobj_update_point_locked(syncobj, 0);
315    *point = syncobj->point;
316    mtx_unlock(&syncobj->mutex);
317 
318    return 0;
319 }
320 
321 static int
322 sim_syncobj_signal(struct virtgpu *gpu,
323                    uint32_t syncobj_handle,
324                    uint64_t point)
325 {
326    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
327    if (!syncobj)
328       return -1;
329 
330    mtx_lock(&syncobj->mutex);
331    sim_syncobj_set_point_locked(syncobj, point);
332    mtx_unlock(&syncobj->mutex);
333 
334    return 0;
335 }
336 
337 static int
338 sim_syncobj_submit(struct virtgpu *gpu,
339                    uint32_t syncobj_handle,
340                    int sync_fd,
341                    uint64_t point,
342                    bool cpu)
343 {
344    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
345    if (!syncobj)
346       return -1;
347 
348    int pending_fd = dup(sync_fd);
349    if (pending_fd < 0) {
350       vn_log(gpu->instance, "failed to dup sync fd");
351       return -1;
352    }
353 
354    mtx_lock(&syncobj->mutex);
355 
356    if (syncobj->pending_fd >= 0) {
357       mtx_unlock(&syncobj->mutex);
358 
359       /* TODO */
360       vn_log(gpu->instance, "sorry, no simulated timeline semaphore");
361       close(pending_fd);
362       return -1;
363    }
364    if (syncobj->point >= point)
365       vn_log(gpu->instance, "non-monotonic signaling");
366 
367    syncobj->pending_fd = pending_fd;
368    syncobj->pending_point = point;
369    syncobj->pending_cpu = cpu;
370 
371    mtx_unlock(&syncobj->mutex);
372 
373    return 0;
374 }
375 
376 static int
377 timeout_to_poll_timeout(uint64_t timeout)
378 {
379    const uint64_t ns_per_ms = 1000000;
380    const uint64_t ms = (timeout + ns_per_ms - 1) / ns_per_ms;
381    if (!ms && timeout)
382       return -1;
383    return ms <= INT_MAX ? ms : -1;
384 }
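/*
 * Examples: a timeout of 0 yields 0 (no wait); 1500000 ns (1.5 ms) rounds up
 * to a 2 ms poll; timeouts whose millisecond count exceeds INT_MAX, or whose
 * round-up overflows, yield -1, which poll() treats as an infinite wait.
 */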
385 
386 static int
387 sim_syncobj_wait(struct virtgpu *gpu,
388                  const struct vn_renderer_wait *wait,
389                  bool wait_avail)
390 {
391    if (wait_avail)
392       return -1;
393 
394    const int poll_timeout = timeout_to_poll_timeout(wait->timeout);
395 
396    /* TODO poll all fds at the same time */
397    for (uint32_t i = 0; i < wait->sync_count; i++) {
398       struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
399       const uint64_t point = wait->sync_values[i];
400 
401       struct sim_syncobj *syncobj =
402          sim_syncobj_lookup(gpu, sync->syncobj_handle);
403       if (!syncobj)
404          return -1;
405 
406       mtx_lock(&syncobj->mutex);
407 
408       if (syncobj->point < point)
409          sim_syncobj_update_point_locked(syncobj, poll_timeout);
410 
411       if (syncobj->point < point) {
412          if (wait->wait_any && i < wait->sync_count - 1 &&
413              syncobj->pending_fd < 0) {
414             mtx_unlock(&syncobj->mutex);
415             continue;
416          }
417          errno = ETIME;
418          mtx_unlock(&syncobj->mutex);
419          return -1;
420       }
421 
422       mtx_unlock(&syncobj->mutex);
423 
424       if (wait->wait_any)
425          break;
426 
427       /* TODO adjust poll_timeout */
428    }
429 
430    return 0;
431 }
432 
433 static int
434 sim_syncobj_export(struct virtgpu *gpu, uint32_t syncobj_handle)
435 {
436    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
437    if (!syncobj)
438       return -1;
439 
440    int fd = -1;
441    mtx_lock(&syncobj->mutex);
442    if (syncobj->pending_fd >= 0)
443       fd = dup(syncobj->pending_fd);
444    else
445       fd = dup(sim.signaled_fd);
446    mtx_unlock(&syncobj->mutex);
447 
448    return fd;
449 }
450 
451 static uint32_t
452 sim_syncobj_import(struct virtgpu *gpu, uint32_t syncobj_handle, int fd)
453 {
454    struct sim_syncobj *syncobj = sim_syncobj_lookup(gpu, syncobj_handle);
455    if (!syncobj)
456       return 0;
457 
458    if (sim_syncobj_submit(gpu, syncobj_handle, fd, 1, false))
459       return 0;
460 
461    return syncobj_handle;
462 }
463 
464 #endif /* SIMULATE_SYNCOBJ */
465 
466 #ifdef SIMULATE_SUBMIT
467 
468 static int
469 sim_submit_signal_syncs(struct virtgpu *gpu,
470                         int sync_fd,
471                         struct vn_renderer_sync *const *syncs,
472                         const uint64_t *sync_values,
473                         uint32_t sync_count,
474                         bool cpu)
475 {
476    for (uint32_t i = 0; i < sync_count; i++) {
477       struct virtgpu_sync *sync = (struct virtgpu_sync *)syncs[i];
478       const uint64_t pending_point = sync_values[i];
479 
480 #ifdef SIMULATE_SYNCOBJ
481       int ret = sim_syncobj_submit(gpu, sync->syncobj_handle, sync_fd,
482                                    pending_point, cpu);
483       if (ret)
484          return ret;
485 #else
486       /* we can in theory do a DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE followed by a
487        * DRM_IOCTL_SYNCOBJ_TRANSFER
488        */
489       return -1;
490 #endif
491    }
492 
493    return 0;
494 }
495 
496 static uint32_t *
497 sim_submit_alloc_gem_handles(struct vn_renderer_bo *const *bos,
498                              uint32_t bo_count)
499 {
500    uint32_t *gem_handles = malloc(sizeof(*gem_handles) * bo_count);
501    if (!gem_handles)
502       return NULL;
503 
504    for (uint32_t i = 0; i < bo_count; i++) {
505       struct virtgpu_bo *bo = (struct virtgpu_bo *)bos[i];
506       gem_handles[i] = bo->gem_handle;
507    }
508 
509    return gem_handles;
510 }
511 
512 static int
513 sim_submit(struct virtgpu *gpu, const struct vn_renderer_submit *submit)
514 {
515    /* TODO replace submit->bos by submit->gem_handles to avoid malloc/loop */
516    uint32_t *gem_handles = NULL;
517    if (submit->bo_count) {
518       gem_handles =
519          sim_submit_alloc_gem_handles(submit->bos, submit->bo_count);
520       if (!gem_handles)
521          return -1;
522    }
523 
524    assert(submit->batch_count);
525 
526    int ret = 0;
527    for (uint32_t i = 0; i < submit->batch_count; i++) {
528       const struct vn_renderer_submit_batch *batch = &submit->batches[i];
529 
530       struct drm_virtgpu_execbuffer args = {
531          .flags = VIRTGPU_EXECBUF_RING_IDX |
532                   (batch->sync_count ? VIRTGPU_EXECBUF_FENCE_FD_OUT : 0),
533          .size = batch->cs_size,
534          .command = (uintptr_t)batch->cs_data,
535          .bo_handles = (uintptr_t)gem_handles,
536          .num_bo_handles = submit->bo_count,
537          .ring_idx = batch->ring_idx,
538       };
539 
540       ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
541       if (ret) {
542          vn_log(gpu->instance, "failed to execbuffer: %s", strerror(errno));
543          break;
544       }
545 
546       if (batch->sync_count) {
547          ret = sim_submit_signal_syncs(gpu, args.fence_fd, batch->syncs,
548                                        batch->sync_values, batch->sync_count,
549                                        batch->ring_idx == 0);
550          close(args.fence_fd);
551          if (ret)
552             break;
553       }
554    }
555 
556    free(gem_handles);
557    return ret;
558 }
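/*
 * Each batch above becomes one DRM_IOCTL_VIRTGPU_EXECBUFFER on the batch's
 * ring.  When a batch carries syncs, an out-fence fd is requested and passed
 * to sim_submit_signal_syncs, which attaches it to each simulated syncobj as
 * the pending fence for the requested timeline point (cpu is true only for
 * ring 0).
 */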
559 
560 #endif /* SIMULATE_SUBMIT */
561 
562 static int
563 virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args)
564 {
565    return drmIoctl(gpu->fd, request, args);
566 }
567 
568 static uint64_t
569 virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param)
570 {
571    /* val must be zeroed because kernel only writes the lower 32 bits */
572    uint64_t val = 0;
573    struct drm_virtgpu_getparam args = {
574       .param = param,
575       .value = (uintptr_t)&val,
576    };
577 
578    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args);
579    return ret ? 0 : val;
580 }
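/*
 * Returns 0 both when the ioctl fails and when the param's value is 0, so
 * callers use it as a "supported and non-zero" check, e.g.
 *
 *    if (virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_RESOURCE_BLOB)) { ... }
 */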
581 
582 static int
583 virtgpu_ioctl_get_caps(struct virtgpu *gpu,
584                        enum virgl_renderer_capset id,
585                        uint32_t version,
586                        void *capset,
587                        size_t capset_size)
588 {
589    struct drm_virtgpu_get_caps args = {
590       .cap_set_id = id,
591       .cap_set_ver = version,
592       .addr = (uintptr_t)capset,
593       .size = capset_size,
594    };
595 
596    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args);
597 }
598 
599 static int
600 virtgpu_ioctl_context_init(struct virtgpu *gpu,
601                            enum virgl_renderer_capset capset_id)
602 {
603    struct drm_virtgpu_context_set_param ctx_set_params[3] = {
604       {
605          .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID,
606          .value = capset_id,
607       },
608       {
609          .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS,
610          .value = 64,
611       },
612       {
613          .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK,
614          .value = 0, /* don't generate drm_events on fence signaling */
615       },
616    };
617 
618    struct drm_virtgpu_context_init args = {
619       .num_params = ARRAY_SIZE(ctx_set_params),
620       .ctx_set_params = (uintptr_t)&ctx_set_params,
621    };
622 
623    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args);
624 }
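/*
 * The context is created with the venus capset, 64 timeline rings, and an
 * empty poll-rings mask (no drm events on fence signaling).  The 64-ring
 * limit is what virtgpu_init_params() later reports as max_timeline_count.
 */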
625 
626 static uint32_t
627 virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu,
628                                    uint32_t blob_mem,
629                                    uint32_t blob_flags,
630                                    size_t blob_size,
631                                    uint64_t blob_id,
632                                    uint32_t *res_id)
633 {
634 #ifdef SIMULATE_BO_SIZE_FIX
635    blob_size = align64(blob_size, 4096);
636 #endif
637 
638    struct drm_virtgpu_resource_create_blob args = {
639       .blob_mem = blob_mem,
640       .blob_flags = blob_flags,
641       .size = blob_size,
642       .blob_id = blob_id,
643    };
644 
645    if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args))
646       return 0;
647 
648    *res_id = args.res_handle;
649    return args.bo_handle;
650 }
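/*
 * Returns the gem handle (0 on failure) and writes the host resource id to
 * *res_id.  For bo allocations the blob_id carries the object id of the
 * device memory being exported (mem_id), while shmem allocations pass
 * blob_id 0 (see virtgpu_init_shmem_blob_mem).
 */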
651 
652 static int
653 virtgpu_ioctl_resource_info(struct virtgpu *gpu,
654                             uint32_t gem_handle,
655                             struct drm_virtgpu_resource_info *info)
656 {
657    *info = (struct drm_virtgpu_resource_info){
658       .bo_handle = gem_handle,
659    };
660 
661    return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_INFO, info);
662 }
663 
664 static void
665 virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle)
666 {
667    struct drm_gem_close args = {
668       .handle = gem_handle,
669    };
670 
671    ASSERTED const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args);
672    assert(!ret);
673 }
674 
675 static int
676 virtgpu_ioctl_prime_handle_to_fd(struct virtgpu *gpu,
677                                  uint32_t gem_handle,
678                                  bool mappable)
679 {
680    struct drm_prime_handle args = {
681       .handle = gem_handle,
682       .flags = DRM_CLOEXEC | (mappable ? DRM_RDWR : 0),
683    };
684 
685    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
686    return ret ? -1 : args.fd;
687 }
688 
689 static uint32_t
690 virtgpu_ioctl_prime_fd_to_handle(struct virtgpu *gpu, int fd)
691 {
692    struct drm_prime_handle args = {
693       .fd = fd,
694    };
695 
696    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_PRIME_FD_TO_HANDLE, &args);
697    return ret ? 0 : args.handle;
698 }
699 
700 static void *
701 virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size)
702 {
703    struct drm_virtgpu_map args = {
704       .handle = gem_handle,
705    };
706 
707    if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args))
708       return NULL;
709 
710    void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd,
711                     args.offset);
712    if (ptr == MAP_FAILED)
713       return NULL;
714 
715    return ptr;
716 }
717 
718 static uint32_t
719 virtgpu_ioctl_syncobj_create(struct virtgpu *gpu, bool signaled)
720 {
721 #ifdef SIMULATE_SYNCOBJ
722    return sim_syncobj_create(gpu, signaled);
723 #endif
724 
725    struct drm_syncobj_create args = {
726       .flags = signaled ? DRM_SYNCOBJ_CREATE_SIGNALED : 0,
727    };
728 
729    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_CREATE, &args);
730    return ret ? 0 : args.handle;
731 }
732 
733 static void
734 virtgpu_ioctl_syncobj_destroy(struct virtgpu *gpu, uint32_t syncobj_handle)
735 {
736 #ifdef SIMULATE_SYNCOBJ
737    sim_syncobj_destroy(gpu, syncobj_handle);
738    return;
739 #endif
740 
741    struct drm_syncobj_destroy args = {
742       .handle = syncobj_handle,
743    };
744 
745    ASSERTED const int ret =
746       virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_DESTROY, &args);
747    assert(!ret);
748 }
749 
750 static int
751 virtgpu_ioctl_syncobj_handle_to_fd(struct virtgpu *gpu,
752                                    uint32_t syncobj_handle,
753                                    bool sync_file)
754 {
755 #ifdef SIMULATE_SYNCOBJ
756    return sync_file ? sim_syncobj_export(gpu, syncobj_handle) : -1;
757 #endif
758 
759    struct drm_syncobj_handle args = {
760       .handle = syncobj_handle,
761       .flags =
762          sync_file ? DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE : 0,
763    };
764 
765    int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &args);
766    if (ret)
767       return -1;
768 
769    return args.fd;
770 }
771 
772 static uint32_t
773 virtgpu_ioctl_syncobj_fd_to_handle(struct virtgpu *gpu,
774                                    int fd,
775                                    uint32_t syncobj_handle)
776 {
777 #ifdef SIMULATE_SYNCOBJ
778    return syncobj_handle ? sim_syncobj_import(gpu, syncobj_handle, fd) : 0;
779 #endif
780 
781    struct drm_syncobj_handle args = {
782       .handle = syncobj_handle,
783       .flags =
784          syncobj_handle ? DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE : 0,
785       .fd = fd,
786    };
787 
788    int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &args);
789    if (ret)
790       return 0;
791 
792    return args.handle;
793 }
794 
795 static int
796 virtgpu_ioctl_syncobj_reset(struct virtgpu *gpu, uint32_t syncobj_handle)
797 {
798 #ifdef SIMULATE_SYNCOBJ
799    return sim_syncobj_reset(gpu, syncobj_handle);
800 #endif
801 
802    struct drm_syncobj_array args = {
803       .handles = (uintptr_t)&syncobj_handle,
804       .count_handles = 1,
805    };
806 
807    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_RESET, &args);
808 }
809 
810 static int
811 virtgpu_ioctl_syncobj_query(struct virtgpu *gpu,
812                             uint32_t syncobj_handle,
813                             uint64_t *point)
814 {
815 #ifdef SIMULATE_SYNCOBJ
816    return sim_syncobj_query(gpu, syncobj_handle, point);
817 #endif
818 
819    struct drm_syncobj_timeline_array args = {
820       .handles = (uintptr_t)&syncobj_handle,
821       .points = (uintptr_t)point,
822       .count_handles = 1,
823    };
824 
825    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_QUERY, &args);
826 }
827 
828 static int
829 virtgpu_ioctl_syncobj_timeline_signal(struct virtgpu *gpu,
830                                       uint32_t syncobj_handle,
831                                       uint64_t point)
832 {
833 #ifdef SIMULATE_SYNCOBJ
834    return sim_syncobj_signal(gpu, syncobj_handle, point);
835 #endif
836 
837    struct drm_syncobj_timeline_array args = {
838       .handles = (uintptr_t)&syncobj_handle,
839       .points = (uintptr_t)&point,
840       .count_handles = 1,
841    };
842 
843    return virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
844 }
845 
846 static int
847 virtgpu_ioctl_syncobj_timeline_wait(struct virtgpu *gpu,
848                                     const struct vn_renderer_wait *wait,
849                                     bool wait_avail)
850 {
851 #ifdef SIMULATE_SYNCOBJ
852    return sim_syncobj_wait(gpu, wait, wait_avail);
853 #endif
854 
855    /* always enable wait-before-submit */
856    uint32_t flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
857    if (!wait->wait_any)
858       flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
859    /* wait for fences to become available rather than signaled */
860    if (wait_avail)
861       flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;
862 
863    /* TODO replace wait->syncs by wait->sync_handles to avoid malloc/loop */
864    uint32_t *syncobj_handles =
865       malloc(sizeof(*syncobj_handles) * wait->sync_count);
866    if (!syncobj_handles)
867       return -1;
868    for (uint32_t i = 0; i < wait->sync_count; i++) {
869       struct virtgpu_sync *sync = (struct virtgpu_sync *)wait->syncs[i];
870       syncobj_handles[i] = sync->syncobj_handle;
871    }
872 
873    struct drm_syncobj_timeline_wait args = {
874       .handles = (uintptr_t)syncobj_handles,
875       .points = (uintptr_t)wait->sync_values,
876       .timeout_nsec = os_time_get_absolute_timeout(wait->timeout),
877       .count_handles = wait->sync_count,
878       .flags = flags,
879    };
880 
881    const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &args);
882 
883    free(syncobj_handles);
884 
885    return ret;
886 }
887 
888 static int
889 virtgpu_ioctl_submit(struct virtgpu *gpu,
890                      const struct vn_renderer_submit *submit)
891 {
892 #ifdef SIMULATE_SUBMIT
893    return sim_submit(gpu, submit);
894 #endif
895    return -1;
896 }
897 
898 static VkResult
899 virtgpu_sync_write(struct vn_renderer *renderer,
900                    struct vn_renderer_sync *_sync,
901                    uint64_t val)
902 {
903    struct virtgpu *gpu = (struct virtgpu *)renderer;
904    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
905 
906    const int ret =
907       virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle, val);
908 
909    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
910 }
911 
912 static VkResult
913 virtgpu_sync_read(struct vn_renderer *renderer,
914                   struct vn_renderer_sync *_sync,
915                   uint64_t *val)
916 {
917    struct virtgpu *gpu = (struct virtgpu *)renderer;
918    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
919 
920    const int ret =
921       virtgpu_ioctl_syncobj_query(gpu, sync->syncobj_handle, val);
922 
923    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
924 }
925 
926 static VkResult
927 virtgpu_sync_reset(struct vn_renderer *renderer,
928                    struct vn_renderer_sync *_sync,
929                    uint64_t initial_val)
930 {
931    struct virtgpu *gpu = (struct virtgpu *)renderer;
932    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
933 
934    int ret = virtgpu_ioctl_syncobj_reset(gpu, sync->syncobj_handle);
935    if (!ret) {
936       ret = virtgpu_ioctl_syncobj_timeline_signal(gpu, sync->syncobj_handle,
937                                                   initial_val);
938    }
939 
940    return ret ? VK_ERROR_OUT_OF_DEVICE_MEMORY : VK_SUCCESS;
941 }
942 
943 static int
944 virtgpu_sync_export_syncobj(struct vn_renderer *renderer,
945                             struct vn_renderer_sync *_sync,
946                             bool sync_file)
947 {
948    struct virtgpu *gpu = (struct virtgpu *)renderer;
949    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
950 
951    return virtgpu_ioctl_syncobj_handle_to_fd(gpu, sync->syncobj_handle,
952                                              sync_file);
953 }
954 
955 static void
956 virtgpu_sync_destroy(struct vn_renderer *renderer,
957                      struct vn_renderer_sync *_sync)
958 {
959    struct virtgpu *gpu = (struct virtgpu *)renderer;
960    struct virtgpu_sync *sync = (struct virtgpu_sync *)_sync;
961 
962    virtgpu_ioctl_syncobj_destroy(gpu, sync->syncobj_handle);
963 
964    free(sync);
965 }
966 
967 static VkResult
968 virtgpu_sync_create_from_syncobj(struct vn_renderer *renderer,
969                                  int fd,
970                                  bool sync_file,
971                                  struct vn_renderer_sync **out_sync)
972 {
973    struct virtgpu *gpu = (struct virtgpu *)renderer;
974 
975    uint32_t syncobj_handle;
976    if (sync_file) {
977       syncobj_handle = virtgpu_ioctl_syncobj_create(gpu, false);
978       if (!syncobj_handle)
979          return VK_ERROR_OUT_OF_HOST_MEMORY;
980       if (!virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, syncobj_handle)) {
981          virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
982          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
983       }
984    } else {
985       syncobj_handle = virtgpu_ioctl_syncobj_fd_to_handle(gpu, fd, 0);
986       if (!syncobj_handle)
987          return VK_ERROR_INVALID_EXTERNAL_HANDLE;
988    }
989 
990    struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
991    if (!sync) {
992       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
993       return VK_ERROR_OUT_OF_HOST_MEMORY;
994    }
995 
996    sync->syncobj_handle = syncobj_handle;
997    sync->base.sync_id = 0; /* TODO */
998 
999    *out_sync = &sync->base;
1000 
1001    return VK_SUCCESS;
1002 }
1003 
1004 static VkResult
1005 virtgpu_sync_create(struct vn_renderer *renderer,
1006                     uint64_t initial_val,
1007                     uint32_t flags,
1008                     struct vn_renderer_sync **out_sync)
1009 {
1010    struct virtgpu *gpu = (struct virtgpu *)renderer;
1011 
1012    /* TODO */
1013    if (flags & VN_RENDERER_SYNC_SHAREABLE)
1014       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1015 
1016    /* always false because we don't use binary drm_syncobjs */
1017    const bool signaled = false;
1018    const uint32_t syncobj_handle =
1019       virtgpu_ioctl_syncobj_create(gpu, signaled);
1020    if (!syncobj_handle)
1021       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1022 
1023    /* add a signaled fence chain with seqno initial_val */
1024    const int ret =
1025       virtgpu_ioctl_syncobj_timeline_signal(gpu, syncobj_handle, initial_val);
1026    if (ret) {
1027       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1028       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1029    }
1030 
1031    struct virtgpu_sync *sync = calloc(1, sizeof(*sync));
1032    if (!sync) {
1033       virtgpu_ioctl_syncobj_destroy(gpu, syncobj_handle);
1034       return VK_ERROR_OUT_OF_HOST_MEMORY;
1035    }
1036 
1037    sync->syncobj_handle = syncobj_handle;
1038    /* we will have a sync_id when shareable is true and virtio-gpu associates
1039     * a host sync object with guest drm_syncobj
1040     */
1041    sync->base.sync_id = 0;
1042 
1043    *out_sync = &sync->base;
1044 
1045    return VK_SUCCESS;
1046 }
1047 
1048 static void
1049 virtgpu_bo_invalidate(struct vn_renderer *renderer,
1050                       struct vn_renderer_bo *bo,
1051                       VkDeviceSize offset,
1052                       VkDeviceSize size)
1053 {
1054    /* nop because kernel makes every mapping coherent */
1055 }
1056 
1057 static void
1058 virtgpu_bo_flush(struct vn_renderer *renderer,
1059                  struct vn_renderer_bo *bo,
1060                  VkDeviceSize offset,
1061                  VkDeviceSize size)
1062 {
1063    /* nop because kernel makes every mapping coherent */
1064 }
1065 
1066 static void *
1067 virtgpu_bo_map(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1068 {
1069    struct virtgpu *gpu = (struct virtgpu *)renderer;
1070    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1071    const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1072 
1073    /* not thread-safe but is fine */
1074    if (!bo->base.mmap_ptr && mappable) {
1075       bo->base.mmap_ptr =
1076          virtgpu_ioctl_map(gpu, bo->gem_handle, bo->base.mmap_size);
1077    }
1078 
1079    return bo->base.mmap_ptr;
1080 }
1081 
1082 static int
1083 virtgpu_bo_export_dma_buf(struct vn_renderer *renderer,
1084                           struct vn_renderer_bo *_bo)
1085 {
1086    struct virtgpu *gpu = (struct virtgpu *)renderer;
1087    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1088    const bool mappable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1089    const bool shareable = bo->blob_flags & VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1090 
1091    return shareable
1092              ? virtgpu_ioctl_prime_handle_to_fd(gpu, bo->gem_handle, mappable)
1093              : -1;
1094 }
1095 
1096 static bool
1097 virtgpu_bo_destroy(struct vn_renderer *renderer, struct vn_renderer_bo *_bo)
1098 {
1099    struct virtgpu *gpu = (struct virtgpu *)renderer;
1100    struct virtgpu_bo *bo = (struct virtgpu_bo *)_bo;
1101 
1102    mtx_lock(&gpu->dma_buf_import_mutex);
1103 
1104    /* Check the refcount again after the import lock is grabbed.  Yes, we use
1105     * the double-checked locking anti-pattern.
1106     */
1107    if (vn_refcount_is_valid(&bo->base.refcount)) {
1108       mtx_unlock(&gpu->dma_buf_import_mutex);
1109       return false;
1110    }
1111 
1112    if (bo->base.mmap_ptr)
1113       munmap(bo->base.mmap_ptr, bo->base.mmap_size);
1114 
1115    /* Set gem_handle to 0 to indicate that the bo is invalid. It must be
1116     * cleared before closing the gem handle. Otherwise the same gem handle
1117     * could be reused by a newly created bo, and the deferred clear would
1118     * unexpectedly zero out that new bo's tracked gem handle.
1119     */
1120    const uint32_t gem_handle = bo->gem_handle;
1121    bo->gem_handle = 0;
1122    virtgpu_ioctl_gem_close(gpu, gem_handle);
1123 
1124    mtx_unlock(&gpu->dma_buf_import_mutex);
1125 
1126    return true;
1127 }
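/*
 * virtgpu_bo_destroy and virtgpu_bo_create_from_dma_buf can race on the same
 * gem handle.  Re-checking the refcount under dma_buf_import_mutex lets a
 * concurrent import that revived the bo win: destroy then returns false and
 * the bo stays valid for the importer.
 */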
1128 
1129 static uint32_t
1130 virtgpu_bo_blob_flags(struct virtgpu *gpu,
1131                       VkMemoryPropertyFlags flags,
1132                       VkExternalMemoryHandleTypeFlags external_handles)
1133 {
1134    uint32_t blob_flags = 0;
1135    if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
1136       blob_flags |= VIRTGPU_BLOB_FLAG_USE_MAPPABLE;
1137    if (external_handles)
1138       blob_flags |= VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1139    if (external_handles & VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) {
1140       if (gpu->supports_cross_device)
1141          blob_flags |= VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE;
1142    }
1143 
1144    return blob_flags;
1145 }
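/*
 * Mapping used above: HOST_VISIBLE memory requests MAPPABLE, any external
 * handle type requests SHAREABLE, and dma-buf export additionally requests
 * CROSS_DEVICE when the kernel advertises VIRTGPU_PARAM_CROSS_DEVICE.
 */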
1146 
1147 static VkResult
1148 virtgpu_bo_create_from_dma_buf(struct vn_renderer *renderer,
1149                                VkDeviceSize size,
1150                                int fd,
1151                                VkMemoryPropertyFlags flags,
1152                                struct vn_renderer_bo **out_bo)
1153 {
1154    struct virtgpu *gpu = (struct virtgpu *)renderer;
1155    struct drm_virtgpu_resource_info info;
1156    uint32_t gem_handle = 0;
1157    struct virtgpu_bo *bo = NULL;
1158 
1159    mtx_lock(&gpu->dma_buf_import_mutex);
1160 
1161    gem_handle = virtgpu_ioctl_prime_fd_to_handle(gpu, fd);
1162    if (!gem_handle)
1163       goto fail;
1164    bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1165 
1166    if (virtgpu_ioctl_resource_info(gpu, gem_handle, &info))
1167       goto fail;
1168 
1169    /* Upon import, blob_flags is not passed to the kernel and is only for
1170     * internal use. Set it to what works best for us.
1171     * - blob mem: SHAREABLE + conditional MAPPABLE per VkMemoryPropertyFlags
1172     * - classic 3d: SHAREABLE only, so that export works and map fails
1173     */
1174    uint32_t blob_flags = VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
1175    size_t mmap_size = 0;
1176    if (info.blob_mem) {
1177       /* must be VIRTGPU_BLOB_MEM_HOST3D or VIRTGPU_BLOB_MEM_GUEST_VRAM */
1178       if (info.blob_mem != gpu->bo_blob_mem)
1179          goto fail;
1180 
1181       blob_flags |= virtgpu_bo_blob_flags(gpu, flags, 0);
1182 
1183       /* mmap_size is only used when mappable */
1184       mmap_size = 0;
1185       if (blob_flags & VIRTGPU_BLOB_FLAG_USE_MAPPABLE) {
1186          if (info.size < size)
1187             goto fail;
1188 
1189          mmap_size = size;
1190       }
1191    }
1192 
1193    /* we check bo->gem_handle instead of bo->refcount because bo->refcount
1194     * might merely have been memset to 0 and, strictly speaking, never initialized
1195     */
1196    if (bo->gem_handle == gem_handle) {
1197       if (bo->base.mmap_size < mmap_size)
1198          goto fail;
1199       if (blob_flags & ~bo->blob_flags)
1200          goto fail;
1201 
1202       /* we can't use vn_renderer_bo_ref as the refcount may drop to 0
1203        * temporarily before virtgpu_bo_destroy grabs the lock
1204        */
1205       vn_refcount_fetch_add_relaxed(&bo->base.refcount, 1);
1206    } else {
1207       *bo = (struct virtgpu_bo){
1208          .base = {
1209             .refcount = VN_REFCOUNT_INIT(1),
1210             .res_id = info.res_handle,
1211             .mmap_size = mmap_size,
1212          },
1213          .gem_handle = gem_handle,
1214          .blob_flags = blob_flags,
1215       };
1216    }
1217 
1218    mtx_unlock(&gpu->dma_buf_import_mutex);
1219 
1220    *out_bo = &bo->base;
1221 
1222    return VK_SUCCESS;
1223 
1224 fail:
1225    if (gem_handle && bo->gem_handle != gem_handle)
1226       virtgpu_ioctl_gem_close(gpu, gem_handle);
1227    mtx_unlock(&gpu->dma_buf_import_mutex);
1228    return VK_ERROR_INVALID_EXTERNAL_HANDLE;
1229 }
1230 
1231 static VkResult
1232 virtgpu_bo_create_from_device_memory(
1233    struct vn_renderer *renderer,
1234    VkDeviceSize size,
1235    vn_object_id mem_id,
1236    VkMemoryPropertyFlags flags,
1237    VkExternalMemoryHandleTypeFlags external_handles,
1238    struct vn_renderer_bo **out_bo)
1239 {
1240    struct virtgpu *gpu = (struct virtgpu *)renderer;
1241    const uint32_t blob_flags = virtgpu_bo_blob_flags(gpu, flags, external_handles);
1242 
1243    uint32_t res_id;
1244    uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1245       gpu, gpu->bo_blob_mem, blob_flags, size, mem_id, &res_id);
1246    if (!gem_handle)
1247       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1248 
1249    struct virtgpu_bo *bo = util_sparse_array_get(&gpu->bo_array, gem_handle);
1250    *bo = (struct virtgpu_bo){
1251       .base = {
1252          .refcount = VN_REFCOUNT_INIT(1),
1253          .res_id = res_id,
1254          .mmap_size = size,
1255       },
1256       .gem_handle = gem_handle,
1257       .blob_flags = blob_flags,
1258    };
1259 
1260    *out_bo = &bo->base;
1261 
1262    return VK_SUCCESS;
1263 }
1264 
1265 static void
1266 virtgpu_shmem_destroy_now(struct vn_renderer *renderer,
1267                           struct vn_renderer_shmem *_shmem)
1268 {
1269    struct virtgpu *gpu = (struct virtgpu *)renderer;
1270    struct virtgpu_shmem *shmem = (struct virtgpu_shmem *)_shmem;
1271 
1272    munmap(shmem->base.mmap_ptr, shmem->base.mmap_size);
1273    virtgpu_ioctl_gem_close(gpu, shmem->gem_handle);
1274 }
1275 
1276 static void
1277 virtgpu_shmem_destroy(struct vn_renderer *renderer,
1278                       struct vn_renderer_shmem *shmem)
1279 {
1280    struct virtgpu *gpu = (struct virtgpu *)renderer;
1281 
1282    if (vn_renderer_shmem_cache_add(&gpu->shmem_cache, shmem))
1283       return;
1284 
1285    virtgpu_shmem_destroy_now(&gpu->base, shmem);
1286 }
1287 
1288 static struct vn_renderer_shmem *
1289 virtgpu_shmem_create(struct vn_renderer *renderer, size_t size)
1290 {
1291    struct virtgpu *gpu = (struct virtgpu *)renderer;
1292 
1293    struct vn_renderer_shmem *cached_shmem =
1294       vn_renderer_shmem_cache_get(&gpu->shmem_cache, size);
1295    if (cached_shmem) {
1296       cached_shmem->refcount = VN_REFCOUNT_INIT(1);
1297       return cached_shmem;
1298    }
1299 
1300    uint32_t res_id;
1301    uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(
1302       gpu, gpu->shmem_blob_mem, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0,
1303       &res_id);
1304    if (!gem_handle)
1305       return NULL;
1306 
1307    void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size);
1308    if (!ptr) {
1309       virtgpu_ioctl_gem_close(gpu, gem_handle);
1310       return NULL;
1311    }
1312 
1313    struct virtgpu_shmem *shmem =
1314       util_sparse_array_get(&gpu->shmem_array, gem_handle);
1315    *shmem = (struct virtgpu_shmem){
1316       .base = {
1317          .refcount = VN_REFCOUNT_INIT(1),
1318          .res_id = res_id,
1319          .mmap_size = size,
1320          .mmap_ptr = ptr,
1321       },
1322       .gem_handle = gem_handle,
1323    };
1324 
1325    return &shmem->base;
1326 }
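/*
 * Shmems are MAPPABLE-only blobs with blob_id 0 (host shmem, see
 * virtgpu_init_shmem_blob_mem), stay mapped for their whole lifetime, and
 * are recycled through shmem_cache before being destroyed for real.
 */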
1327 
1328 static VkResult
1329 virtgpu_wait(struct vn_renderer *renderer,
1330              const struct vn_renderer_wait *wait)
1331 {
1332    struct virtgpu *gpu = (struct virtgpu *)renderer;
1333 
1334    const int ret = virtgpu_ioctl_syncobj_timeline_wait(gpu, wait, false);
1335    if (ret && errno != ETIME)
1336       return VK_ERROR_DEVICE_LOST;
1337 
1338    return ret ? VK_TIMEOUT : VK_SUCCESS;
1339 }
1340 
1341 static VkResult
1342 virtgpu_submit(struct vn_renderer *renderer,
1343                const struct vn_renderer_submit *submit)
1344 {
1345    struct virtgpu *gpu = (struct virtgpu *)renderer;
1346 
1347    const int ret = virtgpu_ioctl_submit(gpu, submit);
1348    return ret ? VK_ERROR_DEVICE_LOST : VK_SUCCESS;
1349 }
1350 
1351 static void
1352 virtgpu_init_renderer_info(struct virtgpu *gpu)
1353 {
1354    struct vn_renderer_info *info = &gpu->base.info;
1355 
1356    info->drm.props = (VkPhysicalDeviceDrmPropertiesEXT){
1357       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT,
1358       .hasPrimary = gpu->has_primary,
1359       .hasRender = true,
1360       .primaryMajor = gpu->primary_major,
1361       .primaryMinor = gpu->primary_minor,
1362       .renderMajor = gpu->render_major,
1363       .renderMinor = gpu->render_minor,
1364    };
1365 
1366    info->pci.vendor_id = VIRTGPU_PCI_VENDOR_ID;
1367    info->pci.device_id = VIRTGPU_PCI_DEVICE_ID;
1368 
1369    if (gpu->bustype == DRM_BUS_PCI) {
1370       info->pci.has_bus_info = true;
1371       info->pci.props = (VkPhysicalDevicePCIBusInfoPropertiesEXT){
1372          .sType =
1373             VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT,
1374          .pciDomain = gpu->pci_bus_info.domain,
1375          .pciBus = gpu->pci_bus_info.bus,
1376          .pciDevice = gpu->pci_bus_info.dev,
1377          .pciFunction = gpu->pci_bus_info.func,
1378       };
1379    }
1380 
1381    info->has_dma_buf_import = true;
1382    /* TODO switch from emulation to drm_syncobj */
1383    info->has_external_sync = true;
1384 
1385    info->has_implicit_fencing = false;
1386 
1387    const struct virgl_renderer_capset_venus *capset = &gpu->capset.data;
1388    info->wire_format_version = capset->wire_format_version;
1389    info->vk_xml_version = capset->vk_xml_version;
1390    info->vk_ext_command_serialization_spec_version =
1391       capset->vk_ext_command_serialization_spec_version;
1392    info->vk_mesa_venus_protocol_spec_version =
1393       capset->vk_mesa_venus_protocol_spec_version;
1394    assert(capset->supports_blob_id_0);
1395 
1396    /* ensure vk_extension_mask is large enough to hold all capset masks */
1397    STATIC_ASSERT(sizeof(info->vk_extension_mask) >=
1398                  sizeof(capset->vk_extension_mask1));
1399    memcpy(info->vk_extension_mask, capset->vk_extension_mask1,
1400           sizeof(capset->vk_extension_mask1));
1401 
1402    assert(capset->allow_vk_wait_syncs);
1403 
1404    assert(capset->supports_multiple_timelines);
1405    info->max_timeline_count = gpu->max_timeline_count;
1406 
1407    if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_GUEST_VRAM)
1408       info->has_guest_vram = true;
1409 
1410    /* Use guest blob allocations from dedicated heap (Host visible memory) */
1411    if (gpu->bo_blob_mem == VIRTGPU_BLOB_MEM_HOST3D && capset->use_guest_vram)
1412       info->has_guest_vram = true;
1413 }
1414 
1415 static void
1416 virtgpu_destroy(struct vn_renderer *renderer,
1417                 const VkAllocationCallbacks *alloc)
1418 {
1419    struct virtgpu *gpu = (struct virtgpu *)renderer;
1420 
1421    vn_renderer_shmem_cache_fini(&gpu->shmem_cache);
1422 
1423    if (gpu->fd >= 0)
1424       close(gpu->fd);
1425 
1426    mtx_destroy(&gpu->dma_buf_import_mutex);
1427 
1428    util_sparse_array_finish(&gpu->shmem_array);
1429    util_sparse_array_finish(&gpu->bo_array);
1430 
1431    vk_free(alloc, gpu);
1432 }
1433 
1434 static inline void
1435 virtgpu_init_shmem_blob_mem(ASSERTED struct virtgpu *gpu)
1436 {
1437    /* VIRTGPU_BLOB_MEM_GUEST allocations come from guest system memory.  They are
1438     * logically contiguous in the guest but are sglists (iovecs) in the host.
1439     * That makes them slower to process in the host.  With host process
1440     * isolation, it also becomes impossible for the host to access sglists
1441     * directly.
1442     *
1443     * While there are ideas (and shipped code in some cases) such as creating
1444     * udmabufs from sglists, or having a dedicated guest heap, it seems the
1445     * easiest way is to reuse VIRTGPU_BLOB_MEM_HOST3D.  That is, when the
1446     * renderer sees a request to export a blob where
1447     *
1448     *  - blob_mem is VIRTGPU_BLOB_MEM_HOST3D
1449     *  - blob_flags is VIRTGPU_BLOB_FLAG_USE_MAPPABLE
1450     *  - blob_id is 0
1451     *
1452     * it allocates a host shmem.
1453     *
1454     * supports_blob_id_0 has been enforced by mandated render server config.
1455     */
1456    assert(gpu->capset.data.supports_blob_id_0);
1457    gpu->shmem_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1458 }
1459 
1460 static VkResult
1461 virtgpu_init_context(struct virtgpu *gpu)
1462 {
1463    assert(!gpu->capset.version);
1464    const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
1465    if (ret) {
1466       if (VN_DEBUG(INIT)) {
1467          vn_log(gpu->instance, "failed to initialize context: %s",
1468                 strerror(errno));
1469       }
1470       return VK_ERROR_INITIALIZATION_FAILED;
1471    }
1472 
1473    return VK_SUCCESS;
1474 }
1475 
1476 static VkResult
1477 virtgpu_init_capset(struct virtgpu *gpu)
1478 {
1479    gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS;
1480    gpu->capset.version = 0;
1481 
1482    const int ret =
1483       virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version,
1484                              &gpu->capset.data, sizeof(gpu->capset.data));
1485    if (ret) {
1486       if (VN_DEBUG(INIT)) {
1487          vn_log(gpu->instance, "failed to get venus v%d capset: %s",
1488                 gpu->capset.version, strerror(errno));
1489       }
1490       return VK_ERROR_INITIALIZATION_FAILED;
1491    }
1492 
1493    return VK_SUCCESS;
1494 }
1495 
1496 static VkResult
1497 virtgpu_init_params(struct virtgpu *gpu)
1498 {
1499    const uint64_t required_params[] = {
1500       VIRTGPU_PARAM_3D_FEATURES,   VIRTGPU_PARAM_CAPSET_QUERY_FIX,
1501       VIRTGPU_PARAM_RESOURCE_BLOB, VIRTGPU_PARAM_CONTEXT_INIT,
1502    };
1503    uint64_t val;
1504    for (uint32_t i = 0; i < ARRAY_SIZE(required_params); i++) {
1505       val = virtgpu_ioctl_getparam(gpu, required_params[i]);
1506       if (!val) {
1507          if (VN_DEBUG(INIT)) {
1508             vn_log(gpu->instance, "required kernel param %d is missing",
1509                    (int)required_params[i]);
1510          }
1511          return VK_ERROR_INITIALIZATION_FAILED;
1512       }
1513    }
1514 
1515    val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_HOST_VISIBLE);
1516    if (val) {
1517       gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_HOST3D;
1518    } else {
1519       val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_GUEST_VRAM);
1520       if (val) {
1521          gpu->bo_blob_mem = VIRTGPU_BLOB_MEM_GUEST_VRAM;
1522       }
1523    }
1524 
1525    if (!val) {
1526       vn_log(gpu->instance,
1527              "one of required kernel params (%d or %d) is missing",
1528              (int)VIRTGPU_PARAM_HOST_VISIBLE, (int)VIRTGPU_PARAM_GUEST_VRAM);
1529       return VK_ERROR_INITIALIZATION_FAILED;
1530    }
1531 
1532    /* The cross-device feature is optional.  It enables sharing dma-bufs
1533     * with other virtio devices, like virtio-wl or virtio-video used
1534     * by ChromeOS VMs.  Qemu doesn't support cross-device sharing.
1535     */
1536    val = virtgpu_ioctl_getparam(gpu, VIRTGPU_PARAM_CROSS_DEVICE);
1537    if (val)
1538       gpu->supports_cross_device = true;
1539 
1540    /* implied by CONTEXT_INIT uapi */
1541    gpu->max_timeline_count = 64;
1542 
1543    return VK_SUCCESS;
1544 }
1545 
1546 static VkResult
1547 virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
1548 {
1549    bool supported_bus = false;
1550 
1551    switch (dev->bustype) {
1552    case DRM_BUS_PCI:
1553       if (dev->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID &&
1554           dev->deviceinfo.pci->device_id == VIRTGPU_PCI_DEVICE_ID)
1555          supported_bus = true;
1556       break;
1557    case DRM_BUS_PLATFORM:
1558       supported_bus = true;
1559       break;
1560    default:
1561       break;
1562    }
1563 
1564    if (!supported_bus || !(dev->available_nodes & (1 << DRM_NODE_RENDER))) {
1565       if (VN_DEBUG(INIT)) {
1566          const char *name = "unknown";
1567          for (uint32_t i = 0; i < DRM_NODE_MAX; i++) {
1568             if (dev->available_nodes & (1 << i)) {
1569                name = dev->nodes[i];
1570                break;
1571             }
1572          }
1573          vn_log(gpu->instance, "skipping DRM device %s", name);
1574       }
1575       return VK_ERROR_INITIALIZATION_FAILED;
1576    }
1577 
1578    const char *primary_path = dev->nodes[DRM_NODE_PRIMARY];
1579    const char *node_path = dev->nodes[DRM_NODE_RENDER];
1580 
1581    int fd = open(node_path, O_RDWR | O_CLOEXEC);
1582    if (fd < 0) {
1583       if (VN_DEBUG(INIT))
1584          vn_log(gpu->instance, "failed to open %s", node_path);
1585       return VK_ERROR_INITIALIZATION_FAILED;
1586    }
1587 
1588    drmVersionPtr version = drmGetVersion(fd);
1589    if (!version || strcmp(version->name, "virtio_gpu") ||
1590        version->version_major != 0) {
1591       if (VN_DEBUG(INIT)) {
1592          if (version) {
1593             vn_log(gpu->instance, "unknown DRM driver %s version %d",
1594                    version->name, version->version_major);
1595          } else {
1596             vn_log(gpu->instance, "failed to get DRM driver version");
1597          }
1598       }
1599       if (version)
1600          drmFreeVersion(version);
1601       close(fd);
1602       return VK_ERROR_INITIALIZATION_FAILED;
1603    }
1604 
1605    gpu->fd = fd;
1606 
1607    struct stat st;
1608    if (stat(primary_path, &st) == 0) {
1609       gpu->has_primary = true;
1610       gpu->primary_major = major(st.st_rdev);
1611       gpu->primary_minor = minor(st.st_rdev);
1612    } else {
1613       gpu->has_primary = false;
1614       gpu->primary_major = 0;
1615       gpu->primary_minor = 0;
1616    }
1617    stat(node_path, &st);
1618    gpu->render_major = major(st.st_rdev);
1619    gpu->render_minor = minor(st.st_rdev);
1620 
1621    gpu->bustype = dev->bustype;
1622    if (dev->bustype == DRM_BUS_PCI)
1623       gpu->pci_bus_info = *dev->businfo.pci;
1624 
1625    drmFreeVersion(version);
1626 
1627    if (VN_DEBUG(INIT))
1628       vn_log(gpu->instance, "using DRM device %s", node_path);
1629 
1630    return VK_SUCCESS;
1631 }
1632 
1633 static VkResult
1634 virtgpu_open(struct virtgpu *gpu)
1635 {
1636    drmDevicePtr devs[8];
1637    int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
1638    if (count < 0) {
1639       if (VN_DEBUG(INIT))
1640          vn_log(gpu->instance, "failed to enumerate DRM devices");
1641       return VK_ERROR_INITIALIZATION_FAILED;
1642    }
1643 
1644    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1645    for (int i = 0; i < count; i++) {
1646       result = virtgpu_open_device(gpu, devs[i]);
1647       if (result == VK_SUCCESS)
1648          break;
1649    }
1650 
1651    drmFreeDevices(devs, count);
1652 
1653    return result;
1654 }
1655 
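/*
 * Bring-up order: open a virtio-gpu render node, probe the required params,
 * fetch the venus capset, and create the context.  Only then are the shmem
 * blob mem, the shmem cache, the renderer info, and the ops tables set up.
 */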
1656 static VkResult
1657 virtgpu_init(struct virtgpu *gpu)
1658 {
1659    util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem),
1660                           1024);
1661    util_sparse_array_init(&gpu->bo_array, sizeof(struct virtgpu_bo), 1024);
1662 
1663    mtx_init(&gpu->dma_buf_import_mutex, mtx_plain);
1664 
1665    VkResult result = virtgpu_open(gpu);
1666    if (result == VK_SUCCESS)
1667       result = virtgpu_init_params(gpu);
1668    if (result == VK_SUCCESS)
1669       result = virtgpu_init_capset(gpu);
1670    if (result == VK_SUCCESS)
1671       result = virtgpu_init_context(gpu);
1672    if (result != VK_SUCCESS)
1673       return result;
1674 
1675    virtgpu_init_shmem_blob_mem(gpu);
1676 
1677    vn_renderer_shmem_cache_init(&gpu->shmem_cache, &gpu->base,
1678                                 virtgpu_shmem_destroy_now);
1679 
1680    virtgpu_init_renderer_info(gpu);
1681 
1682    gpu->base.ops.destroy = virtgpu_destroy;
1683    gpu->base.ops.submit = virtgpu_submit;
1684    gpu->base.ops.wait = virtgpu_wait;
1685 
1686    gpu->base.shmem_ops.create = virtgpu_shmem_create;
1687    gpu->base.shmem_ops.destroy = virtgpu_shmem_destroy;
1688 
1689    gpu->base.bo_ops.create_from_device_memory =
1690       virtgpu_bo_create_from_device_memory;
1691    gpu->base.bo_ops.create_from_dma_buf = virtgpu_bo_create_from_dma_buf;
1692    gpu->base.bo_ops.destroy = virtgpu_bo_destroy;
1693    gpu->base.bo_ops.export_dma_buf = virtgpu_bo_export_dma_buf;
1694    gpu->base.bo_ops.map = virtgpu_bo_map;
1695    gpu->base.bo_ops.flush = virtgpu_bo_flush;
1696    gpu->base.bo_ops.invalidate = virtgpu_bo_invalidate;
1697 
1698    gpu->base.sync_ops.create = virtgpu_sync_create;
1699    gpu->base.sync_ops.create_from_syncobj = virtgpu_sync_create_from_syncobj;
1700    gpu->base.sync_ops.destroy = virtgpu_sync_destroy;
1701    gpu->base.sync_ops.export_syncobj = virtgpu_sync_export_syncobj;
1702    gpu->base.sync_ops.reset = virtgpu_sync_reset;
1703    gpu->base.sync_ops.read = virtgpu_sync_read;
1704    gpu->base.sync_ops.write = virtgpu_sync_write;
1705 
1706    return VK_SUCCESS;
1707 }
1708 
1709 VkResult
1710 vn_renderer_create_virtgpu(struct vn_instance *instance,
1711                            const VkAllocationCallbacks *alloc,
1712                            struct vn_renderer **renderer)
1713 {
1714    struct virtgpu *gpu = vk_zalloc(alloc, sizeof(*gpu), VN_DEFAULT_ALIGN,
1715                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1716    if (!gpu)
1717       return VK_ERROR_OUT_OF_HOST_MEMORY;
1718 
1719    gpu->instance = instance;
1720    gpu->fd = -1;
1721 
1722    VkResult result = virtgpu_init(gpu);
1723    if (result != VK_SUCCESS) {
1724       virtgpu_destroy(&gpu->base, alloc);
1725       return result;
1726    }
1727 
1728    *renderer = &gpu->base;
1729 
1730    return VK_SUCCESS;
1731 }
1732