xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_knl_drm.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "tu_knl_drm.h"
#include "tu_device.h"
#include "tu_rmv.h"

VkResult
tu_allocate_userspace_iova(struct tu_device *dev,
                           uint64_t size,
                           uint64_t client_iova,
                           enum tu_bo_alloc_flags flags,
                           uint64_t *iova)
{
   *iova = 0;

   if (flags & TU_BO_ALLOC_REPLAYABLE) {
      if (client_iova) {
         if (util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
            *iova = client_iova;
         } else {
            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
         }
      } else {
         /* We have to keep replayable IOVAs separate from ordinary ones so
          * that they don't clash. The easiest way to do this is to allocate
          * them from the other end of the address space.
          */
         dev->vma.alloc_high = true;
         *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
      }
   } else {
      dev->vma.alloc_high = false;
      *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
   }

   if (!*iova)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}
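
/* Usage sketch (hypothetical caller, for illustration only): a capture/replay
 * sequence would allocate with no client_iova at capture time and record the
 * address it gets back, then pass that recorded address as client_iova on
 * replay so util_vma_heap_alloc_addr() can pin the BO at the same location:
 *
 *    uint64_t iova;
 *    VkResult result = tu_allocate_userspace_iova(dev, size, 0,
 *                                                 TU_BO_ALLOC_REPLAYABLE,
 *                                                 &iova);
 *    // ... later, on replay, with the recorded address:
 *    result = tu_allocate_userspace_iova(dev, size, recorded_iova,
 *                                        TU_BO_ALLOC_REPLAYABLE, &iova);
 */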

int
tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                 DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}

void
tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map) {
      TU_RMV(bo_unmap, dev, bo);
      munmap(bo->map, bo->size);
   }

   TU_RMV(bo_destroy, dev, bo);
   tu_debug_bos_del(dev, bo);

   mtx_lock(&dev->bo_mutex);
   dev->bo_count--;
   dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];

   struct tu_bo* exchanging_bo = tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
   exchanging_bo->bo_list_idx = bo->bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
#ifdef TU_HAS_VIRTIO
      vma->res_id = bo->res_id;
#endif
      vma->iova = bo->iova;
      vma->size = bo->size;
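      /* The current queue fence is recorded below so that the zombie-VMA
       * reaping (done in the kernel-specific backends, not here) can wait for
       * the GPU to be done with this range before closing the handle and
       * returning the IOVA to dev->vma.
       */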
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so we don't want to free the BO pointer; instead we reset it to 0 to
       * mark that array entry as free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      /* Note that the virtgpu GEM_CLOSE path is a bit different, but it does
       * not use the !has_set_iova path, so we can ignore it here.
       */
      struct drm_gem_close req = {
         .handle = gem_handle,
      };

      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}

uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);

   return syncobj;
}

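/* The tu_timeline_sync code below emulates a vk_sync type on top of a plain
 * (binary) DRM syncobj: the RESET/SUBMITTED/SIGNALED state is tracked in
 * userspace, and CPU waits on not-yet-submitted syncs fall back to the
 * device's submit_mutex/timeline_cond in tu_timeline_sync_wait() rather than
 * the kernel syncobj wait.
 */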
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);

   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}

static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
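   /* WAIT_FOR_SUBMIT makes the kernel wait for a fence to be attached to a
    * syncobj that has none yet (instead of erroring out), which covers waits
    * that race with the corresponding submission.
    */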
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* syncobj absolute timeouts are signed.  clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}

/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;
   struct tu_timeline_sync *submitted_syncs[wait_count];

   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* return error covering timeout */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue.  This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked.  Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}

const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};
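
/* Registration sketch (illustrative; the real hookup lives in the KMD-specific
 * backends, not in this file): the common vk_sync machinery would pick this
 * type up from a NULL-terminated list on the physical device, roughly:
 *
 *    static const struct vk_sync_type *const sync_types[] = {
 *       &tu_timeline_sync_type,
 *       NULL,
 *    };
 *    ...
 *    physical_device->vk.supported_sync_types = sync_types;
 */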