/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <fcntl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "tu_knl_drm.h"
#include "tu_device.h"
#include "tu_rmv.h"

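/* Carve a GPU VA range for a new BO out of the userspace-managed VMA heap.
 * Replayable (capture/replay) BOs either reuse the client-provided address
 * or are allocated from the top of the address space so they cannot collide
 * with ordinary allocations; everything else grows from the bottom.
 */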
VkResult
tu_allocate_userspace_iova(struct tu_device *dev,
                           uint64_t size,
                           uint64_t client_iova,
                           enum tu_bo_alloc_flags flags,
                           uint64_t *iova)
{
   *iova = 0;

   if (flags & TU_BO_ALLOC_REPLAYABLE) {
      if (client_iova) {
         if (util_vma_heap_alloc_addr(&dev->vma, client_iova, size)) {
            *iova = client_iova;
         } else {
            return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
         }
      } else {
         /* We have to keep replayable IOVAs separate from ordinary ones so
          * that they don't clash. The easiest way to do this is to allocate
          * them from the other end of the address space.
          */
         dev->vma.alloc_high = true;
         *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
      }
   } else {
      dev->vma.alloc_high = false;
      *iova = util_vma_heap_alloc(&dev->vma, size, os_page_size);
   }

   if (!*iova)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}

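/* Export the BO as a dma-buf through PRIME. Returns the new fd on success or
 * -1 on failure.
 */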
int
tu_drm_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
{
   int prime_fd;
   int ret = drmPrimeHandleToFD(dev->fd, bo->gem_handle,
                                DRM_CLOEXEC | DRM_RDWR, &prime_fd);

   return ret == 0 ? prime_fd : -1;
}

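/* Drop a reference to the BO. Once the refcount hits zero, unmap it, remove
 * it from the device's BO list, and either queue its VA range as a zombie VMA
 * (when the kernel lets userspace manage IOVAs) or close the GEM handle
 * right away.
 */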
void
tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   u_rwlock_rdlock(&dev->dma_bo_lock);

   if (!p_atomic_dec_zero(&bo->refcnt)) {
      u_rwlock_rdunlock(&dev->dma_bo_lock);
      return;
   }

   if (bo->map) {
      TU_RMV(bo_unmap, dev, bo);
      munmap(bo->map, bo->size);
   }

   TU_RMV(bo_destroy, dev, bo);
   tu_debug_bos_del(dev, bo);

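   /* Remove the BO from the dense bo_list by swapping the last entry into
    * its slot and fixing up that entry's back-pointer.
    */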
   mtx_lock(&dev->bo_mutex);
   dev->bo_count--;
   dev->bo_list[bo->bo_list_idx] = dev->bo_list[dev->bo_count];

   struct tu_bo *exchanging_bo =
      tu_device_lookup_bo(dev, dev->bo_list[bo->bo_list_idx].handle);
   exchanging_bo->bo_list_idx = bo->bo_list_idx;

   if (bo->implicit_sync)
      dev->implicit_sync_bo_count--;

   mtx_unlock(&dev->bo_mutex);

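   /* With userspace IOVA management the VA range cannot be recycled until the
    * GPU is done with the BO, so instead of closing the GEM handle here we
    * queue a zombie VMA tagged with the most recent queue fence and let it be
    * reaped later.
    */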
   if (dev->physical_device->has_set_iova) {
      mtx_lock(&dev->vma_mutex);
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
         u_vector_add(&dev->zombie_vmas);
      vma->gem_handle = bo->gem_handle;
#ifdef TU_HAS_VIRTIO
      vma->res_id = bo->res_id;
#endif
      vma->iova = bo->iova;
      vma->size = bo->size;
      vma->fence = p_atomic_read(&dev->queues[0]->fence);

      /* Must be cleared under the VMA mutex, or another thread could race to
       * reap the VMA, closing the BO and letting a new GEM allocation produce
       * this handle again.
       */
      memset(bo, 0, sizeof(*bo));
      mtx_unlock(&dev->vma_mutex);
   } else {
      /* Our BO structs are stored in a sparse array in the physical device,
       * so we don't want to free the BO pointer; instead we reset it to 0 to
       * mark that array entry as free.
       */
      uint32_t gem_handle = bo->gem_handle;
      memset(bo, 0, sizeof(*bo));

      /* Note that the virtgpu GEM_CLOSE path is a bit different, but it does
       * not use the !has_set_iova path so we can ignore it here.
       */
      struct drm_gem_close req = {
         .handle = gem_handle,
      };

      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   u_rwlock_rdunlock(&dev->dma_bo_lock);
}

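/* Return the DRM syncobj handle backing a vk_sync, which is either one of our
 * emulated timeline syncs or a common vk_drm_syncobj.
 */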
uint32_t
tu_syncobj_from_vk_sync(struct vk_sync *sync)
{
   uint32_t syncobj = -1;
   if (vk_sync_is_tu_timeline_sync(sync)) {
      syncobj = to_tu_timeline_sync(sync)->syncobj;
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
   }

   assert(syncobj != -1);

   return syncobj;
}

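/* vk_sync_type::init: create the backing binary syncobj and seed the
 * CPU-tracked state.
 */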
static VkResult
tu_timeline_sync_init(struct vk_device *vk_device,
                      struct vk_sync *vk_sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
   uint32_t flags = 0;

   assert(device->fd >= 0);

   int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);

   if (err < 0) {
      return vk_error(device, VK_ERROR_DEVICE_LOST);
   }

   sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
                                 TU_TIMELINE_SYNC_STATE_RESET;

   return VK_SUCCESS;
}

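/* vk_sync_type::finish: destroy the backing syncobj. */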
static void
tu_timeline_sync_finish(struct vk_device *vk_device,
                        struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   assert(dev->fd >= 0);
   ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
   assert(err == 0);
}

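/* vk_sync_type::reset: reset the syncobj and drop the CPU-tracked state back
 * to RESET.
 */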
static VkResult
tu_timeline_sync_reset(struct vk_device *vk_device,
                       struct vk_sync *vk_sync)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);

   int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
   if (err) {
      return vk_errorf(dev, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_RESET failed: %m");
   } else {
      sync->state = TU_TIMELINE_SYNC_STATE_RESET;
   }

   return VK_SUCCESS;
}

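/* Thin wrapper around DRM_IOCTL_SYNCOBJ_WAIT that clamps the timeout and
 * translates ETIME into VK_TIMEOUT.
 */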
static VkResult
drm_syncobj_wait(struct tu_device *device,
                 uint32_t *handles, uint32_t count_handles,
                 uint64_t timeout_nsec, bool wait_all)
{
   uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
   if (wait_all)
      syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

   /* Syncobj absolute timeouts are signed, so clamp OS_TIMEOUT_INFINITE down. */
   timeout_nsec = MIN2(timeout_nsec, (uint64_t)INT64_MAX);

   int err = drmSyncobjWait(device->fd, handles,
                            count_handles, timeout_nsec,
                            syncobj_wait_flags,
                            NULL /* first_signaled */);
   if (err && errno == ETIME) {
      return VK_TIMEOUT;
   } else if (err) {
      return vk_errorf(device, VK_ERROR_UNKNOWN,
                       "DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
   }

   return VK_SUCCESS;
}

/* Based on anv_bo_sync_wait */
static VkResult
tu_timeline_sync_wait(struct vk_device *vk_device,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      enum vk_sync_wait_flags wait_flags,
                      uint64_t abs_timeout_ns)
{
   struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
   bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);

   uint32_t handles[wait_count];
   uint32_t submit_count;
   VkResult ret = VK_SUCCESS;
   uint32_t pending = wait_count;
   struct tu_timeline_sync *submitted_syncs[wait_count];

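   /* Keep rescanning the syncs until nothing is left pending: already
    * signaled syncs can satisfy a WAIT_ANY immediately, submitted syncs are
    * gathered up for a kernel syncobj wait, and syncs that have not even been
    * submitted yet are waited for via the device's timeline_cond below.
    */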
   while (pending) {
      pending = 0;
      submit_count = 0;

      for (unsigned i = 0; i < wait_count; ++i) {
         struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);

         if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
            assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
            pending++;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
            if (wait_flags & VK_SYNC_WAIT_ANY)
               return VK_SUCCESS;
         } else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
            if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
               handles[submit_count] = sync->syncobj;
               submitted_syncs[submit_count++] = sync;
            }
         }
      }

      if (submit_count > 0) {
         do {
            ret = drm_syncobj_wait(dev, handles, submit_count,
                                   abs_timeout_ns, wait_all);
         } while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);

         if (ret == VK_SUCCESS) {
            for (unsigned i = 0; i < submit_count; ++i) {
               struct tu_timeline_sync *sync = submitted_syncs[i];
               sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
            }
         } else {
            /* Propagate the error; this also covers the timeout case. */
            return ret;
         }
      } else if (pending > 0) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&dev->submit_mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending = 0;
         for (uint32_t i = 0; i < wait_count; i++) {
            struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
            if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
               now_pending++;
         }
         assert(now_pending <= pending);

         if (now_pending == pending) {
            struct timespec abstime = {
               .tv_sec = abs_timeout_ns / NSEC_PER_SEC,
               .tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
            };

            ASSERTED int ret;
            ret = pthread_cond_timedwait(&dev->timeline_cond,
                                         &dev->submit_mutex, &abstime);
            assert(ret != EINVAL);
            if (os_time_get_nano() >= abs_timeout_ns) {
               pthread_mutex_unlock(&dev->submit_mutex);
               return VK_TIMEOUT;
            }
         }

         pthread_mutex_unlock(&dev->submit_mutex);
      }
   }

   return ret;
}

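/* Sync type built on binary DRM syncobjs plus the CPU-side SUBMITTED/SIGNALED/
 * RESET tracking above, which lets us wait on fences that have not been
 * submitted to the kernel yet.
 */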
const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = (enum vk_sync_features)(
      VK_SYNC_FEATURE_BINARY | VK_SYNC_FEATURE_GPU_WAIT |
      VK_SYNC_FEATURE_GPU_MULTI_WAIT | VK_SYNC_FEATURE_CPU_WAIT |
      VK_SYNC_FEATURE_CPU_RESET | VK_SYNC_FEATURE_WAIT_ANY |
      VK_SYNC_FEATURE_WAIT_PENDING),
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};