/*
 * Copyright © 2018 Google, Inc.
 * Copyright © 2015 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "tu_knl.h"

#include <errno.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xf86drm.h>

#include "vk_util.h"

#include "drm-uapi/msm_drm.h"
#include "util/u_debug.h"
#include "util/hash_table.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_device.h"
#include "tu_dynamic_rendering.h"
#include "tu_knl_drm.h"
#include "tu_rmv.h"
#include "redump.h"

struct tu_msm_queue_submit
{
   struct vk_queue_submit *vk_submit;
   struct tu_u_trace_submission_data *u_trace_submission_data;

   struct tu_cmd_buffer **cmd_buffers;
   struct drm_msm_gem_submit_cmd *cmds;
   struct drm_msm_gem_submit_syncobj *in_syncobjs;
   struct drm_msm_gem_submit_syncobj *out_syncobjs;

   uint32_t nr_cmd_buffers;
   uint32_t nr_in_syncobjs;
   uint32_t nr_out_syncobjs;
   uint32_t entry_count;
   uint32_t perf_pass_index;

   bool     autotune_fence;
};

struct tu_u_trace_syncobj
{
   uint32_t msm_queue_id;
   uint32_t fence;
};

static int
tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
{
   /* Technically this requires a pipe, but the kernel only supports one pipe
    * anyway at the time of writing and most of these are clearly pipe
    * independent. */
   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = param,
   };

   int ret = drmCommandWriteRead(fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   if (ret)
      return ret;

   *value = req.value;

   return 0;
}

static int
tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id)
{
   uint64_t value;
   int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GPU_ID, &value);
   if (ret)
      return ret;

   *id = value;
   return 0;
}

static int
tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size)
{
   uint64_t value;
   int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_SIZE, &value);
   if (ret)
      return ret;

   *size = value;
   return 0;
}

static int
tu_drm_get_gmem_base(const struct tu_physical_device *dev, uint64_t *base)
{
   return tu_drm_get_param(dev->local_fd, MSM_PARAM_GMEM_BASE, base);
}

static int
tu_drm_get_va_prop(const struct tu_physical_device *dev,
                   uint64_t *va_start, uint64_t *va_size)
{
   uint64_t value;
   int ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_START, &value);
   if (ret)
      return ret;

   *va_start = value;

   ret = tu_drm_get_param(dev->local_fd, MSM_PARAM_VA_SIZE, &value);
   if (ret)
      return ret;

   *va_size = value;

   return 0;
}

static uint32_t
tu_drm_get_priorities(const struct tu_physical_device *dev)
{
   uint64_t val = 1;
   tu_drm_get_param(dev->local_fd, MSM_PARAM_PRIORITIES, &val);
   assert(val >= 1);

   return val;
}

static bool
tu_drm_is_memory_type_supported(int fd, uint32_t flags)
{
   struct drm_msm_gem_new req_alloc = { .size = 0x1000, .flags = flags };

   int ret =
      drmCommandWriteRead(fd, DRM_MSM_GEM_NEW, &req_alloc, sizeof(req_alloc));
   if (ret) {
      return false;
   }

   struct drm_gem_close req_close = {
      .handle = req_alloc.handle,
   };
   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &req_close);

   return true;
}

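/*
 * Open a private per-logical-device fd and read the initial GPU fault count
 * so that msm_device_check_status() can later detect newly reported faults.
 */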
static VkResult
msm_device_init(struct tu_device *dev)
{
   int fd = open(dev->physical_device->fd_path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      return vk_startup_errorf(
            dev->physical_device->instance, VK_ERROR_INITIALIZATION_FAILED,
            "failed to open device %s", dev->physical_device->fd_path);
   }

   int ret = tu_drm_get_param(fd, MSM_PARAM_FAULTS, &dev->fault_count);
   if (ret != 0) {
      close(fd);
      return vk_startup_errorf(dev->physical_device->instance,
                               VK_ERROR_INITIALIZATION_FAILED,
                               "Failed to get initial fault count: %d", ret);
   }

   dev->fd = fd;

   return VK_SUCCESS;
}

static void
msm_device_finish(struct tu_device *dev)
{
   close(dev->fd);
}

static int
msm_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
{
   return tu_drm_get_param(dev->fd, MSM_PARAM_TIMESTAMP, ts);
}

static int
msm_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
{
   int ret = tu_drm_get_param(dev->fd, MSM_PARAM_SUSPENDS, suspend_count);
   return ret;
}

static VkResult
msm_device_check_status(struct tu_device *device)
{
   uint64_t last_fault_count = device->fault_count;
   int ret = tu_drm_get_param(device->fd, MSM_PARAM_FAULTS, &device->fault_count);
   if (ret != 0)
      return vk_device_set_lost(&device->vk, "error getting GPU fault count: %d", ret);

   if (last_fault_count != device->fault_count)
      return vk_device_set_lost(&device->vk, "GPU faulted or hung");

   return VK_SUCCESS;
}

static int
msm_submitqueue_new(struct tu_device *dev,
                    int priority,
                    uint32_t *queue_id)
{
   assert(priority >= 0 &&
          priority < dev->physical_device->submitqueue_priority_count);
   struct drm_msm_submitqueue req = {
      .flags = 0,
      .prio = priority,
   };

   int ret = drmCommandWriteRead(dev->fd,
                                 DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req));
   if (ret)
      return ret;

   *queue_id = req.id;
   return 0;
}

static void
msm_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
{
   drmCommandWrite(dev->fd, DRM_MSM_SUBMITQUEUE_CLOSE,
                   &queue_id, sizeof(uint32_t));
}

static void
tu_gem_close(const struct tu_device *dev, uint32_t gem_handle)
{
   struct drm_gem_close req = {
      .handle = gem_handle,
   };

   drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
}

/** Helper for DRM_MSM_GEM_INFO, returns 0 on error. */
static uint64_t
tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info)
{
   struct drm_msm_gem_info req = {
      .handle = gem_handle,
      .info = info,
   };

   int ret = drmCommandWriteRead(dev->fd,
                                 DRM_MSM_GEM_INFO, &req, sizeof(req));
   if (ret < 0)
      return 0;

   return req.value;
}

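/*
 * Wait on an MSM fence via DRM_MSM_WAIT_FENCE. A negative fence means no
 * work has been submitted yet, which trivially counts as signaled.
 */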
static VkResult
tu_wait_fence(struct tu_device *dev,
              uint32_t queue_id,
              int fence,
              uint64_t timeout_ns)
{
   /* fence was created when no work was yet submitted */
   if (fence < 0)
      return VK_SUCCESS;

   struct drm_msm_wait_fence req = {
      .fence = fence,
      .queueid = queue_id,
   };
   int ret;

   get_abs_timeout(&req.timeout, timeout_ns);

   ret = drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &req, sizeof(req));
   if (ret) {
      if (ret == -ETIMEDOUT) {
         return VK_TIMEOUT;
      } else {
         mesa_loge("tu_wait_fence failed! %d (%s)", ret, strerror(errno));
         return VK_ERROR_UNKNOWN;
      }
   }

   return VK_SUCCESS;
}

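/*
 * Reap zombie VMAs whose fences have signaled: clear their kernel-side iova
 * (MSM_INFO_SET_IOVA with value 0), close the GEM handle, and return the
 * address range to the userspace VMA heap. If 'wait' is set, block (up to
 * 3s) on the oldest zombie first. Callers hold the VMA lock (hence the
 * _locked suffix).
 */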
static VkResult
tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
{
   if (!u_vector_length(&dev->zombie_vmas))
      return VK_SUCCESS;

   if (wait) {
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_head(&dev->zombie_vmas);
      /* Wait for 3s (arbitrary timeout) */
      VkResult ret = tu_wait_fence(dev, dev->queues[0]->msm_queue_id,
                                   vma->fence, 3000000000);

      if (ret != VK_SUCCESS)
         return ret;
   }

   int last_signaled_fence = -1;
   while (u_vector_length(&dev->zombie_vmas) > 0) {
      struct tu_zombie_vma *vma = (struct tu_zombie_vma *)
            u_vector_tail(&dev->zombie_vmas);
      if (vma->fence > last_signaled_fence) {
         VkResult ret =
            tu_wait_fence(dev, dev->queues[0]->msm_queue_id, vma->fence, 0);
         if (ret != VK_SUCCESS)
            return ret;

         last_signaled_fence = vma->fence;
      }

      if (vma->gem_handle) {
         /* Ensure that the kernel's internal VMA is freed. */
         struct drm_msm_gem_info req = {
            .handle = vma->gem_handle,
            .info = MSM_INFO_SET_IOVA,
            .value = 0,
         };

         int ret =
            drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
         if (ret < 0) {
            mesa_loge("MSM_INFO_SET_IOVA(0) failed! %d (%s)", ret,
                      strerror(errno));
            return VK_ERROR_UNKNOWN;
         }

         tu_gem_close(dev, vma->gem_handle);

         util_vma_heap_free(&dev->vma, vma->iova, vma->size);
      }

      u_vector_remove(&dev->zombie_vmas);
   }

   return VK_SUCCESS;
}

static bool
tu_restore_from_zombie_vma_locked(struct tu_device *dev,
                                  uint32_t gem_handle,
                                  uint64_t *iova)
{
   struct tu_zombie_vma *vma;
   u_vector_foreach (vma, &dev->zombie_vmas) {
      if (vma->gem_handle == gem_handle) {
         *iova = vma->iova;

         /* mark to skip later gem and iova cleanup */
         vma->gem_handle = 0;
         return true;
      }
   }

   return false;
}

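/*
 * Allocate an iova from the userspace-managed VMA heap and bind it to the
 * GEM object with MSM_INFO_SET_IOVA. For dma-bufs that are still on the
 * zombie list, the old iova is reused instead of allocating a new one.
 */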
static VkResult
msm_allocate_userspace_iova_locked(struct tu_device *dev,
                                   uint32_t gem_handle,
                                   uint64_t size,
                                   uint64_t client_iova,
                                   enum tu_bo_alloc_flags flags,
                                   uint64_t *iova)
{
   VkResult result;

   *iova = 0;

   if ((flags & TU_BO_ALLOC_DMABUF) &&
       tu_restore_from_zombie_vma_locked(dev, gem_handle, iova))
      return VK_SUCCESS;

   tu_free_zombie_vma_locked(dev, false);

   result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
   if (result == VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS) {
      /* The address may already be freed by us, but not yet considered
       * freed by the kernel. We have to wait until all work that may hold
       * the address is done. Since addresses are meant to be replayed only
       * by debug tooling, it should be ok to wait.
       */
      tu_free_zombie_vma_locked(dev, true);
      result = tu_allocate_userspace_iova(dev, size, client_iova, flags, iova);
   }

   if (result != VK_SUCCESS)
      return result;

   struct drm_msm_gem_info req = {
      .handle = gem_handle,
      .info = MSM_INFO_SET_IOVA,
      .value = *iova,
   };

   int ret =
      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
   if (ret < 0) {
      util_vma_heap_free(&dev->vma, *iova, size);
      mesa_loge("MSM_INFO_SET_IOVA failed! %d (%s)", ret, strerror(errno));
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   return VK_SUCCESS;
}

static VkResult
tu_allocate_kernel_iova(struct tu_device *dev,
                        uint32_t gem_handle,
                        uint64_t *iova)
{
   *iova = tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA);
   if (!*iova)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   return VK_SUCCESS;
}

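/*
 * Common BO initialization: assign an iova (userspace- or kernel-managed),
 * append the BO to the global submit bo_list, and fill in the tu_bo. On
 * failure the GEM handle is closed on behalf of the caller.
 */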
static VkResult
tu_bo_init(struct tu_device *dev,
           struct vk_object_base *base,
           struct tu_bo *bo,
           uint32_t gem_handle,
           uint64_t size,
           uint64_t client_iova,
           enum tu_bo_alloc_flags flags,
           const char *name)
{
   VkResult result = VK_SUCCESS;
   uint64_t iova = 0;

   assert(!client_iova || dev->physical_device->has_set_iova);

   if (dev->physical_device->has_set_iova) {
      result = msm_allocate_userspace_iova_locked(dev, gem_handle, size,
                                                  client_iova, flags, &iova);
   } else {
      result = tu_allocate_kernel_iova(dev, gem_handle, &iova);
   }

   if (result != VK_SUCCESS) {
      tu_gem_close(dev, gem_handle);
      return result;
   }

   name = tu_debug_bos_add(dev, size, name);

   mtx_lock(&dev->bo_mutex);
   uint32_t idx = dev->bo_count++;

   /* grow the bo list if needed */
   if (idx >= dev->bo_list_size) {
      uint32_t new_len = idx + 64;
      struct drm_msm_gem_submit_bo *new_ptr = (struct drm_msm_gem_submit_bo *)
         vk_realloc(&dev->vk.alloc, dev->bo_list, new_len * sizeof(*dev->bo_list),
                    8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!new_ptr) {
         dev->bo_count--;
         mtx_unlock(&dev->bo_mutex);
         if (dev->physical_device->has_set_iova)
            util_vma_heap_free(&dev->vma, iova, size);
         tu_gem_close(dev, gem_handle);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      dev->bo_list = new_ptr;
      dev->bo_list_size = new_len;
   }

   bool dump = flags & TU_BO_ALLOC_ALLOW_DUMP;
   dev->bo_list[idx] = (struct drm_msm_gem_submit_bo) {
      .flags = MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE |
               COND(dump, MSM_SUBMIT_BO_DUMP),
      .handle = gem_handle,
      .presumed = iova,
   };

   *bo = (struct tu_bo) {
      .gem_handle = gem_handle,
      .size = size,
      .iova = iova,
      .name = name,
      .refcnt = 1,
      .bo_list_idx = idx,
      .base = base,
   };

   mtx_unlock(&dev->bo_mutex);

   TU_RMV(bo_allocate, dev, bo);

   return VK_SUCCESS;
}

/**
 * Sets the name in the kernel so that the contents of /debug/dri/0/gem are more
 * useful.
 *
 * We skip this on release builds (when we're also not doing BO debugging) to
 * reduce overhead.
 */
static void
tu_bo_set_kernel_name(struct tu_device *dev, struct tu_bo *bo, const char *name)
{
   bool kernel_bo_names = dev->bo_sizes != NULL;
#if MESA_DEBUG
   kernel_bo_names = true;
#endif
   if (!kernel_bo_names)
      return;

   struct drm_msm_gem_info req = {
      .handle = bo->gem_handle,
      .info = MSM_INFO_SET_NAME,
      .value = (uintptr_t)(void *)name,
      .len = strlen(name),
   };

   int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
   if (ret) {
      mesa_logw_once("Failed to set BO name with DRM_MSM_GEM_INFO: %d",
                     ret);
   }
}

static inline void
msm_vma_lock(struct tu_device *dev)
{
   if (dev->physical_device->has_set_iova)
      mtx_lock(&dev->vma_mutex);
}

static inline void
msm_vma_unlock(struct tu_device *dev)
{
   if (dev->physical_device->has_set_iova)
      mtx_unlock(&dev->vma_mutex);
}

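/*
 * Allocate a new GEM object with caching flags derived from the requested
 * memory property flags, then initialize it as a tu_bo. Cached non-coherent
 * memory is mapped and cleaned right away so no stale cache lines reach the
 * GPU.
 */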
static VkResult
msm_bo_init(struct tu_device *dev,
            struct vk_object_base *base,
            struct tu_bo **out_bo,
            uint64_t size,
            uint64_t client_iova,
            VkMemoryPropertyFlags mem_property,
            enum tu_bo_alloc_flags flags,
            const char *name)
{
   struct drm_msm_gem_new req = {
      .size = size,
      .flags = 0
   };

   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
         req.flags |= MSM_BO_CACHED_COHERENT;
      } else {
         req.flags |= MSM_BO_CACHED;
      }
   } else {
      req.flags |= MSM_BO_WC;
   }

   if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
      req.flags |= MSM_BO_GPU_READONLY;

   int ret = drmCommandWriteRead(dev->fd,
                                 DRM_MSM_GEM_NEW, &req, sizeof(req));
   if (ret)
      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   struct tu_bo* bo = tu_device_lookup_bo(dev, req.handle);
   assert(bo && bo->gem_handle == 0);

   assert(!(flags & TU_BO_ALLOC_DMABUF));

   msm_vma_lock(dev);

   VkResult result =
      tu_bo_init(dev, base, bo, req.handle, size, client_iova, flags, name);

   msm_vma_unlock(dev);

   if (result == VK_SUCCESS) {
      *out_bo = bo;
      if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
         TU_RMV(internal_resource_create, dev, bo);
         TU_RMV(resource_name, dev, bo, name);
      }
   } else
      memset(bo, 0, sizeof(*bo));

   /* We don't use bo->name here because bo->name is NULL in the !TU_DEBUG=bo case. */
   tu_bo_set_kernel_name(dev, bo, name);

   if (result == VK_SUCCESS &&
       (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) &&
       !(mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
      tu_bo_map(dev, bo, NULL);

      /* Cached non-coherent memory may already have dirty cache lines, so
       * we should clean those lines before the GPU gets a chance to write
       * into this memory.
       *
       * MSM already does this automatically for uncached (MSM_BO_WC) memory.
       */
      tu_bo_sync_cache(dev, bo, 0, VK_WHOLE_SIZE, TU_MEM_SYNC_CACHE_TO_GPU);
   }

   return result;
}

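/*
 * Import a dma-buf as a tu_bo. Importing the same dma-buf again returns the
 * same GEM handle, so an already-live BO is just refcounted; dma_bo_lock
 * serializes this against concurrent BO destruction.
 */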
static VkResult
msm_bo_init_dmabuf(struct tu_device *dev,
                   struct tu_bo **out_bo,
                   uint64_t size,
                   int prime_fd)
{
   /* lseek() to get the real size */
   off_t real_size = lseek(prime_fd, 0, SEEK_END);
   lseek(prime_fd, 0, SEEK_SET);
   if (real_size < 0 || (uint64_t) real_size < size)
      return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   /* iova allocation needs to consider the object's *real* size: */
   size = real_size;

   /* Importing the same dmabuf several times would yield the same
    * gem_handle. Thus there could be a race between destroying a BO and
    * importing the same dmabuf from different threads. We must not permit
    * the creation of a dmabuf BO and its release to happen in parallel.
    */
   u_rwlock_wrlock(&dev->dma_bo_lock);
   msm_vma_lock(dev);

   uint32_t gem_handle;
   int ret = drmPrimeFDToHandle(dev->fd, prime_fd,
                                &gem_handle);
   if (ret) {
      msm_vma_unlock(dev);
      u_rwlock_wrunlock(&dev->dma_bo_lock);
      return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   struct tu_bo* bo = tu_device_lookup_bo(dev, gem_handle);

   if (bo->refcnt != 0) {
      p_atomic_inc(&bo->refcnt);
      msm_vma_unlock(dev);
      u_rwlock_wrunlock(&dev->dma_bo_lock);

      *out_bo = bo;
      return VK_SUCCESS;
   }

   VkResult result =
      tu_bo_init(dev, NULL, bo, gem_handle, size, 0, TU_BO_ALLOC_DMABUF, "dmabuf");

   if (result != VK_SUCCESS)
      memset(bo, 0, sizeof(*bo));
   else
      *out_bo = bo;

   msm_vma_unlock(dev);
   u_rwlock_wrunlock(&dev->dma_bo_lock);

   return result;
}

static VkResult
msm_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
{
   uint64_t offset = tu_gem_info(dev, bo->gem_handle, MSM_INFO_GET_OFFSET);
   if (!offset)
      return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */
   void *map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
                    MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
                    dev->fd, offset);
   if (map == MAP_FAILED)
      return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);

   bo->map = map;
   TU_RMV(bo_map, dev, bo);

   return VK_SUCCESS;
}

static void
msm_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
   mtx_lock(&dev->bo_mutex);
   dev->bo_list[bo->bo_list_idx].flags |= MSM_SUBMIT_BO_DUMP;
   mtx_unlock(&dev->bo_mutex);
}


static void
msm_bo_set_metadata(struct tu_device *dev, struct tu_bo *bo,
                    void *metadata, uint32_t metadata_size)
{
   struct drm_msm_gem_info req = {
      .handle = bo->gem_handle,
      .info = MSM_INFO_SET_METADATA,
      .value = (uintptr_t)(void *)metadata,
      .len = metadata_size,
   };

   int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
   if (ret) {
      mesa_logw_once("Failed to set BO metadata with DRM_MSM_GEM_INFO: %d",
                     ret);
   }
}

static int
msm_bo_get_metadata(struct tu_device *dev, struct tu_bo *bo,
                    void *metadata, uint32_t metadata_size)
{
   struct drm_msm_gem_info req = {
      .handle = bo->gem_handle,
      .info = MSM_INFO_GET_METADATA,
      .value = (uintptr_t)(void *)metadata,
      .len = metadata_size,
   };

   int ret = drmCommandWrite(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
   if (ret) {
      mesa_logw_once("Failed to get BO metadata with DRM_MSM_GEM_INFO: %d",
                     ret);
   }

   return ret;
}

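/*
 * Build the per-submit scratch state: count the command stream entries
 * (including optional perfcntr, u_trace, and autotune entries) and allocate
 * the cmd and syncobj arrays that DRM_MSM_GEM_SUBMIT will point at.
 */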
static VkResult
tu_queue_submit_create_locked(struct tu_queue *queue,
                              struct vk_queue_submit *vk_submit,
                              const uint32_t nr_in_syncobjs,
                              const uint32_t nr_out_syncobjs,
                              uint32_t perf_pass_index,
                              struct tu_msm_queue_submit *new_submit)
{
   VkResult result;

   bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
   bool has_trace_points = false;

   struct vk_command_buffer **vk_cmd_buffers = vk_submit->command_buffers;

   memset(new_submit, 0, sizeof(struct tu_msm_queue_submit));

   new_submit->cmd_buffers = (struct tu_cmd_buffer **) vk_cmd_buffers;
   new_submit->nr_cmd_buffers = vk_submit->command_buffer_count;
   tu_insert_dynamic_cmdbufs(queue->device, &new_submit->cmd_buffers,
                             &new_submit->nr_cmd_buffers);

   uint32_t entry_count = 0;
   for (uint32_t j = 0; j < new_submit->nr_cmd_buffers; ++j) {
      struct tu_cmd_buffer *cmdbuf = new_submit->cmd_buffers[j];

      if (perf_pass_index != ~0)
         entry_count++;

      entry_count += cmdbuf->cs.entry_count;

      if (u_trace_enabled && u_trace_has_points(&cmdbuf->trace)) {
         if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
            entry_count++;

         has_trace_points = true;
      }
   }

   new_submit->autotune_fence =
      tu_autotune_submit_requires_fence(new_submit->cmd_buffers, new_submit->nr_cmd_buffers);
   if (new_submit->autotune_fence)
      entry_count++;

   new_submit->cmds = (struct drm_msm_gem_submit_cmd *) vk_zalloc(
      &queue->device->vk.alloc, entry_count * sizeof(*new_submit->cmds), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (new_submit->cmds == NULL) {
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_cmds;
   }

   if (has_trace_points) {
      result =
         tu_u_trace_submission_data_create(
            queue->device, new_submit->cmd_buffers,
            new_submit->nr_cmd_buffers,
            &new_submit->u_trace_submission_data);

      if (result != VK_SUCCESS) {
         goto fail_u_trace_submission_data;
      }
   }

   /* Allocate without wait timeline semaphores */
   new_submit->in_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
      &queue->device->vk.alloc,
      nr_in_syncobjs * sizeof(*new_submit->in_syncobjs), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (new_submit->in_syncobjs == NULL) {
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_in_syncobjs;
   }

   /* Allocate with signal timeline semaphores considered */
   new_submit->out_syncobjs = (struct drm_msm_gem_submit_syncobj *) vk_zalloc(
      &queue->device->vk.alloc,
      nr_out_syncobjs * sizeof(*new_submit->out_syncobjs), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (new_submit->out_syncobjs == NULL) {
      result = vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_out_syncobjs;
   }

   new_submit->entry_count = entry_count;
   new_submit->nr_in_syncobjs = nr_in_syncobjs;
   new_submit->nr_out_syncobjs = nr_out_syncobjs;
   new_submit->perf_pass_index = perf_pass_index;
   new_submit->vk_submit = vk_submit;

   return VK_SUCCESS;

fail_out_syncobjs:
   vk_free(&queue->device->vk.alloc, new_submit->in_syncobjs);
fail_in_syncobjs:
   if (new_submit->u_trace_submission_data)
      tu_u_trace_submission_data_finish(queue->device,
                                        new_submit->u_trace_submission_data);
fail_u_trace_submission_data:
   vk_free(&queue->device->vk.alloc, new_submit->cmds);
fail_cmds:
   return result;
}

static void
tu_queue_submit_finish(struct tu_queue *queue, struct tu_msm_queue_submit *submit)
{
   vk_free(&queue->device->vk.alloc, submit->cmds);
   vk_free(&queue->device->vk.alloc, submit->in_syncobjs);
   vk_free(&queue->device->vk.alloc, submit->out_syncobjs);
   if (submit->cmd_buffers != (void *) submit->vk_submit->command_buffers)
      vk_free(&queue->device->vk.alloc, submit->cmd_buffers);
}

static void
tu_fill_msm_gem_submit(struct tu_device *dev,
                       struct drm_msm_gem_submit_cmd *cmd,
                       struct tu_cs_entry *cs_entry)
{
   cmd->type = MSM_SUBMIT_CMD_BUF;
   cmd->submit_idx = cs_entry->bo->bo_list_idx;
   cmd->submit_offset = cs_entry->offset;
   cmd->size = cs_entry->size;
   cmd->pad = 0;
   cmd->nr_relocs = 0;
   cmd->relocs = 0;
}

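/*
 * Translate the command buffers into drm_msm_gem_submit_cmd entries,
 * interleaving the perfcntr pass CS, u_trace timestamp-copy CS, and autotune
 * CS where requested. Called with bo_mutex held since it uses bo_list
 * indices.
 */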
static void
tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
                                   struct tu_msm_queue_submit *submit,
                                   struct tu_cs *autotune_cs)
{
   struct tu_device *dev = queue->device;
   struct drm_msm_gem_submit_cmd *cmds = submit->cmds;

   uint32_t entry_idx = 0;
   for (uint32_t j = 0; j < submit->nr_cmd_buffers; ++j) {
      struct tu_cmd_buffer *cmdbuf = submit->cmd_buffers[j];
      struct tu_cs *cs = &cmdbuf->cs;

      if (submit->perf_pass_index != ~0) {
         struct tu_cs_entry *perf_cs_entry =
            &dev->perfcntrs_pass_cs_entries[submit->perf_pass_index];

         tu_fill_msm_gem_submit(dev, &cmds[entry_idx], perf_cs_entry);
         entry_idx++;
      }

      for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) {
         tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &cs->entries[i]);
      }

      if (submit->u_trace_submission_data) {
         struct tu_cs *ts_cs =
            submit->u_trace_submission_data->cmd_trace_data[j].timestamp_copy_cs;
         if (ts_cs) {
            tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &ts_cs->entries[0]);
            entry_idx++;
         }
      }
   }

   if (autotune_cs) {
      assert(autotune_cs->entry_count == 1);
      tu_fill_msm_gem_submit(dev, &cmds[entry_idx], &autotune_cs->entries[0]);
      entry_idx++;
   }
}

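/*
 * Perform the actual DRM_MSM_GEM_SUBMIT ioctl for a prepared submit,
 * optionally emitting an .rd dump of the submitted buffers, then flush
 * u_trace data and update timeline sync states.
 */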
static VkResult
tu_queue_submit_locked(struct tu_queue *queue, struct tu_msm_queue_submit *submit)
{
   uint32_t submit_idx = queue->device->submit_count++;

   struct tu_cs *autotune_cs = NULL;
   if (submit->autotune_fence) {
      autotune_cs = tu_autotune_on_submit(queue->device,
                                          &queue->device->autotune,
                                          submit->cmd_buffers,
                                          submit->nr_cmd_buffers);
   }

   uint32_t flags = MSM_PIPE_3D0;

   if (submit->vk_submit->wait_count)
      flags |= MSM_SUBMIT_SYNCOBJ_IN;

   if (submit->vk_submit->signal_count)
      flags |= MSM_SUBMIT_SYNCOBJ_OUT;

   mtx_lock(&queue->device->bo_mutex);

   if (queue->device->implicit_sync_bo_count == 0)
      flags |= MSM_SUBMIT_NO_IMPLICIT;

   /* drm_msm_gem_submit_cmd requires the index of a bo, which could change
    * at any time while bo_mutex is not locked. So we build the submit cmds
    * here, at the actual point of submission.
    */
   tu_queue_build_msm_gem_submit_cmds(queue, submit, autotune_cs);

   struct drm_msm_gem_submit req = {
      .flags = flags,
      .nr_bos = submit->entry_count ? queue->device->bo_count : 0,
      .nr_cmds = submit->entry_count,
      .bos = (uint64_t)(uintptr_t) queue->device->bo_list,
      .cmds = (uint64_t)(uintptr_t)submit->cmds,
      .queueid = queue->msm_queue_id,
      .in_syncobjs = (uint64_t)(uintptr_t)submit->in_syncobjs,
      .out_syncobjs = (uint64_t)(uintptr_t)submit->out_syncobjs,
      .nr_in_syncobjs = submit->nr_in_syncobjs,
      .nr_out_syncobjs = submit->nr_out_syncobjs,
      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
   };

   if (req.nr_cmds && FD_RD_DUMP(ENABLE) &&
       fd_rd_output_begin(&queue->device->rd_output, submit_idx)) {
      struct tu_device *device = queue->device;
      struct fd_rd_output *rd_output = &device->rd_output;

      if (FD_RD_DUMP(FULL)) {
         VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0);
         if (result != VK_SUCCESS) {
            mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %d",
                      device->device_idx, queue->msm_queue_id, result);
         }
      }

      fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8);
      fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8);

      for (unsigned i = 0; i < device->bo_count; i++) {
         struct drm_msm_gem_submit_bo bo = device->bo_list[i];
         struct tu_bo *tu_bo = tu_device_lookup_bo(device, bo.handle);
         uint64_t iova = bo.presumed;

         uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 };
         fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12);
         if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) {
            tu_bo_map(device, tu_bo, NULL); /* note: this would need locking to be safe */
            fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
         }
      }

      for (unsigned i = 0; i < req.nr_cmds; i++) {
         struct drm_msm_gem_submit_cmd *cmd = &submit->cmds[i];
         uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset;
         uint32_t size = cmd->size >> 2;
         uint32_t buf[3] = { iova, size, iova >> 32 };
         fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12);
      }

      fd_rd_output_end(rd_output);
   }

   int ret = drmCommandWriteRead(queue->device->fd,
                                 DRM_MSM_GEM_SUBMIT,
                                 &req, sizeof(req));

   mtx_unlock(&queue->device->bo_mutex);

   tu_debug_bos_print_stats(queue->device);

   if (ret)
      return vk_device_set_lost(&queue->device->vk, "submit failed: %m");

   p_atomic_set(&queue->fence, req.fence);

   uint64_t gpu_offset = 0;
#if HAVE_PERFETTO
   struct tu_perfetto_clocks clocks =
      tu_perfetto_submit(queue->device, queue->device->submit_count, NULL);
   gpu_offset = clocks.gpu_ts_offset;
#endif

   if (submit->u_trace_submission_data) {
      struct tu_u_trace_submission_data *submission_data =
         submit->u_trace_submission_data;
      submission_data->submission_id = queue->device->submit_count;
      submission_data->gpu_ts_offset = gpu_offset;
      /* We have to allocate it here since it is different between drm/kgsl */
      submission_data->syncobj = (struct tu_u_trace_syncobj *)
         vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
               8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      submission_data->syncobj->fence = req.fence;
      submission_data->syncobj->msm_queue_id = queue->msm_queue_id;

      submit->u_trace_submission_data = NULL;

      for (uint32_t i = 0; i < submission_data->cmd_buffer_count; i++) {
         bool free_data = i == submission_data->last_buffer_with_tracepoints;
         if (submission_data->cmd_trace_data[i].trace)
            u_trace_flush(submission_data->cmd_trace_data[i].trace,
                          submission_data, queue->device->vk.current_frame,
                          free_data);

         if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) {
            /* u_trace is owned by cmd_buffer */
            submission_data->cmd_trace_data[i].trace = NULL;
         }
      }
   }

   for (uint32_t i = 0; i < submit->vk_submit->wait_count; i++) {
      if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->waits[i].sync))
         continue;

      struct tu_timeline_sync *sync =
         container_of(submit->vk_submit->waits[i].sync, struct tu_timeline_sync, base);

      assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);

      /* Set the state of the wait timeline sync to SIGNALED: the syncobj is
       * done and ready again, so it can be garbage-collected later.
       */
      sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
   }

   for (uint32_t i = 0; i < submit->vk_submit->signal_count; i++) {
      if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->signals[i].sync))
         continue;

      struct tu_timeline_sync *sync =
         container_of(submit->vk_submit->signals[i].sync, struct tu_timeline_sync, base);

      assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
      /* Set the state of the signal timeline sync to SUBMITTED so we can
       * wait on this timeline sync until it completes if necessary.
       */
      sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
   }

   pthread_cond_broadcast(&queue->device->timeline_cond);

   return VK_SUCCESS;
}

static VkResult
msm_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
   return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000);
}

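/*
 * vk_queue submit entry point: convert the vk_queue_submit waits/signals
 * into MSM syncobj arrays, then hand off to tu_queue_submit_locked() under
 * the device submit mutex.
 */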
static VkResult
msm_queue_submit(struct tu_queue *queue, struct vk_queue_submit *submit)
{
   MESA_TRACE_FUNC();
   uint32_t perf_pass_index = queue->device->perfcntrs_pass_cs ?
                              submit->perf_pass_index : ~0;
   struct tu_msm_queue_submit submit_req;

   if (TU_DEBUG(LOG_SKIP_GMEM_OPS)) {
      tu_dbg_log_gmem_load_store_skips(queue->device);
   }

   pthread_mutex_lock(&queue->device->submit_mutex);

   VkResult ret = tu_queue_submit_create_locked(queue, submit,
         submit->wait_count, submit->signal_count,
         perf_pass_index, &submit_req);

   if (ret != VK_SUCCESS) {
      pthread_mutex_unlock(&queue->device->submit_mutex);
      return ret;
   }

   /* note: assuming there won't be any very large semaphore counts */
   struct drm_msm_gem_submit_syncobj *in_syncobjs = submit_req.in_syncobjs;
   struct drm_msm_gem_submit_syncobj *out_syncobjs = submit_req.out_syncobjs;

   uint32_t nr_in_syncobjs = 0, nr_out_syncobjs = 0;

   for (uint32_t i = 0; i < submit->wait_count; i++) {
      struct vk_sync *sync = submit->waits[i].sync;

      in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
         .handle = tu_syncobj_from_vk_sync(sync),
         .flags = 0,
         .point = submit->waits[i].wait_value,
      };
   }

   for (uint32_t i = 0; i < submit->signal_count; i++) {
      struct vk_sync *sync = submit->signals[i].sync;

      out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
         .handle = tu_syncobj_from_vk_sync(sync),
         .flags = 0,
         .point = submit->signals[i].signal_value,
      };
   }

   ret = tu_queue_submit_locked(queue, &submit_req);

   pthread_mutex_unlock(&queue->device->submit_mutex);
   tu_queue_submit_finish(queue, &submit_req);

   if (ret != VK_SUCCESS)
      return ret;

   u_trace_context_process(&queue->device->trace_context, false);

   return VK_SUCCESS;
}

static const struct tu_knl msm_knl_funcs = {
      .name = "msm",

      .device_init = msm_device_init,
      .device_finish = msm_device_finish,
      .device_get_gpu_timestamp = msm_device_get_gpu_timestamp,
      .device_get_suspend_count = msm_device_get_suspend_count,
      .device_check_status = msm_device_check_status,
      .submitqueue_new = msm_submitqueue_new,
      .submitqueue_close = msm_submitqueue_close,
      .bo_init = msm_bo_init,
      .bo_init_dmabuf = msm_bo_init_dmabuf,
      .bo_export_dmabuf = tu_drm_export_dmabuf,
      .bo_map = msm_bo_map,
      .bo_allow_dump = msm_bo_allow_dump,
      .bo_finish = tu_drm_bo_finish,
      .bo_set_metadata = msm_bo_set_metadata,
      .bo_get_metadata = msm_bo_get_metadata,
      .device_wait_u_trace = msm_device_wait_u_trace,
      .queue_submit = msm_queue_submit,
};

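/*
 * Probe an msm DRM device: require kernel driver 1.6+ (syncobj support),
 * query GPU/GMEM/VA parameters, and fill in a tu_physical_device backed by
 * the msm kernel interface.
 */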
VkResult
tu_knl_drm_msm_load(struct tu_instance *instance,
                    int fd, struct _drmVersion *version,
                    struct tu_physical_device **out)
{
   VkResult result = VK_SUCCESS;

   /* Version 1.6 added SYNCOBJ support. */
   const int min_version_major = 1;
   const int min_version_minor = 6;

   if (version->version_major != min_version_major ||
       version->version_minor < min_version_minor) {
      result = vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                                 "kernel driver for device %s has version %d.%d, "
                                 "but Vulkan requires version >= %d.%d",
                                 version->name,
                                 version->version_major, version->version_minor,
                                 min_version_major, min_version_minor);
      return result;
   }

   struct tu_physical_device *device = (struct tu_physical_device *)
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }

   device->msm_major_version = version->version_major;
   device->msm_minor_version = version->version_minor;

   device->instance = instance;
   device->local_fd = fd;

   if (tu_drm_get_gpu_id(device, &device->dev_id.gpu_id)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "could not get GPU ID");
      goto fail;
   }

   if (tu_drm_get_param(fd, MSM_PARAM_CHIP_ID, &device->dev_id.chip_id)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "could not get CHIP ID");
      goto fail;
   }

   if (tu_drm_get_gmem_size(device, &device->gmem_size)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "could not get GMEM size");
      goto fail;
   }
   device->gmem_size = debug_get_num_option("TU_GMEM", device->gmem_size);

   if (tu_drm_get_gmem_base(device, &device->gmem_base)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "could not get GMEM base");
      goto fail;
   }

   device->has_set_iova = !tu_drm_get_va_prop(device, &device->va_start,
                                              &device->va_size);

   /* Even if the kernel is new enough, the GPU itself may not support it. */
   device->has_cached_coherent_memory =
      (device->msm_minor_version >= 8) &&
      tu_drm_is_memory_type_supported(fd, MSM_BO_CACHED_COHERENT);

   device->submitqueue_priority_count = tu_drm_get_priorities(device);

   device->syncobj_type = vk_drm_syncobj_get_type(fd);
   /* we don't support DRM_CAP_SYNCOBJ_TIMELINE, but drm-shim does */
   if (!(device->syncobj_type.features & VK_SYNC_FEATURE_TIMELINE))
      device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);

   device->sync_types[0] = &device->syncobj_type;
   device->sync_types[1] = &device->timeline_type.sync;
   device->sync_types[2] = NULL;

   device->heap.size = tu_get_system_heap_size(device);
   device->heap.used = 0u;
   device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   instance->knl = &msm_knl_funcs;

   *out = device;

   return VK_SUCCESS;

fail:
   vk_free(&instance->vk.alloc, device);
   return result;
}