/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/mman.h>

#include "common/xe/intel_engine.h"

#include "anv_private.h"

#include "xe/anv_batch_chain.h"

#include "drm-uapi/gpu_scheduler.h"
#include "drm-uapi/xe_drm.h"

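/* Create a GEM BO. Placement is built from the requested memory regions,
 * scanout and visible-VRAM flags are derived from alloc_flags, and the CPU
 * caching mode follows the PAT entry selected for those flags. Returns the
 * GEM handle (0 on failure) and reports the aligned allocation size through
 * actual_size.
 */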
static uint32_t
xe_gem_create(struct anv_device *device,
              const struct intel_memory_class_instance **regions,
              uint16_t regions_count, uint64_t size,
              enum anv_bo_alloc_flags alloc_flags,
              uint64_t *actual_size)
{
   /* TODO: protected content */
   assert((alloc_flags & ANV_BO_ALLOC_PROTECTED) == 0);
   /* WB with 0-way coherency is not supported by the Xe KMD */
   assert((alloc_flags & ANV_BO_ALLOC_HOST_CACHED) == 0 ||
          (alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT);

   uint32_t flags = 0;
   if (alloc_flags & ANV_BO_ALLOC_SCANOUT)
      flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
   if ((alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE)) &&
       !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) &&
       device->physical->vram_non_mappable.size > 0)
      flags |= DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;

   struct drm_xe_gem_create gem_create = {
     /* From xe_drm.h: If a VM is specified, this BO must:
      * 1. Only ever be bound to that VM.
      * 2. Not be exported as a PRIME fd.
      */
     .vm_id = alloc_flags & ANV_BO_ALLOC_EXTERNAL ? 0 : device->vm_id,
     .size = align64(size, device->info->mem_alignment),
     .flags = flags,
   };
   for (uint16_t i = 0; i < regions_count; i++)
      gem_create.placement |= BITFIELD_BIT(regions[i]->instance);

   const struct intel_device_info_pat_entry *pat_entry =
         anv_device_get_pat_entry(device, alloc_flags);
   switch (pat_entry->mmap) {
   case INTEL_DEVICE_INFO_MMAP_MODE_WC:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
      break;
   case INTEL_DEVICE_INFO_MMAP_MODE_WB:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
      break;
   default:
      unreachable("missing");
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
   }

   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create))
      return 0;

   *actual_size = gem_create.size;
   return gem_create.handle;
}

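/* Release the GEM handle backing a BO. Userptr BOs share the workaround BO's
 * handle (see xe_gem_create_userptr) and own no GEM object of their own, so
 * there is nothing to close for them.
 */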
static void
xe_gem_close(struct anv_device *device, struct anv_bo *bo)
{
   if (bo->from_host_ptr)
      return;

   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}

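/* CPU-map a BO: query its mmap offset with DRM_IOCTL_XE_GEM_MMAP_OFFSET and
 * mmap() it through the DRM fd. When placed_addr is non-NULL the mapping is
 * placed there with MAP_FIXED. Returns MAP_FAILED on error.
 */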
static void *
xe_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
            uint64_t size, void *placed_addr)
{
   struct drm_xe_gem_mmap_offset args = {
      .handle = bo->gem_handle,
   };
   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &args))
      return MAP_FAILED;

   return mmap(placed_addr, size, PROT_READ | PROT_WRITE,
               (placed_addr != NULL ? MAP_FIXED : 0) | MAP_SHARED,
               device->fd, args.offset);
}

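/* Return DRM_XE_VM_BIND_FLAG_DUMPABLE when the BO was allocated with
 * ANV_BO_ALLOC_CAPTURE or DEBUG_CAPTURE_ALL is enabled, so the KMD includes
 * it in GPU error dumps.
 */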
static inline uint32_t
capture_vm_in_error_dump(struct anv_device *device, struct anv_bo *bo)
{
   enum anv_bo_alloc_flags alloc_flags = bo ? bo->alloc_flags : 0;
   bool capture = INTEL_DEBUG(DEBUG_CAPTURE_ALL) ||
                  (alloc_flags & ANV_BO_ALLOC_CAPTURE);

   return capture ? DRM_XE_VM_BIND_FLAG_DUMPABLE : 0;
}

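/* Translate an anv_vm_bind operation into the equivalent drm_xe_vm_bind_op:
 * MAP, MAP_USERPTR or a NULL map for binds, UNMAP or UNMAP_ALL for unbinds,
 * with the PAT index and error-dump flag derived from the BO.
 */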
static struct drm_xe_vm_bind_op
anv_vm_bind_to_drm_xe_vm_bind(struct anv_device *device,
                              struct anv_vm_bind *anv_bind)
{
   struct anv_bo *bo = anv_bind->bo;
   uint16_t pat_index = bo ?
      anv_device_get_pat_entry(device, bo->alloc_flags)->index : 0;

   struct drm_xe_vm_bind_op xe_bind = {
         .obj = 0,
         .obj_offset = anv_bind->bo_offset,
         .range = anv_bind->size,
         .addr = intel_48b_address(anv_bind->address),
         .op = DRM_XE_VM_BIND_OP_UNMAP,
         .flags = capture_vm_in_error_dump(device, bo),
         .prefetch_mem_region_instance = 0,
         .pat_index = pat_index,
   };

   if (anv_bind->op == ANV_VM_BIND) {
      if (!bo) {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP;
         xe_bind.flags |= DRM_XE_VM_BIND_FLAG_NULL;
         assert(xe_bind.obj_offset == 0);
      } else if (bo->from_host_ptr) {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
      } else {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP;
         xe_bind.obj = bo->gem_handle;
      }
   } else if (anv_bind->op == ANV_VM_UNBIND_ALL) {
      xe_bind.op = DRM_XE_VM_BIND_OP_UNMAP_ALL;
      xe_bind.obj = bo->gem_handle;
      assert(anv_bind->address == 0);
      assert(anv_bind->size == 0);
   } else {
      assert(anv_bind->op == ANV_VM_UNBIND);
   }

   /* userptr and bo_offset are a union! */
   if (bo && bo->from_host_ptr)
      xe_bind.userptr = (uintptr_t)bo->map;

   return xe_bind;
}

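/* Issue a single DRM_IOCTL_XE_VM_BIND covering every bind in the submission.
 * The submission's waits and signals are converted to drm_xe_sync entries,
 * and an extra sync is appended to signal the device's bind timeline when
 * ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE is set.
 */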
static inline VkResult
xe_vm_bind_op(struct anv_device *device,
              struct anv_sparse_submission *submit,
              enum anv_vm_bind_flags flags)
{
   VkResult result = VK_SUCCESS;
   const bool signal_bind_timeline =
      flags & ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE;

   int num_syncs = submit->wait_count + submit->signal_count +
                   signal_bind_timeline;
   STACK_ARRAY(struct drm_xe_sync, xe_syncs, num_syncs);
   if (!xe_syncs)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   int sync_idx = 0;
   for (int s = 0; s < submit->wait_count; s++) {
      xe_syncs[sync_idx++] =
         vk_sync_to_drm_xe_sync(submit->waits[s].sync,
                                submit->waits[s].wait_value,
                                false);
   }
   for (int s = 0; s < submit->signal_count; s++) {
      xe_syncs[sync_idx++] =
         vk_sync_to_drm_xe_sync(submit->signals[s].sync,
                                submit->signals[s].signal_value,
                                true);
   }
   if (signal_bind_timeline) {
      xe_syncs[sync_idx++] = (struct drm_xe_sync) {
         .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
         .handle = intel_bind_timeline_get_syncobj(&device->bind_timeline),
         /* .timeline_value will be set later. */
      };
   }
   assert(sync_idx == num_syncs);

   struct drm_xe_vm_bind args = {
      .vm_id = device->vm_id,
      .num_binds = submit->binds_len,
      .bind = {},
      .num_syncs = num_syncs,
      .syncs = (uintptr_t)xe_syncs,
   };

   STACK_ARRAY(struct drm_xe_vm_bind_op, xe_binds_stackarray,
               submit->binds_len);
   struct drm_xe_vm_bind_op *xe_binds;
   if (submit->binds_len > 1) {
      if (!xe_binds_stackarray) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out_syncs;
      }

      xe_binds = xe_binds_stackarray;
      args.vector_of_binds = (uintptr_t)xe_binds;
   } else {
      xe_binds = &args.bind;
   }

   for (int i = 0; i < submit->binds_len; i++)
      xe_binds[i] = anv_vm_bind_to_drm_xe_vm_bind(device, &submit->binds[i]);

   if (signal_bind_timeline) {
      xe_syncs[num_syncs - 1].timeline_value =
         intel_bind_timeline_bind_begin(&device->bind_timeline);
   }
   int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &args);
   int errno_ = errno;
   if (signal_bind_timeline)
      intel_bind_timeline_bind_end(&device->bind_timeline);

   /* The vm_bind ioctl can return a wide variety of error codes, but most of
    * them shouldn't happen in the real world. Here we list the interesting
    * error cases:
    *
    * - EINVAL: shouldn't happen. This is most likely a bug in our driver.
    * - ENOMEM: generic out-of-memory error.
    * - ENOBUFS: an out-of-memory error that is related to having too many
    *   bind operations in the same ioctl, so the recommendation here is to
    *   try to issue fewer binds per ioctl (ideally 1).
    *
    * The xe.ko team has plans to differentiate between lack of device memory
    * vs lack of host memory in the future.
    */
   if (ret) {
      assert(errno_ != EINVAL);
      if (errno_ == ENOMEM || errno_ == ENOBUFS)
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
      else
         result = vk_device_set_lost(&device->vk,
                                     "vm_bind failed with errno %d", errno_);
      goto out_stackarray;
   }

   ANV_RMV(vm_binds, device, submit->binds, submit->binds_len);

out_stackarray:
   STACK_ARRAY_FINISH(xe_binds_stackarray);
out_syncs:
   STACK_ARRAY_FINISH(xe_syncs);

   return result;
}

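/* Backend vm_bind entry point; simply forwards the submission to
 * xe_vm_bind_op().
 */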
static VkResult
xe_vm_bind(struct anv_device *device, struct anv_sparse_submission *submit,
           enum anv_vm_bind_flags flags)
{
   return xe_vm_bind_op(device, submit, flags);
}

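/* Bind an entire BO at its assigned address and signal the bind timeline. */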
static VkResult
xe_vm_bind_bo(struct anv_device *device, struct anv_bo *bo)
{
   struct anv_vm_bind bind = {
      .bo = bo,
      .address = bo->offset,
      .bo_offset = 0,
      .size = bo->actual_size,
      .op = ANV_VM_BIND,
   };
   struct anv_sparse_submission submit = {
      .queue = NULL,
      .binds = &bind,
      .binds_len = 1,
      .binds_capacity = 1,
      .wait_count = 0,
      .signal_count = 0,
   };
   return xe_vm_bind_op(device, &submit,
                        ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE);
}

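/* Unbind a BO. GEM-backed BOs use UNMAP_ALL; userptr BOs do not own a real
 * GEM handle, so their exact range is unmapped with a regular unbind instead.
 */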
static VkResult
xe_vm_unbind_bo(struct anv_device *device, struct anv_bo *bo)
{
   struct anv_vm_bind bind = {
      .bo = bo,
      .address = 0,
      .bo_offset = 0,
      .size = 0,
      .op = ANV_VM_UNBIND_ALL,
   };
   struct anv_sparse_submission submit = {
      .queue = NULL,
      .binds = &bind,
      .binds_len = 1,
      .binds_capacity = 1,
      .wait_count = 0,
      .signal_count = 0,
   };
   if (bo->from_host_ptr) {
      bind.address = bo->offset;
      bind.size = bo->actual_size;
      bind.op = ANV_VM_UNBIND;
   }
   return xe_vm_bind_op(device, &submit,
                        ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE);
}

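/* Xe has no GEM object for userptrs; the host pointer is passed at bind time
 * via DRM_XE_VM_BIND_OP_MAP_USERPTR instead (see anv_vm_bind_to_drm_xe_vm_bind).
 */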
static uint32_t
xe_gem_create_userptr(struct anv_device *device, void *mem, uint64_t size)
{
   /* We return the workaround BO gem_handle here, because Xe doesn't
    * create handles for userptrs. But we still need to make it look
    * to the rest of Anv that the operation succeeded.
    */
   return device->workaround_bo->gem_handle;
}

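/* The Xe backend derives no extra KMD BO flags from alloc_flags. */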
static uint32_t
xe_bo_alloc_flags_to_bo_flags(struct anv_device *device,
                              enum anv_bo_alloc_flags alloc_flags)
{
   return 0;
}

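/* Return the table of Xe KMD backend entry points used by the common Anv
 * code.
 */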
const struct anv_kmd_backend *
anv_xe_kmd_backend_get(void)
{
   static const struct anv_kmd_backend xe_backend = {
      .gem_create = xe_gem_create,
      .gem_create_userptr = xe_gem_create_userptr,
      .gem_close = xe_gem_close,
      .gem_mmap = xe_gem_mmap,
      .vm_bind = xe_vm_bind,
      .vm_bind_bo = xe_vm_bind_bo,
      .vm_unbind_bo = xe_vm_unbind_bo,
      .queue_exec_locked = xe_queue_exec_locked,
      .queue_exec_async = xe_queue_exec_async,
      .bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
   };
   return &xe_backend;
}