/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/mman.h>

#include "common/xe/intel_engine.h"

#include "anv_private.h"

#include "xe/anv_batch_chain.h"

#include "drm-uapi/gpu_scheduler.h"
#include "drm-uapi/xe_drm.h"

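/* Allocate a GEM BO with DRM_IOCTL_XE_GEM_CREATE, deriving the placement
 * regions, scanout/visible-VRAM flags and CPU caching mode from the
 * allocation flags. Returns the GEM handle, or 0 on failure.
 */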
static uint32_t
xe_gem_create(struct anv_device *device,
              const struct intel_memory_class_instance **regions,
              uint16_t regions_count, uint64_t size,
              enum anv_bo_alloc_flags alloc_flags,
              uint64_t *actual_size)
{
   /* TODO: protected content */
   assert((alloc_flags & ANV_BO_ALLOC_PROTECTED) == 0);
   /* WB with 0-way coherency (host-cached but not host-coherent) is not
    * supported by the Xe KMD.
    */
   assert((alloc_flags & ANV_BO_ALLOC_HOST_CACHED) == 0 ||
          (alloc_flags & ANV_BO_ALLOC_HOST_CACHED_COHERENT) == ANV_BO_ALLOC_HOST_CACHED_COHERENT);

   uint32_t flags = 0;
   if (alloc_flags & ANV_BO_ALLOC_SCANOUT)
      flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
   if ((alloc_flags & (ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE)) &&
       !(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) &&
       device->physical->vram_non_mappable.size > 0)
      flags |= DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;

   struct drm_xe_gem_create gem_create = {
      /* From xe_drm.h: If a VM is specified, this BO must:
       * 1. Only ever be bound to that VM.
       * 2. Cannot be exported as a PRIME fd.
       */
      .vm_id = alloc_flags & ANV_BO_ALLOC_EXTERNAL ? 0 : device->vm_id,
      .size = align64(size, device->info->mem_alignment),
      .flags = flags,
   };
   for (uint16_t i = 0; i < regions_count; i++)
      gem_create.placement |= BITFIELD_BIT(regions[i]->instance);

   const struct intel_device_info_pat_entry *pat_entry =
      anv_device_get_pat_entry(device, alloc_flags);
   switch (pat_entry->mmap) {
   case INTEL_DEVICE_INFO_MMAP_MODE_WC:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
      break;
   case INTEL_DEVICE_INFO_MMAP_MODE_WB:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
      break;
   default:
      unreachable("missing");
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
   }

   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create))
      return 0;

   *actual_size = gem_create.size;
   return gem_create.handle;
}

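/* Userptr "BOs" have no real GEM handle of their own (see
 * xe_gem_create_userptr()), so there is nothing to close for them.
 */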
static void
xe_gem_close(struct anv_device *device, struct anv_bo *bo)
{
   if (bo->from_host_ptr)
      return;

   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   intel_ioctl(device->fd, DRM_IOCTL_GEM_CLOSE, &close);
}

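/* Xe mmaps in two steps: DRM_IOCTL_XE_GEM_MMAP_OFFSET returns a fake offset
 * for the BO, which is then mapped through the device fd. A non-NULL
 * placed_addr requests a MAP_FIXED mapping at that address.
 */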
static void *
xe_gem_mmap(struct anv_device *device, struct anv_bo *bo, uint64_t offset,
            uint64_t size, void *placed_addr)
{
   struct drm_xe_gem_mmap_offset args = {
      .handle = bo->gem_handle,
   };
   if (intel_ioctl(device->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &args))
      return MAP_FAILED;

   return mmap(placed_addr, size, PROT_READ | PROT_WRITE,
               (placed_addr != NULL ? MAP_FIXED : 0) | MAP_SHARED,
               device->fd, args.offset);
}

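/* Returns DRM_XE_VM_BIND_FLAG_DUMPABLE when the BO (or the capture-all debug
 * option) asks to be included in KMD error dumps.
 */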
static inline uint32_t
capture_vm_in_error_dump(struct anv_device *device, struct anv_bo *bo)
{
   enum anv_bo_alloc_flags alloc_flags = bo ? bo->alloc_flags : 0;
   bool capture = INTEL_DEBUG(DEBUG_CAPTURE_ALL) ||
                  (alloc_flags & ANV_BO_ALLOC_CAPTURE);

   return capture ? DRM_XE_VM_BIND_FLAG_DUMPABLE : 0;
}

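/* Translate a generic anv_vm_bind into a drm_xe_vm_bind_op. The default op
 * is UNMAP; ANV_VM_BIND becomes MAP, MAP_USERPTR or a NULL binding depending
 * on the BO, and ANV_VM_UNBIND_ALL becomes UNMAP_ALL.
 */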
static struct drm_xe_vm_bind_op
anv_vm_bind_to_drm_xe_vm_bind(struct anv_device *device,
                              struct anv_vm_bind *anv_bind)
{
   struct anv_bo *bo = anv_bind->bo;
   uint16_t pat_index = bo ?
      anv_device_get_pat_entry(device, bo->alloc_flags)->index : 0;

   struct drm_xe_vm_bind_op xe_bind = {
      .obj = 0,
      .obj_offset = anv_bind->bo_offset,
      .range = anv_bind->size,
      .addr = intel_48b_address(anv_bind->address),
      .op = DRM_XE_VM_BIND_OP_UNMAP,
      .flags = capture_vm_in_error_dump(device, bo),
      .prefetch_mem_region_instance = 0,
      .pat_index = pat_index,
   };

   if (anv_bind->op == ANV_VM_BIND) {
      if (!bo) {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP;
         xe_bind.flags |= DRM_XE_VM_BIND_FLAG_NULL;
         assert(xe_bind.obj_offset == 0);
      } else if (bo->from_host_ptr) {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
      } else {
         xe_bind.op = DRM_XE_VM_BIND_OP_MAP;
         xe_bind.obj = bo->gem_handle;
      }
   } else if (anv_bind->op == ANV_VM_UNBIND_ALL) {
      xe_bind.op = DRM_XE_VM_BIND_OP_UNMAP_ALL;
      xe_bind.obj = bo->gem_handle;
      assert(anv_bind->address == 0);
      assert(anv_bind->size == 0);
   } else {
      assert(anv_bind->op == ANV_VM_UNBIND);
   }

   /* userptr and obj_offset are a union, so this overwrites the
    * .obj_offset assignment above.
    */
   if (bo && bo->from_host_ptr)
      xe_bind.userptr = (uintptr_t)bo->map;

   return xe_bind;
}

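/* Issue a single DRM_IOCTL_XE_VM_BIND covering every bind in 'submit',
 * together with its wait/signal syncs and, optionally, a signal on the
 * device bind timeline.
 */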
static inline VkResult
xe_vm_bind_op(struct anv_device *device,
              struct anv_sparse_submission *submit,
              enum anv_vm_bind_flags flags)
{
   VkResult result = VK_SUCCESS;
   const bool signal_bind_timeline =
      flags & ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE;

   int num_syncs = submit->wait_count + submit->signal_count +
                   signal_bind_timeline;
   STACK_ARRAY(struct drm_xe_sync, xe_syncs, num_syncs);
   if (!xe_syncs)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

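   /* Fill the sync array in a fixed order: waits first, then signals, with
    * the bind timeline sync (when requested) last, so it can be reached as
    * xe_syncs[num_syncs - 1] right before the ioctl.
    */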
   int sync_idx = 0;
   for (int s = 0; s < submit->wait_count; s++) {
      xe_syncs[sync_idx++] =
         vk_sync_to_drm_xe_sync(submit->waits[s].sync,
                                submit->waits[s].wait_value,
                                false);
   }
   for (int s = 0; s < submit->signal_count; s++) {
      xe_syncs[sync_idx++] =
         vk_sync_to_drm_xe_sync(submit->signals[s].sync,
                                submit->signals[s].signal_value,
                                true);
   }
   if (signal_bind_timeline) {
      xe_syncs[sync_idx++] = (struct drm_xe_sync) {
         .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
         .flags = DRM_XE_SYNC_FLAG_SIGNAL,
         .handle = intel_bind_timeline_get_syncobj(&device->bind_timeline),
         /* .timeline_value will be set later. */
      };
   }
   assert(sync_idx == num_syncs);

   struct drm_xe_vm_bind args = {
      .vm_id = device->vm_id,
      .num_binds = submit->binds_len,
      .bind = {},
      .num_syncs = num_syncs,
      .syncs = (uintptr_t)xe_syncs,
   };

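   /* A single bind is passed inline through args.bind; for multiple binds
    * the kernel instead reads an array pointed to by args.vector_of_binds.
    */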
   STACK_ARRAY(struct drm_xe_vm_bind_op, xe_binds_stackarray,
               submit->binds_len);
   struct drm_xe_vm_bind_op *xe_binds;
   if (submit->binds_len > 1) {
      if (!xe_binds_stackarray) {
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out_syncs;
      }

      xe_binds = xe_binds_stackarray;
      args.vector_of_binds = (uintptr_t)xe_binds;
   } else {
      xe_binds = &args.bind;
   }

   for (int i = 0; i < submit->binds_len; i++)
      xe_binds[i] = anv_vm_bind_to_drm_xe_vm_bind(device, &submit->binds[i]);

   if (signal_bind_timeline) {
      xe_syncs[num_syncs - 1].timeline_value =
         intel_bind_timeline_bind_begin(&device->bind_timeline);
   }
   int ret = intel_ioctl(device->fd, DRM_IOCTL_XE_VM_BIND, &args);
   int errno_ = errno;
   if (signal_bind_timeline)
      intel_bind_timeline_bind_end(&device->bind_timeline);

   /* The vm_bind ioctl can return a wide variety of error codes, but most of
    * them shouldn't happen in the real world. Here we list the interesting
    * error cases:
    *
    * - EINVAL: shouldn't happen. This is most likely a bug in our driver.
    * - ENOMEM: generic out-of-memory error.
    * - ENOBUFS: an out-of-memory error related to having too many bind
    *   operations in the same ioctl, so the recommendation is to issue
    *   fewer binds per ioctl (ideally 1).
    *
    * The xe.ko team has plans to differentiate between lack of device memory
    * and lack of host memory in the future.
    */
   if (ret) {
      assert(errno_ != EINVAL);
      if (errno_ == ENOMEM || errno_ == ENOBUFS)
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
      else
         result = vk_device_set_lost(&device->vk,
                                     "vm_bind failed with errno %d", errno_);
      goto out_stackarray;
   }

   ANV_RMV(vm_binds, device, submit->binds, submit->binds_len);

out_stackarray:
   STACK_ARRAY_FINISH(xe_binds_stackarray);
out_syncs:
   STACK_ARRAY_FINISH(xe_syncs);

   return result;
}


static VkResult
xe_vm_bind(struct anv_device *device, struct anv_sparse_submission *submit,
           enum anv_vm_bind_flags flags)
{
   return xe_vm_bind_op(device, submit, flags);
}

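/* Bind the whole BO at its assigned VMA (bo->offset), signaling the device
 * bind timeline when the operation completes.
 */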
static VkResult
xe_vm_bind_bo(struct anv_device *device, struct anv_bo *bo)
{
   struct anv_vm_bind bind = {
      .bo = bo,
      .address = bo->offset,
      .bo_offset = 0,
      .size = bo->actual_size,
      .op = ANV_VM_BIND,
   };
   struct anv_sparse_submission submit = {
      .queue = NULL,
      .binds = &bind,
      .binds_len = 1,
      .binds_capacity = 1,
      .wait_count = 0,
      .signal_count = 0,
   };
   return xe_vm_bind_op(device, &submit,
                        ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE);
}

static VkResult
xe_vm_unbind_bo(struct anv_device *device, struct anv_bo *bo)
{
   struct anv_vm_bind bind = {
      .bo = bo,
      .address = 0,
      .bo_offset = 0,
      .size = 0,
      .op = ANV_VM_UNBIND_ALL,
   };
   struct anv_sparse_submission submit = {
      .queue = NULL,
      .binds = &bind,
      .binds_len = 1,
      .binds_capacity = 1,
      .wait_count = 0,
      .signal_count = 0,
   };
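   /* Userptr BOs have no GEM handle for UNMAP_ALL to reference, so unbind
    * their exact range with a plain UNBIND instead.
    */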
   if (bo->from_host_ptr) {
      bind.address = bo->offset;
      bind.size = bo->actual_size;
      bind.op = ANV_VM_UNBIND;
   }
   return xe_vm_bind_op(device, &submit,
                        ANV_VM_BIND_FLAG_SIGNAL_BIND_TIMELINE);
}

static uint32_t
xe_gem_create_userptr(struct anv_device *device, void *mem, uint64_t size)
{
   /* We return the workaround BO gem_handle here because Xe doesn't create
    * handles for userptrs, but we still need to make it look to the rest of
    * Anv like the operation succeeded.
    */
   return device->workaround_bo->gem_handle;
}

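/* Xe submission does not take per-BO execbuf flags (residency is handled
 * through vm_bind), so there is nothing to translate here.
 */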
static uint32_t
xe_bo_alloc_flags_to_bo_flags(struct anv_device *device,
                              enum anv_bo_alloc_flags alloc_flags)
{
   return 0;
}


const struct anv_kmd_backend *
anv_xe_kmd_backend_get(void)
{
   static const struct anv_kmd_backend xe_backend = {
      .gem_create = xe_gem_create,
      .gem_create_userptr = xe_gem_create_userptr,
      .gem_close = xe_gem_close,
      .gem_mmap = xe_gem_mmap,
      .vm_bind = xe_vm_bind,
      .vm_bind_bo = xe_vm_bind_bo,
      .vm_unbind_bo = xe_vm_unbind_bo,
      .queue_exec_locked = xe_queue_exec_locked,
      .queue_exec_async = xe_queue_exec_async,
      .bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
   };
   return &xe_backend;
}