/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "i915/anv_batch_chain.h"
#include "anv_private.h"
#include "anv_measure.h"

#include "perf/intel_perf.h"
#include "util/u_debug.h"

#include "drm-uapi/i915_drm.h"

struct anv_execbuf {
   struct drm_i915_gem_execbuffer2 execbuf;

   struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;

   struct drm_i915_gem_exec_object2 *objects;
   uint32_t bo_count;
   uint32_t bo_array_length;
   struct anv_bo **bos;

   uint32_t syncobj_count;
   uint32_t syncobj_array_length;
   struct drm_i915_gem_exec_fence *syncobjs;
   uint64_t *syncobj_values;

   uint32_t cmd_buffer_count;
   struct anv_query_pool *perf_query_pool;

   const VkAllocationCallbacks *alloc;
   VkSystemAllocationScope alloc_scope;
};

static void
anv_execbuf_finish(struct anv_execbuf *exec)
{
   vk_free(exec->alloc, exec->syncobjs);
   vk_free(exec->alloc, exec->syncobj_values);
   vk_free(exec->alloc, exec->objects);
   vk_free(exec->alloc, exec->bos);
}

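/* Chain an i915_user_extension onto the execbuf.  With
 * I915_EXEC_USE_EXTENSIONS set, the kernel reinterprets cliprects_ptr as the
 * head of the extension list, so walk that list and append the new extension
 * at the tail.
 */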
static void
anv_execbuf_add_ext(struct anv_execbuf *exec,
                    uint32_t ext_name,
                    struct i915_user_extension *ext)
{
   __u64 *iter = &exec->execbuf.cliprects_ptr;

   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;

   while (*iter != 0) {
      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
   }

   ext->name = ext_name;

   *iter = (uintptr_t) ext;
}

static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags);

static VkResult
anv_execbuf_add_bo(struct anv_device *device,
                   struct anv_execbuf *exec,
                   struct anv_bo *bo,
                   struct anv_reloc_list *relocs,
                   uint32_t extra_flags)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   if (bo->exec_obj_index < exec->bo_count &&
       exec->bos[bo->exec_obj_index] == bo)
      obj = &exec->objects[bo->exec_obj_index];

   if (obj == NULL) {
      /* We've never seen this one before.  Add it to the list and assign
       * an id that we can use later.
       */
      if (exec->bo_count >= exec->bo_array_length) {
         uint32_t new_len = exec->objects ? exec->bo_array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            vk_realloc(exec->alloc, exec->objects,
                       new_len * sizeof(*new_objects), 8, exec->alloc_scope);
         if (new_objects == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->objects = new_objects;

         struct anv_bo **new_bos =
            vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8,
                       exec->alloc_scope);
         if (new_bos == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->bos = new_bos;
         exec->bo_array_length = new_len;
      }

      assert(exec->bo_count < exec->bo_array_length);

      bo->exec_obj_index = exec->bo_count++;
      obj = &exec->objects[bo->exec_obj_index];
      exec->bos[bo->exec_obj_index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = bo->flags | extra_flags;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   if (extra_flags & EXEC_OBJECT_WRITE) {
      obj->flags |= EXEC_OBJECT_WRITE;
      obj->flags &= ~EXEC_OBJECT_ASYNC;
   }

   if (relocs != NULL) {
      return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
                                       relocs->deps, extra_flags);
   }

   return VK_SUCCESS;
}

/* Add BO dependencies to execbuf */
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags)
{
   for (uint32_t w = 0; w < dep_words; w++) {
      BITSET_WORD mask = deps[w];
      while (mask) {
         int i = u_bit_scan(&mask);
         uint32_t gem_handle = w * BITSET_WORDBITS + i;
         struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
         assert(bo->refcount > 0);
         VkResult result =
            anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   return VK_SUCCESS;
}

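/* Append a DRM syncobj (with an optional timeline point) to the execbuf's
 * fence array, growing the array, and the parallel timeline-value array when
 * it is in use, as needed.
 */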
static VkResult
anv_execbuf_add_syncobj(struct anv_device *device,
                        struct anv_execbuf *exec,
                        uint32_t syncobj,
                        uint32_t flags,
                        uint64_t timeline_value)
{
   if (exec->syncobj_count >= exec->syncobj_array_length) {
      uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);

      struct drm_i915_gem_exec_fence *new_syncobjs =
         vk_realloc(exec->alloc, exec->syncobjs,
                    new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope);
      if (new_syncobjs == NULL)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      exec->syncobjs = new_syncobjs;

      if (exec->syncobj_values) {
         uint64_t *new_syncobj_values =
            vk_realloc(exec->alloc, exec->syncobj_values,
                       new_len * sizeof(*new_syncobj_values), 8,
                       exec->alloc_scope);
         if (new_syncobj_values == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         exec->syncobj_values = new_syncobj_values;
      }

      exec->syncobj_array_length = new_len;
   }

   if (timeline_value && !exec->syncobj_values) {
      exec->syncobj_values =
         vk_zalloc(exec->alloc, exec->syncobj_array_length *
                                sizeof(*exec->syncobj_values),
                   8, exec->alloc_scope);
      if (!exec->syncobj_values)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
      .handle = syncobj,
      .flags = flags,
   };
   if (exec->syncobj_values)
      exec->syncobj_values[exec->syncobj_count] = timeline_value;

   exec->syncobj_count++;

   return VK_SUCCESS;
}

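/* Add a vk_sync as a wait or signal dependency.  BO-based syncs
 * (anv_bo_sync) are added to the validation list; DRM syncobjs become
 * fence-array entries, with a timeline point when the sync is a timeline.
 */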
static VkResult
anv_execbuf_add_sync(struct anv_device *device,
                     struct anv_execbuf *execbuf,
                     struct vk_sync *sync,
                     bool is_signal,
                     uint64_t value)
{
   /* It's illegal to signal a timeline with value 0 because that can never
    * be higher than the current value.  A timeline wait on value 0 is always
    * trivially satisfied because every uint64_t value is >= 0.
    */
   if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
      return VK_SUCCESS;

   if (vk_sync_is_anv_bo_sync(sync)) {
      struct anv_bo_sync *bo_sync =
         container_of(sync, struct anv_bo_sync, sync);

      assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));

      return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
                                is_signal ? EXEC_OBJECT_WRITE : 0);
   } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
      struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);

      if (!(sync->flags & VK_SYNC_IS_TIMELINE))
         value = 0;

      return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
                                     is_signal ? I915_EXEC_FENCE_SIGNAL :
                                                 I915_EXEC_FENCE_WAIT,
                                     value);
   }

   unreachable("Invalid sync type");
}

static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
                             struct anv_cmd_buffer *cmd_buffer)
{
   VkResult result;
   /* Add surface dependencies (BOs) to the execbuf */
   result = anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
                                      cmd_buffer->surface_relocs.dep_words,
                                      cmd_buffer->surface_relocs.deps, 0);
   if (result != VK_SUCCESS)
      return result;

   /* Walk over all of the BOs we've seen and add them and their
    * relocations to the validation list.
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
                                  (*bbo)->bo, &(*bbo)->relocs, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   struct anv_bo **bo_entry;
   u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
      result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
                                  *bo_entry, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

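/* Add every BO backing a state pool's block pool to the validation list. */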
static VkResult
pin_state_pool(struct anv_device *device,
               struct anv_execbuf *execbuf,
               struct anv_state_pool *pool)
{
   anv_block_pool_foreach_bo(bo, &pool->block_pool) {
      VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

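/* Pick the i915 context id and base execbuf flags for this submission,
 * depending on whether the physical device manages its own VM
 * (has_vm_control) and whether the batch targets the companion RCS context.
 */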
static void
get_context_and_exec_flags(struct anv_queue *queue,
                           bool is_companion_rcs_batch,
                           uint64_t *exec_flags,
                           uint32_t *context_id)
{
   assert(queue != NULL);

   struct anv_device *device = queue->device;

   /* Submit the batch to index 0, which is the main virtual engine */
   *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags;

   *context_id = device->physical->has_vm_control ?
                 is_companion_rcs_batch ? queue->companion_rcs_id :
                                          queue->context_id :
                 device->context_id;
}

static VkResult
anv_execbuf_add_trtt_bos(struct anv_device *device,
                         struct anv_execbuf *execbuf)
{
   struct anv_trtt *trtt = &device->trtt;
   VkResult result = VK_SUCCESS;

   /* If l3_addr is zero we're not using TR-TT, so there's no BO to add. */
   if (!trtt->l3_addr)
      return VK_SUCCESS;

   simple_mtx_lock(&trtt->mutex);

   for (int i = 0; i < trtt->num_page_table_bos; i++) {
      result = anv_execbuf_add_bo(device, execbuf, trtt->page_table_bos[i],
                                  NULL, 0);
      if (result != VK_SUCCESS)
         goto out;
   }

out:
   simple_mtx_unlock(&trtt->mutex);
   return result;
}

static VkResult
setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
                              struct anv_queue *queue,
                              struct anv_cmd_buffer **cmd_buffers,
                              uint32_t num_cmd_buffers)
{
   struct anv_device *device = queue->device;
   VkResult result;

   if (unlikely(device->physical->measure_device.config)) {
      for (uint32_t i = 0; i < num_cmd_buffers; i++)
         anv_measure_submit(cmd_buffers[i]);
   }

   /* Edit the tail of the command buffers to chain them all together if they
    * can be.
    */
   anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);

   for (uint32_t i = 0; i < num_cmd_buffers; i++) {
      result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Add all the global BOs to the object list for the softpin case. */
   result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
   if (result != VK_SUCCESS)
      return result;

   if (device->physical->va.bindless_surface_state_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   if (device->physical->va.indirect_push_descriptor_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->indirect_push_descriptor_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->general_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
   if (result != VK_SUCCESS)
      return result;

   result = pin_state_pool(device, execbuf, &device->binding_table_pool);
   if (result != VK_SUCCESS)
      return result;

   if (device->physical->va.aux_tt_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->aux_tt_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   if (device->physical->va.push_descriptor_buffer_pool.size > 0) {
      result = pin_state_pool(device, execbuf, &device->push_descriptor_buffer_pool);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Add the BOs for all user-allocated memory objects, because we can't
    * track them after the binding updates allowed by
    * VK_EXT_descriptor_indexing, and because of how sparse resources work.
    */
   list_for_each_entry(struct anv_device_memory, mem,
                       &device->memory_objects, link) {
      result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   result = anv_execbuf_add_trtt_bos(device, execbuf);
   if (result != VK_SUCCESS)
      return result;

   /* Add all the private BOs from images, because we can't track them after
    * the binding updates allowed by VK_EXT_descriptor_indexing.
    */
   list_for_each_entry(struct anv_image, image,
                       &device->image_private_objects, link) {
      struct anv_bo *private_bo =
         image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo;
      result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   struct list_head *batch_bo = &cmd_buffers[0]->batch_bos;
   struct anv_batch_bo *first_batch_bo =
      list_first_entry(batch_bo, struct anv_batch_bo, link);

   /* The kernel requires that the last entry in the validation list be the
    * batch buffer to execute.  We can simply swap the element corresponding
    * to the first batch_bo in the chain with the last element in the list.
    */
   if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
      uint32_t idx = first_batch_bo->bo->exec_obj_index;
      uint32_t last_idx = execbuf->bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
      assert(execbuf->bos[idx] == first_batch_bo->bo);

      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->exec_obj_index = idx;

      execbuf->objects[last_idx] = tmp_obj;
      execbuf->bos[last_idx] = first_batch_bo->bo;
      first_batch_bo->bo->exec_obj_index = last_idx;
   }

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_flush &&
       anv_bo_needs_host_cache_flush(device->batch_bo_pool.bo_alloc_flags))
      anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
#endif

   assert(!cmd_buffers[0]->is_companion_rcs_cmd_buffer || device->physical->has_vm_control);
   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, cmd_buffers[0]->is_companion_rcs_cmd_buffer,
                              &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = 0,
      .cliprects_ptr = 0,
      .num_cliprects = 0,
      .DR1 = 0,
      .DR4 = 0,
      .flags = I915_EXEC_NO_RELOC |
               I915_EXEC_HANDLE_LUT |
               exec_flags,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   return VK_SUCCESS;
}

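/* Build an execbuf that runs the device's trivial batch
 * (MI_BATCH_BUFFER_END plus a NOOP), used when a submission has no command
 * buffers but still needs to wait on and signal syncs.
 */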
static VkResult
setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
{
   struct anv_device *device = queue->device;
   VkResult result = anv_execbuf_add_bo(device, execbuf,
                                        device->trivial_batch_bo,
                                        NULL, 0);
   if (result != VK_SUCCESS)
      return result;

   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
      .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   return VK_SUCCESS;
}

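/* Point the execbuf at the accumulated syncobjs: use the timeline-fences
 * extension when timeline values were collected, otherwise fall back to the
 * legacy I915_EXEC_FENCE_ARRAY, which reuses the cliprects fields.
 */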
static void
setup_execbuf_fence_params(struct anv_execbuf *execbuf)
{
   if (execbuf->syncobj_values) {
      execbuf->timeline_fences.fence_count = execbuf->syncobj_count;
      execbuf->timeline_fences.handles_ptr = (uintptr_t)execbuf->syncobjs;
      execbuf->timeline_fences.values_ptr = (uintptr_t)execbuf->syncobj_values;
      anv_execbuf_add_ext(execbuf,
                          DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
                          &execbuf->timeline_fences.base);
   } else if (execbuf->syncobjs) {
      execbuf->execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf->execbuf.num_cliprects = execbuf->syncobj_count;
      execbuf->execbuf.cliprects_ptr = (uintptr_t)execbuf->syncobjs;
   }
}

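/* Build the execbuf for an asynchronous submission: add the workaround BO,
 * the submit's batch BOs and their dependencies, the wait/signal syncs, then
 * fill out the drm_i915_gem_execbuffer2 parameters.
 */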
static VkResult
setup_async_execbuf(struct anv_execbuf *execbuf,
                    struct anv_async_submit *submit,
                    uint32_t wait_count,
                    const struct vk_sync_wait *waits,
                    uint32_t signal_count,
                    const struct vk_sync_signal *signals)
{
   struct anv_queue *queue = submit->queue;
   struct anv_device *device = queue->device;

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   VkResult result = anv_execbuf_add_bo(device, execbuf,
                                        device->workaround_bo,
                                        NULL, 0);
   if (result != VK_SUCCESS)
      return result;

   util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, _bo) {
      struct anv_bo *bo = *_bo;

      result = anv_execbuf_add_bo(device, execbuf, bo,
                                  &submit->relocs, 0);
      if (result != VK_SUCCESS)
         return result;

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      if (device->physical->memory.need_flush &&
          anv_bo_needs_host_cache_flush(bo->alloc_flags))
         intel_flush_range(bo->map, bo->size);
#endif
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      result = anv_execbuf_add_sync(device, execbuf,
                                    waits[i].sync,
                                    false /* is_signal */,
                                    waits[i].wait_value);
      if (result != VK_SUCCESS)
         return result;
   }
   for (uint32_t i = 0; i < signal_count; i++) {
      result = anv_execbuf_add_sync(device, execbuf,
                                    signals[i].sync,
                                    true /* is_signal */,
                                    signals[i].signal_value);
      if (result != VK_SUCCESS)
         return result;
   }
   if (submit->signal.sync) {
      result = anv_execbuf_add_sync(device, execbuf,
                                    submit->signal.sync,
                                    true /* is_signal */,
                                    submit->signal.signal_value);
      if (result != VK_SUCCESS)
         return result;
   }
   if (queue->sync) {
      result = anv_execbuf_add_sync(device, execbuf,
                                    queue->sync,
                                    true /* is_signal */,
                                    0 /* signal_value */);
      if (result != VK_SUCCESS)
         return result;
   }

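   /* As in setup_execbuf_for_cmd_buffers(), the kernel requires that the
    * last entry in the validation list be the batch buffer to execute, so
    * swap the first batch BO with the last element in the list.
    */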
   struct anv_bo *batch_bo =
      *util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
   if (batch_bo->exec_obj_index != execbuf->bo_count - 1) {
      uint32_t idx = batch_bo->exec_obj_index;
      uint32_t last_idx = execbuf->bo_count - 1;

      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
      assert(execbuf->bos[idx] == batch_bo);

      execbuf->objects[idx] = execbuf->objects[last_idx];
      execbuf->bos[idx] = execbuf->bos[last_idx];
      execbuf->bos[idx]->exec_obj_index = idx;

      execbuf->objects[last_idx] = tmp_obj;
      execbuf->bos[last_idx] = batch_bo;
      batch_bo->exec_obj_index = last_idx;
   }

   uint64_t exec_flags = 0;
   uint32_t context_id;
   get_context_and_exec_flags(queue, submit->use_companion_rcs,
                              &exec_flags, &context_id);

   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
      .buffers_ptr = (uintptr_t) execbuf->objects,
      .buffer_count = execbuf->bo_count,
      .batch_start_offset = 0,
      .flags = I915_EXEC_NO_RELOC |
               I915_EXEC_HANDLE_LUT |
               exec_flags,
      .rsvd1 = context_id,
      .rsvd2 = 0,
   };

   setup_execbuf_fence_params(execbuf);

   return VK_SUCCESS;
}

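/* Issue the execbuffer2 ioctl, using the _WR variant when an output fence is
 * requested, and retry while the kernel reports ENOMEM.
 */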
static int
anv_gem_execbuffer(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf)
{
   int ret;
   const unsigned long request = (execbuf->flags & I915_EXEC_FENCE_OUT) ?
                                 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR :
                                 DRM_IOCTL_I915_GEM_EXECBUFFER2;

   do {
      ret = intel_ioctl(device->fd, request, execbuf);
   } while (ret && errno == ENOMEM);

   return ret;
}

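/* Dump the batch and the full validation list (sizes, placement, flags) when
 * INTEL_DEBUG(DEBUG_SUBMIT) is enabled or when a submission fails.
 */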
static void
anv_i915_debug_submit(const struct anv_execbuf *execbuf)
{
   uint32_t total_size_kb = 0, total_vram_only_size_kb = 0;
   for (uint32_t i = 0; i < execbuf->bo_count; i++) {
      const struct anv_bo *bo = execbuf->bos[i];
      total_size_kb += bo->size / 1024;
      if (anv_bo_is_vram_only(bo))
         total_vram_only_size_kb += bo->size / 1024;
   }

   fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n",
           execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len,
           (float)total_size_kb / 1024.0f,
           (float)total_vram_only_size_kb / 1024.0f);
   for (uint32_t i = 0; i < execbuf->bo_count; i++) {
      const struct anv_bo *bo = execbuf->bos[i];

      fprintf(stderr, "   BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
              "KB handle=%05u capture=%u vram_only=%u name=%s\n",
              bo->offset, bo->offset + bo->size - 1, bo->size / 1024,
              bo->gem_handle, (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
              anv_bo_is_vram_only(bo), bo->name);
   }
}

VkResult
i915_queue_exec_async(struct anv_async_submit *submit,
                      uint32_t wait_count,
                      const struct vk_sync_wait *waits,
                      uint32_t signal_count,
                      const struct vk_sync_signal *signals)
{
   assert(util_dynarray_num_elements(&submit->batch_bos,
                                     struct anv_bo *) > 0);

   struct anv_queue *queue = submit->queue;
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };

   VkResult result = setup_async_execbuf(&execbuf, submit,
                                         wait_count, waits,
                                         signal_count, signals);
   if (result != VK_SUCCESS)
      goto error;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret)
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");

   result = anv_queue_post_submit(queue, result);

error:
   anv_execbuf_finish(&execbuf);

   return result;
}

static VkResult
i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
                                     struct anv_cmd_buffer *companion_rcs_cmd_buffer,
                                     uint32_t wait_count,
                                     const struct vk_sync_wait *waits)
{
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &queue->device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   VkResult result =
      anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto error;

   for (uint32_t i = 0; i < wait_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    waits[i].sync,
                                    false /* is_signal */,
                                    waits[i].wait_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (queue->companion_sync) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    queue->companion_sync,
                                    true /* is_signal */, 0);
      if (result != VK_SUCCESS)
         goto error;
   }

   result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
                                          &companion_rcs_cmd_buffer, 1);
   if (result != VK_SUCCESS)
      goto error;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   anv_cmd_buffer_exec_batch_debug(queue, 1, &companion_rcs_cmd_buffer, NULL, 0);

   setup_execbuf_fence_params(&execbuf);

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret) {
      anv_i915_debug_submit(&execbuf);
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

error:
   anv_execbuf_finish(&execbuf);
   return result;
}

VkResult
i915_queue_exec_locked(struct anv_queue *queue,
                       uint32_t wait_count,
                       const struct vk_sync_wait *waits,
                       uint32_t cmd_buffer_count,
                       struct anv_cmd_buffer **cmd_buffers,
                       uint32_t signal_count,
                       const struct vk_sync_signal *signals,
                       struct anv_query_pool *perf_query_pool,
                       uint32_t perf_query_pass,
                       struct anv_utrace_submit *utrace_submit)
{
   struct anv_device *device = queue->device;
   struct anv_execbuf execbuf = {
      .alloc = &queue->device->vk.alloc,
      .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
   };
   VkResult result;

   /* If there is a utrace submission but no batch, it means there are no
    * commands to run for utrace.  But we still have to signal the associated
    * syncs, so add them to the submission.
    */
   if (utrace_submit &&
       util_dynarray_num_elements(&utrace_submit->base.batch_bos,
                                  struct anv_bo *) == 0) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    utrace_submit->base.signal.sync,
                                    true /* is_signal */,
                                    utrace_submit->base.signal.signal_value);
      if (result != VK_SUCCESS)
         goto error;

      /* Avoid doing a submission after the application's batch since there
       * are no commands.
       */
      utrace_submit = NULL;
   }

   /* Always add the workaround BO as it includes a driver identifier for the
    * error_state.
    */
   result =
      anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
   if (result != VK_SUCCESS)
      goto error;

   if (device->printf.bo) {
      result =
         anv_execbuf_add_bo(device, &execbuf, device->printf.bo, NULL, 0);
      if (result != VK_SUCCESS)
         goto error;
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    waits[i].sync,
                                    false /* is_signal */,
                                    waits[i].wait_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    signals[i].sync,
                                    true /* is_signal */,
                                    signals[i].signal_value);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (queue->sync) {
      result = anv_execbuf_add_sync(device, &execbuf,
                                    queue->sync,
                                    true /* is_signal */,
                                    0 /* signal_value */);
      if (result != VK_SUCCESS)
         goto error;
   }

   if (cmd_buffer_count) {
      result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers,
                                             cmd_buffer_count);
   } else {
      result = setup_empty_execbuf(&execbuf, queue);
   }

   if (result != VK_SUCCESS)
      goto error;

   const bool has_perf_query = perf_query_pool && cmd_buffer_count;

   if (INTEL_DEBUG(DEBUG_SUBMIT))
      anv_i915_debug_submit(&execbuf);

   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
                                   perf_query_pool, perf_query_pass);

   setup_execbuf_fence_params(&execbuf);

   if (has_perf_query) {
      assert(perf_query_pass < perf_query_pool->n_passes);
      struct intel_perf_query_info *query_info =
         perf_query_pool->pass_query[perf_query_pass];

      /* Some performance queries use only the pipeline statistics HW and
       * don't need OA, so there is no need to reconfigure in that case.
       */
      if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
          (query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
           query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
         int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
                                                    device->perf_fd,
                                                    query_info->oa_metrics_set_id);
         if (ret < 0) {
            result = vk_device_set_lost(&device->vk,
                                        "i915-perf config failed: %s",
                                        strerror(errno));
         }
      }

      struct anv_bo *pass_batch_bo = perf_query_pool->bo;

      struct drm_i915_gem_exec_object2 query_pass_object = {
         .handle = pass_batch_bo->gem_handle,
         .offset = pass_batch_bo->offset,
         .flags = pass_batch_bo->flags,
      };

      uint64_t exec_flags = 0;
      uint32_t context_id;
      get_context_and_exec_flags(queue, false, &exec_flags, &context_id);

      struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
         .buffers_ptr = (uintptr_t) &query_pass_object,
         .buffer_count = 1,
         .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
                                                              perf_query_pass),
         .flags = I915_EXEC_HANDLE_LUT | exec_flags,
         .rsvd1 = context_id,
      };

      int ret = queue->device->info->no_hw ? 0 :
                anv_gem_execbuffer(queue->device, &query_pass_execbuf);
      if (ret)
         result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

   ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);

   int ret = queue->device->info->no_hw ? 0 :
             anv_gem_execbuffer(queue->device, &execbuf.execbuf);
   if (ret) {
      anv_i915_debug_submit(&execbuf);
      result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
   }

   if (cmd_buffer_count != 0 && cmd_buffers[0]->companion_rcs_cmd_buffer) {
      struct anv_cmd_buffer *companion_rcs_cmd_buffer =
         cmd_buffers[0]->companion_rcs_cmd_buffer;
      assert(companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer);
      assert(cmd_buffer_count == 1);
      result = i915_companion_rcs_queue_exec_locked(queue,
                                                    companion_rcs_cmd_buffer,
                                                    wait_count, waits);
   }

   result = anv_queue_post_submit(queue, result);

error:
   anv_execbuf_finish(&execbuf);

   if (result == VK_SUCCESS && utrace_submit) {
      struct vk_sync_signal signal = {
         .sync = utrace_submit->base.signal.sync,
         .signal_value = utrace_submit->base.signal.signal_value,
      };
      result = i915_queue_exec_async(&utrace_submit->base, 0, NULL, 1, &signal);
   }

   return result;
}