xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/i915/anv_batch_chain.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "i915/anv_batch_chain.h"
25 #include "anv_private.h"
26 #include "anv_measure.h"
27 
28 #include "perf/intel_perf.h"
29 #include "util/u_debug.h"
30 
31 #include "drm-uapi/i915_drm.h"
32 
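/* Accumulated state for a single DRM_IOCTL_I915_GEM_EXECBUFFER2 submission:
 * the exec_object2 validation list with a parallel array of anv_bo pointers,
 * plus the syncobj fences (and optional timeline values) attached to it.
 */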
33 struct anv_execbuf {
34    struct drm_i915_gem_execbuffer2           execbuf;
35 
36    struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
37 
38    struct drm_i915_gem_exec_object2 *        objects;
39    uint32_t                                  bo_count;
40    uint32_t                                  bo_array_length;
41    struct anv_bo **                          bos;
42 
43    uint32_t                                  syncobj_count;
44    uint32_t                                  syncobj_array_length;
45    struct drm_i915_gem_exec_fence *          syncobjs;
46    uint64_t *                                syncobj_values;
47 
48    uint32_t                                  cmd_buffer_count;
49    struct anv_query_pool                     *perf_query_pool;
50 
51    const VkAllocationCallbacks *             alloc;
52    VkSystemAllocationScope                   alloc_scope;
53 };
54 
55 static void
56 anv_execbuf_finish(struct anv_execbuf *exec)
57 {
58    vk_free(exec->alloc, exec->syncobjs);
59    vk_free(exec->alloc, exec->syncobj_values);
60    vk_free(exec->alloc, exec->objects);
61    vk_free(exec->alloc, exec->bos);
62 }
63 
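/* Append an i915_user_extension to the execbuf.  With
 * I915_EXEC_USE_EXTENSIONS set, the kernel reinterprets cliprects_ptr as the
 * head of a singly-linked extension chain, so walk next_extension to the
 * tail and link the new extension there.
 */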
64 static void
65 anv_execbuf_add_ext(struct anv_execbuf *exec,
66                     uint32_t ext_name,
67                     struct i915_user_extension *ext)
68 {
69    __u64 *iter = &exec->execbuf.cliprects_ptr;
70 
71    exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
72 
73    while (*iter != 0) {
74       iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
75    }
76 
77    ext->name = ext_name;
78 
79    *iter = (uintptr_t) ext;
80 }
81 
82 static VkResult
83 anv_execbuf_add_bo_bitset(struct anv_device *device,
84                           struct anv_execbuf *exec,
85                           uint32_t dep_words,
86                           BITSET_WORD *deps,
87                           uint32_t extra_flags);
88 
89 static VkResult
90 anv_execbuf_add_bo(struct anv_device *device,
91                    struct anv_execbuf *exec,
92                    struct anv_bo *bo,
93                    struct anv_reloc_list *relocs,
94                    uint32_t extra_flags)
95 {
96    struct drm_i915_gem_exec_object2 *obj = NULL;
97 
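   /* bo->exec_obj_index may be left over from a previous submission, so only
    * trust it if the slot in this execbuf actually points back at bo.
    */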
98    if (bo->exec_obj_index < exec->bo_count &&
99        exec->bos[bo->exec_obj_index] == bo)
100       obj = &exec->objects[bo->exec_obj_index];
101 
102    if (obj == NULL) {
103       /* We've never seen this one before.  Add it to the list and assign
104        * an id that we can use later.
105        */
106       if (exec->bo_count >= exec->bo_array_length) {
107          uint32_t new_len = exec->objects ? exec->bo_array_length * 2 : 64;
108 
109          struct drm_i915_gem_exec_object2 *new_objects =
110             vk_realloc(exec->alloc, exec->objects,
111                        new_len * sizeof(*new_objects), 8, exec->alloc_scope);
112          if (new_objects == NULL)
113             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
114 
115          exec->objects = new_objects;
116 
117          struct anv_bo **new_bos =
118             vk_realloc(exec->alloc, exec->bos, new_len * sizeof(*new_bos), 8,
119                        exec->alloc_scope);
120          if (new_bos == NULL)
121             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
122 
123          exec->bos = new_bos;
124          exec->bo_array_length = new_len;
125       }
126 
127       assert(exec->bo_count < exec->bo_array_length);
128 
129       bo->exec_obj_index = exec->bo_count++;
130       obj = &exec->objects[bo->exec_obj_index];
131       exec->bos[bo->exec_obj_index] = bo;
132 
133       obj->handle = bo->gem_handle;
134       obj->relocation_count = 0;
135       obj->relocs_ptr = 0;
136       obj->alignment = 0;
137       obj->offset = bo->offset;
138       obj->flags = bo->flags | extra_flags;
139       obj->rsvd1 = 0;
140       obj->rsvd2 = 0;
141    }
142 
143    if (extra_flags & EXEC_OBJECT_WRITE) {
144       obj->flags |= EXEC_OBJECT_WRITE;
145       obj->flags &= ~EXEC_OBJECT_ASYNC;
146    }
147 
148    if (relocs != NULL) {
149       return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
150                                        relocs->deps, extra_flags);
151    }
152 
153    return VK_SUCCESS;
154 }
155 
156 /* Add BO dependencies to the execbuf; each set bit in the bitset is a GEM handle. */
157 static VkResult
158 anv_execbuf_add_bo_bitset(struct anv_device *device,
159                           struct anv_execbuf *exec,
160                           uint32_t dep_words,
161                           BITSET_WORD *deps,
162                           uint32_t extra_flags)
163 {
164    for (uint32_t w = 0; w < dep_words; w++) {
165       BITSET_WORD mask = deps[w];
166       while (mask) {
167          int i = u_bit_scan(&mask);
168          uint32_t gem_handle = w * BITSET_WORDBITS + i;
169          struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
170          assert(bo->refcount > 0);
171          VkResult result =
172             anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
173          if (result != VK_SUCCESS)
174             return result;
175       }
176    }
177 
178    return VK_SUCCESS;
179 }
180 
181 static VkResult
182 anv_execbuf_add_syncobj(struct anv_device *device,
183                         struct anv_execbuf *exec,
184                         uint32_t syncobj,
185                         uint32_t flags,
186                         uint64_t timeline_value)
187 {
188    if (exec->syncobj_count >= exec->syncobj_array_length) {
189       uint32_t new_len = MAX2(exec->syncobj_array_length * 2, 16);
190 
191       struct drm_i915_gem_exec_fence *new_syncobjs =
192          vk_realloc(exec->alloc, exec->syncobjs,
193                     new_len * sizeof(*new_syncobjs), 8, exec->alloc_scope);
194       if (new_syncobjs == NULL)
195          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
196 
197       exec->syncobjs = new_syncobjs;
198 
199       if (exec->syncobj_values) {
200          uint64_t *new_syncobj_values =
201             vk_realloc(exec->alloc, exec->syncobj_values,
202                        new_len * sizeof(*new_syncobj_values), 8,
203                        exec->alloc_scope);
204          if (new_syncobj_values == NULL)
205             return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
206 
207          exec->syncobj_values = new_syncobj_values;
208       }
209 
210       exec->syncobj_array_length = new_len;
211    }
212 
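   /* The values array is allocated lazily, only once the first non-zero
    * timeline value shows up; binary syncobjs alone don't need it.
    */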
213    if (timeline_value && !exec->syncobj_values) {
214       exec->syncobj_values =
215          vk_zalloc(exec->alloc, exec->syncobj_array_length *
216                                 sizeof(*exec->syncobj_values),
217                    8, exec->alloc_scope);
218       if (!exec->syncobj_values)
219          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
220    }
221 
222    exec->syncobjs[exec->syncobj_count] = (struct drm_i915_gem_exec_fence) {
223       .handle = syncobj,
224       .flags = flags,
225    };
226    if (exec->syncobj_values)
227       exec->syncobj_values[exec->syncobj_count] = timeline_value;
228 
229    exec->syncobj_count++;
230 
231    return VK_SUCCESS;
232 }
233 
234 static VkResult
235 anv_execbuf_add_sync(struct anv_device *device,
236                      struct anv_execbuf *execbuf,
237                      struct vk_sync *sync,
238                      bool is_signal,
239                      uint64_t value)
240 {
241    /* It's illegal to signal a timeline with value 0 because that's never
242     * higher than the current value.  A timeline wait on value 0 is always
243     * trivial because every uint64_t value is >= 0.
244     */
245    if ((sync->flags & VK_SYNC_IS_TIMELINE) && value == 0)
246       return VK_SUCCESS;
247 
248    if (vk_sync_is_anv_bo_sync(sync)) {
249       struct anv_bo_sync *bo_sync =
250          container_of(sync, struct anv_bo_sync, sync);
251 
252       assert(is_signal == (bo_sync->state == ANV_BO_SYNC_STATE_RESET));
253 
254       return anv_execbuf_add_bo(device, execbuf, bo_sync->bo, NULL,
255                                 is_signal ? EXEC_OBJECT_WRITE : 0);
256    } else if (vk_sync_type_is_drm_syncobj(sync->type)) {
257       struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
258 
259       if (!(sync->flags & VK_SYNC_IS_TIMELINE))
260          value = 0;
261 
262       return anv_execbuf_add_syncobj(device, execbuf, syncobj->syncobj,
263                                      is_signal ? I915_EXEC_FENCE_SIGNAL :
264                                                  I915_EXEC_FENCE_WAIT,
265                                      value);
266    }
267 
268    unreachable("Invalid sync type");
269 }
270 
271 static VkResult
272 setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
273                              struct anv_cmd_buffer *cmd_buffer)
274 {
275    VkResult result;
276    /* Add surface dependencies (BOs) to the execbuf */
277    result = anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf,
278                                       cmd_buffer->surface_relocs.dep_words,
279                                       cmd_buffer->surface_relocs.deps, 0);
280    if (result != VK_SUCCESS)
281       return result;
282 
283    /* Next, walk over all of the BOs we've seen and add them and their
284     * relocations to the validate list.
285     */
286    struct anv_batch_bo **bbo;
287    u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
288       result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
289                                   (*bbo)->bo, &(*bbo)->relocs, 0);
290       if (result != VK_SUCCESS)
291          return result;
292    }
293 
294    struct anv_bo **bo_entry;
295    u_vector_foreach(bo_entry, &cmd_buffer->dynamic_bos) {
296       result = anv_execbuf_add_bo(cmd_buffer->device, execbuf,
297                                   *bo_entry, NULL, 0);
298       if (result != VK_SUCCESS)
299          return result;
300    }
301 
302    return VK_SUCCESS;
303 }
304 
305 static VkResult
306 pin_state_pool(struct anv_device *device,
307                struct anv_execbuf *execbuf,
308                struct anv_state_pool *pool)
309 {
310    anv_block_pool_foreach_bo(bo, &pool->block_pool) {
311       VkResult result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0);
312       if (result != VK_SUCCESS)
313          return result;
314    }
315 
316    return VK_SUCCESS;
317 }
318 
319 static void
320 get_context_and_exec_flags(struct anv_queue *queue,
321                            bool is_companion_rcs_batch,
322                            uint64_t *exec_flags,
323                            uint32_t *context_id)
324 {
325    assert(queue != NULL);
326 
327    struct anv_device *device = queue->device;
328 
329    /* Submit the batch to index 0, which is the main virtual engine. */
330    *exec_flags = device->physical->has_vm_control ? 0 : queue->exec_flags;
331 
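   /* With per-queue VM control each queue owns its own kernel context (the
    * companion RCS batch gets a separate one); otherwise all queues share
    * the device-wide context and select their engine through exec_flags.
    */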
332    *context_id = device->physical->has_vm_control ?
333                  is_companion_rcs_batch ?
334                  queue->companion_rcs_id :
335                  queue->context_id :
336                  device->context_id;
337 }
338 
339 static VkResult
340 anv_execbuf_add_trtt_bos(struct anv_device *device,
341                          struct anv_execbuf *execbuf)
342 {
343    struct anv_trtt *trtt = &device->trtt;
344    VkResult result = VK_SUCCESS;
345 
346    /* If l3_addr is zero we're not using TR-TT, so there's no BO to add. */
347    if (!trtt->l3_addr)
348       return VK_SUCCESS;
349 
350    simple_mtx_lock(&trtt->mutex);
351 
352    for (int i = 0; i < trtt->num_page_table_bos; i++) {
353       result = anv_execbuf_add_bo(device, execbuf, trtt->page_table_bos[i],
354                                   NULL, 0);
355       if (result != VK_SUCCESS)
356          goto out;
357    }
358 
359 out:
360    simple_mtx_unlock(&trtt->mutex);
361    return result;
362 }
363 
364 static VkResult
365 setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf,
366                               struct anv_queue *queue,
367                               struct anv_cmd_buffer **cmd_buffers,
368                               uint32_t num_cmd_buffers)
369 {
370    struct anv_device *device = queue->device;
371    VkResult result;
372 
373    if (unlikely(device->physical->measure_device.config)) {
374       for (uint32_t i = 0; i < num_cmd_buffers; i++)
375          anv_measure_submit(cmd_buffers[i]);
376    }
377 
378    /* Edit the tail of the command buffers to chain them all together if they
379     * can be.
380     */
381    anv_cmd_buffer_chain_command_buffers(cmd_buffers, num_cmd_buffers);
382 
383    for (uint32_t i = 0; i < num_cmd_buffers; i++) {
384       result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]);
385       if (result != VK_SUCCESS)
386          return result;
387    }
388 
389    /* Add all the global BOs to the object list for the softpin case. */
390    result = pin_state_pool(device, execbuf, &device->scratch_surface_state_pool);
391    if (result != VK_SUCCESS)
392       return result;
393 
394    if (device->physical->va.bindless_surface_state_pool.size > 0) {
395       result = pin_state_pool(device, execbuf, &device->bindless_surface_state_pool);
396       if (result != VK_SUCCESS)
397          return result;
398    }
399 
400    if (device->physical->va.indirect_push_descriptor_pool.size > 0) {
401       result = pin_state_pool(device, execbuf, &device->indirect_push_descriptor_pool);
402       if (result != VK_SUCCESS)
403          return result;
404    }
405 
406    result = pin_state_pool(device, execbuf, &device->internal_surface_state_pool);
407    if (result != VK_SUCCESS)
408       return result;
409 
410    result = pin_state_pool(device, execbuf, &device->dynamic_state_pool);
411    if (result != VK_SUCCESS)
412       return result;
413 
414    result = pin_state_pool(device, execbuf, &device->general_state_pool);
415    if (result != VK_SUCCESS)
416       return result;
417 
418    result = pin_state_pool(device, execbuf, &device->instruction_state_pool);
419    if (result != VK_SUCCESS)
420       return result;
421 
422    result = pin_state_pool(device, execbuf, &device->binding_table_pool);
423    if (result != VK_SUCCESS)
424       return result;
425 
426    if (device->physical->va.aux_tt_pool.size > 0) {
427       result = pin_state_pool(device, execbuf, &device->aux_tt_pool);
428       if (result != VK_SUCCESS)
429          return result;
430    }
431 
432    if (device->physical->va.push_descriptor_buffer_pool.size > 0) {
433       result = pin_state_pool(device, execbuf, &device->push_descriptor_buffer_pool);
434       if (result != VK_SUCCESS)
435          return result;
436    }
437 
438    /* Add the BOs for all user-allocated memory objects because we can't
439     * track VK_EXT_descriptor_indexing update-after-bind changes and because
440     * of how sparse resources work.
441     */
442    list_for_each_entry(struct anv_device_memory, mem,
443                        &device->memory_objects, link) {
444       result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0);
445       if (result != VK_SUCCESS)
446          return result;
447    }
448 
449    result = anv_execbuf_add_trtt_bos(device, execbuf);
450    if (result != VK_SUCCESS)
451       return result;
452 
453    /* Add all the private BOs from images because we can't track
454     * VK_EXT_descriptor_indexing update-after-bind changes.
455     */
456    list_for_each_entry(struct anv_image, image,
457                        &device->image_private_objects, link) {
458       struct anv_bo *private_bo =
459          image->bindings[ANV_IMAGE_MEMORY_BINDING_PRIVATE].address.bo;
460       result = anv_execbuf_add_bo(device, execbuf, private_bo, NULL, 0);
461       if (result != VK_SUCCESS)
462          return result;
463    }
464 
465    struct list_head *batch_bo = &cmd_buffers[0]->batch_bos;
466    struct anv_batch_bo *first_batch_bo =
467       list_first_entry(batch_bo, struct anv_batch_bo, link);
468 
469    /* The kernel requires that the last entry in the validation list be the
470     * batch buffer to execute.  We can simply swap the element
471     * corresponding to the first batch_bo in the chain with the last
472     * element in the list.
473     */
474    if (first_batch_bo->bo->exec_obj_index != execbuf->bo_count - 1) {
475       uint32_t idx = first_batch_bo->bo->exec_obj_index;
476       uint32_t last_idx = execbuf->bo_count - 1;
477 
478       struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
479       assert(execbuf->bos[idx] == first_batch_bo->bo);
480 
481       execbuf->objects[idx] = execbuf->objects[last_idx];
482       execbuf->bos[idx] = execbuf->bos[last_idx];
483       execbuf->bos[idx]->exec_obj_index = idx;
484 
485       execbuf->objects[last_idx] = tmp_obj;
486       execbuf->bos[last_idx] = first_batch_bo->bo;
487       first_batch_bo->bo->exec_obj_index = last_idx;
488    }
489 
490 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
491    if (device->physical->memory.need_flush &&
492        anv_bo_needs_host_cache_flush(device->batch_bo_pool.bo_alloc_flags))
493       anv_cmd_buffer_clflush(cmd_buffers, num_cmd_buffers);
494 #endif
495 
496    assert(!cmd_buffers[0]->is_companion_rcs_cmd_buffer || device->physical->has_vm_control);
497    uint64_t exec_flags = 0;
498    uint32_t context_id;
499    get_context_and_exec_flags(queue, cmd_buffers[0]->is_companion_rcs_cmd_buffer,
500                               &exec_flags, &context_id);
501 
502    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
503       .buffers_ptr = (uintptr_t) execbuf->objects,
504       .buffer_count = execbuf->bo_count,
505       .batch_start_offset = 0,
506       .batch_len = 0,
507       .cliprects_ptr = 0,
508       .num_cliprects = 0,
509       .DR1 = 0,
510       .DR4 = 0,
511       .flags = I915_EXEC_NO_RELOC |
512                I915_EXEC_HANDLE_LUT |
513                exec_flags,
514       .rsvd1 = context_id,
515       .rsvd2 = 0,
516    };
517 
518    return VK_SUCCESS;
519 }
520 
521 static VkResult
522 setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue)
523 {
524    struct anv_device *device = queue->device;
525    VkResult result = anv_execbuf_add_bo(device, execbuf,
526                                         device->trivial_batch_bo,
527                                         NULL, 0);
528    if (result != VK_SUCCESS)
529       return result;
530 
531    uint64_t exec_flags = 0;
532    uint32_t context_id;
533    get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
534 
535    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
536       .buffers_ptr = (uintptr_t) execbuf->objects,
537       .buffer_count = execbuf->bo_count,
538       .batch_start_offset = 0,
539       .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */
540       .flags = I915_EXEC_HANDLE_LUT | exec_flags | I915_EXEC_NO_RELOC,
541       .rsvd1 = context_id,
542       .rsvd2 = 0,
543    };
544 
545    return VK_SUCCESS;
546 }
547 
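/* Attach the collected syncobjs to the execbuf.  If any timeline values were
 * recorded, the DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES extension is
 * required; otherwise fall back to the legacy I915_EXEC_FENCE_ARRAY path,
 * which reuses the cliprects fields.
 */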
548 static void
549 setup_execbuf_fence_params(struct anv_execbuf *execbuf)
550 {
551    if (execbuf->syncobj_values) {
552       execbuf->timeline_fences.fence_count = execbuf->syncobj_count;
553       execbuf->timeline_fences.handles_ptr = (uintptr_t)execbuf->syncobjs;
554       execbuf->timeline_fences.values_ptr = (uintptr_t)execbuf->syncobj_values;
555       anv_execbuf_add_ext(execbuf,
556                           DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
557                           &execbuf->timeline_fences.base);
558    } else if (execbuf->syncobjs) {
559       execbuf->execbuf.flags |= I915_EXEC_FENCE_ARRAY;
560       execbuf->execbuf.num_cliprects = execbuf->syncobj_count;
561       execbuf->execbuf.cliprects_ptr = (uintptr_t)execbuf->syncobjs;
562    }
563 }
564 
565 static VkResult
566 setup_async_execbuf(struct anv_execbuf *execbuf,
567                     struct anv_async_submit *submit,
568                     uint32_t wait_count,
569                     const struct vk_sync_wait *waits,
570                     uint32_t signal_count,
571                     const struct vk_sync_signal *signals)
572 {
573    struct anv_queue *queue = submit->queue;
574    struct anv_device *device = queue->device;
575 
576    /* Always add the workaround BO as it includes a driver identifier for the
577     * error_state.
578     */
579    VkResult result = anv_execbuf_add_bo(device, execbuf,
580                                         device->workaround_bo,
581                                         NULL, 0);
582    if (result != VK_SUCCESS)
583       return result;
584 
585    util_dynarray_foreach(&submit->batch_bos, struct anv_bo *, _bo) {
586       struct anv_bo *bo = *_bo;
587 
588       result = anv_execbuf_add_bo(device, execbuf, bo,
589                                   &submit->relocs, 0);
590       if (result != VK_SUCCESS)
591          return result;
592 
593 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
594       if (device->physical->memory.need_flush &&
595           anv_bo_needs_host_cache_flush(bo->alloc_flags))
596          intel_flush_range(bo->map, bo->size);
597 #endif
598    }
599 
600    for (uint32_t i = 0; i < wait_count; i++) {
601       result = anv_execbuf_add_sync(device, execbuf,
602                                     waits[i].sync,
603                                     false /* is_signal */,
604                                     waits[i].wait_value);
605       if (result != VK_SUCCESS)
606          return result;
607    }
608    for (uint32_t i = 0; i < signal_count; i++) {
609       result = anv_execbuf_add_sync(device, execbuf,
610                                     signals[i].sync,
611                                     true /* is_signal */,
612                                     signals[i].signal_value);
613       if (result != VK_SUCCESS)
614          return result;
615    }
616    if (submit->signal.sync) {
617       result = anv_execbuf_add_sync(device, execbuf,
618                                     submit->signal.sync,
619                                     true /* is_signal */,
620                                     submit->signal.signal_value);
621       if (result != VK_SUCCESS)
622          return result;
623    }
624    if (queue->sync) {
625       result = anv_execbuf_add_sync(device, execbuf,
626                                     queue->sync,
627                                     true /* is_signal */,
628                                     0 /* signal_value */);
629       if (result != VK_SUCCESS)
630          return result;
631    }
632 
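   /* As in setup_execbuf_for_cmd_buffers(), the kernel requires the batch
    * buffer to be the last entry in the validation list, so swap the
    * submit's first batch BO into the last slot.
    */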
633    struct anv_bo *batch_bo =
634       *util_dynarray_element(&submit->batch_bos, struct anv_bo *, 0);
635    if (batch_bo->exec_obj_index != execbuf->bo_count - 1) {
636       uint32_t idx = batch_bo->exec_obj_index;
637       uint32_t last_idx = execbuf->bo_count - 1;
638 
639       struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
640       assert(execbuf->bos[idx] == batch_bo);
641 
642       execbuf->objects[idx] = execbuf->objects[last_idx];
643       execbuf->bos[idx] = execbuf->bos[last_idx];
644       execbuf->bos[idx]->exec_obj_index = idx;
645 
646       execbuf->objects[last_idx] = tmp_obj;
647       execbuf->bos[last_idx] = batch_bo;
648       batch_bo->exec_obj_index = last_idx;
649    }
650 
651    uint64_t exec_flags = 0;
652    uint32_t context_id;
653    get_context_and_exec_flags(queue, submit->use_companion_rcs,
654                               &exec_flags, &context_id);
655 
656    execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
657       .buffers_ptr = (uintptr_t) execbuf->objects,
658       .buffer_count = execbuf->bo_count,
659       .batch_start_offset = 0,
660       .flags = I915_EXEC_NO_RELOC |
661                I915_EXEC_HANDLE_LUT |
662                exec_flags,
663       .rsvd1 = context_id,
664       .rsvd2 = 0,
665    };
666 
667    setup_execbuf_fence_params(execbuf);
668 
669    return VK_SUCCESS;
670 }
671 
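/* Thin execbuffer2 wrapper: pick the _WR ioctl variant when an output fence
 * (I915_EXEC_FENCE_OUT) is requested and retry the ioctl while the kernel
 * returns ENOMEM.
 */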
672 static int
673 anv_gem_execbuffer(struct anv_device *device,
674                    struct drm_i915_gem_execbuffer2 *execbuf)
675 {
676    int ret;
677    const unsigned long request = (execbuf->flags & I915_EXEC_FENCE_OUT) ?
678       DRM_IOCTL_I915_GEM_EXECBUFFER2_WR :
679       DRM_IOCTL_I915_GEM_EXECBUFFER2;
680 
681    do {
682       ret = intel_ioctl(device->fd, request, execbuf);
683    } while (ret && errno == ENOMEM);
684 
685    return ret;
686 }
687 
688 static void
689 anv_i915_debug_submit(const struct anv_execbuf *execbuf)
690 {
691    uint32_t total_size_kb = 0, total_vram_only_size_kb = 0;
692    for (uint32_t i = 0; i < execbuf->bo_count; i++) {
693       const struct anv_bo *bo = execbuf->bos[i];
694       total_size_kb += bo->size / 1024;
695       if (anv_bo_is_vram_only(bo))
696          total_vram_only_size_kb += bo->size / 1024;
697    }
698 
699    fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0 (aperture: %.1fMb, %.1fMb VRAM only)\n",
700            execbuf->execbuf.batch_start_offset, execbuf->execbuf.batch_len,
701            (float)total_size_kb / 1024.0f,
702            (float)total_vram_only_size_kb / 1024.0f);
703    for (uint32_t i = 0; i < execbuf->bo_count; i++) {
704       const struct anv_bo *bo = execbuf->bos[i];
705 
706       fprintf(stderr, "   BO: addr=0x%016"PRIx64"-0x%016"PRIx64" size=%7"PRIu64
707               "KB handle=%05u capture=%u vram_only=%u name=%s\n",
708               bo->offset, bo->offset + bo->size - 1, bo->size / 1024,
709               bo->gem_handle, (bo->flags & EXEC_OBJECT_CAPTURE) != 0,
710               anv_bo_is_vram_only(bo), bo->name);
711    }
712 }
713 
714 VkResult
715 i915_queue_exec_async(struct anv_async_submit *submit,
716                       uint32_t wait_count,
717                       const struct vk_sync_wait *waits,
718                       uint32_t signal_count,
719                       const struct vk_sync_signal *signals)
720 {
721    assert(util_dynarray_num_elements(&submit->batch_bos,
722                                      struct anv_bo *) > 0);
723 
724    struct anv_queue *queue = submit->queue;
725    struct anv_device *device = queue->device;
726    struct anv_execbuf execbuf = {
727       .alloc = &device->vk.alloc,
728       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
729    };
730 
731    VkResult result = setup_async_execbuf(&execbuf, submit,
732                                          wait_count, waits,
733                                          signal_count, signals);
734    if (result != VK_SUCCESS)
735       goto error;
736 
737    if (INTEL_DEBUG(DEBUG_SUBMIT))
738       anv_i915_debug_submit(&execbuf);
739 
740    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
741 
742    int ret = queue->device->info->no_hw ? 0 :
743       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
744    if (ret)
745       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
746 
747    result = anv_queue_post_submit(queue, result);
748 
749  error:
750    anv_execbuf_finish(&execbuf);
751 
752    return result;
753 }
754 
755 static VkResult
756 i915_companion_rcs_queue_exec_locked(struct anv_queue *queue,
757                                      struct anv_cmd_buffer *companion_rcs_cmd_buffer,
758                                      uint32_t wait_count,
759                                      const struct vk_sync_wait *waits)
760 {
761    struct anv_device *device = queue->device;
762    struct anv_execbuf execbuf = {
763       .alloc = &queue->device->vk.alloc,
764       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
765    };
766 
767    /* Always add the workaround BO as it includes a driver identifier for the
768     * error_state.
769     */
770    VkResult result =
771       anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
772    if (result != VK_SUCCESS)
773       goto error;
774 
775    for (uint32_t i = 0; i < wait_count; i++) {
776       result = anv_execbuf_add_sync(device, &execbuf,
777                                     waits[i].sync,
778                                     false /* is_signal */,
779                                     waits[i].wait_value);
780       if (result != VK_SUCCESS)
781          goto error;
782    }
783 
784    if (queue->companion_sync) {
785       result = anv_execbuf_add_sync(device, &execbuf,
786                                     queue->companion_sync,
787                                     true /* is_signal */, 0);
788       if (result != VK_SUCCESS)
789          goto error;
790    }
791 
792    result = setup_execbuf_for_cmd_buffers(&execbuf, queue,
793                                           &companion_rcs_cmd_buffer, 1);
794    if (result != VK_SUCCESS)
795       goto error;
796 
797    if (INTEL_DEBUG(DEBUG_SUBMIT))
798       anv_i915_debug_submit(&execbuf);
799 
800    anv_cmd_buffer_exec_batch_debug(queue, 1, &companion_rcs_cmd_buffer, NULL, 0);
801 
802    setup_execbuf_fence_params(&execbuf);
803 
804    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
805 
806    int ret = queue->device->info->no_hw ? 0 :
807       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
808    if (ret) {
809       anv_i915_debug_submit(&execbuf);
810       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
811    }
812 
813  error:
814    anv_execbuf_finish(&execbuf);
815    return result;
816 }
817 
818 VkResult
819 i915_queue_exec_locked(struct anv_queue *queue,
820                        uint32_t wait_count,
821                        const struct vk_sync_wait *waits,
822                        uint32_t cmd_buffer_count,
823                        struct anv_cmd_buffer **cmd_buffers,
824                        uint32_t signal_count,
825                        const struct vk_sync_signal *signals,
826                        struct anv_query_pool *perf_query_pool,
827                        uint32_t perf_query_pass,
828                        struct anv_utrace_submit *utrace_submit)
829 {
830    struct anv_device *device = queue->device;
831    struct anv_execbuf execbuf = {
832       .alloc = &queue->device->vk.alloc,
833       .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE,
834    };
835    VkResult result;
836 
837    /* If there is a utrace submission but no batch, it means there are no
838     * commands to run for utrace. But we still have to signal the associated
839     * syncs, so add them to the submission.
840     */
841    if (utrace_submit &&
842        util_dynarray_num_elements(&utrace_submit->base.batch_bos,
843                                   struct anv_bo *) == 0) {
844       result = anv_execbuf_add_sync(device, &execbuf,
845                                     utrace_submit->base.signal.sync,
846                                     true /* is_signal */,
847                                     utrace_submit->base.signal.signal_value);
848       if (result != VK_SUCCESS)
849          goto error;
850 
851       /* Avoid doing a submission after the application's batch since there
852        * are no commands.
853        */
854       utrace_submit = NULL;
855    }
856 
857    /* Always add the workaround BO as it includes a driver identifier for the
858     * error_state.
859     */
860    result =
861       anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0);
862    if (result != VK_SUCCESS)
863       goto error;
864 
865    if (device->printf.bo) {
866       result =
867          anv_execbuf_add_bo(device, &execbuf, device->printf.bo, NULL, 0);
868       if (result != VK_SUCCESS)
869          goto error;
870    }
871 
872    for (uint32_t i = 0; i < wait_count; i++) {
873       result = anv_execbuf_add_sync(device, &execbuf,
874                                     waits[i].sync,
875                                     false /* is_signal */,
876                                     waits[i].wait_value);
877       if (result != VK_SUCCESS)
878          goto error;
879    }
880 
881    for (uint32_t i = 0; i < signal_count; i++) {
882       result = anv_execbuf_add_sync(device, &execbuf,
883                                     signals[i].sync,
884                                     true /* is_signal */,
885                                     signals[i].signal_value);
886       if (result != VK_SUCCESS)
887          goto error;
888    }
889 
890    if (queue->sync) {
891       result = anv_execbuf_add_sync(device, &execbuf,
892                                     queue->sync,
893                                     true /* is_signal */,
894                                     0 /* signal_value */);
895       if (result != VK_SUCCESS)
896          goto error;
897    }
898 
899    if (cmd_buffer_count) {
900       result = setup_execbuf_for_cmd_buffers(&execbuf, queue, cmd_buffers,
901                                              cmd_buffer_count);
902    } else {
903       result = setup_empty_execbuf(&execbuf, queue);
904    }
905 
906    if (result != VK_SUCCESS)
907       goto error;
908 
909    const bool has_perf_query = perf_query_pool && cmd_buffer_count;
910 
911    if (INTEL_DEBUG(DEBUG_SUBMIT))
912       anv_i915_debug_submit(&execbuf);
913 
914    anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
915                                    perf_query_pool, perf_query_pass);
916 
917    setup_execbuf_fence_params(&execbuf);
918 
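   /* For performance queries, reconfigure the OA metric set if needed and
    * submit the query pool's per-pass preamble batch on the same context
    * before the main execbuf below.
    */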
919    if (has_perf_query) {
920       assert(perf_query_pass < perf_query_pool->n_passes);
921       struct intel_perf_query_info *query_info =
922          perf_query_pool->pass_query[perf_query_pass];
923 
924       /* Some performance queries only use the pipeline statistics HW; in
925        * that case there is no OA configuration to change.
926        */
927       if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) &&
928           (query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
929            query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
930          int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
931                                                     device->perf_fd,
932                                                     query_info->oa_metrics_set_id);
933          if (ret < 0) {
934             result = vk_device_set_lost(&device->vk,
935                                         "i915-perf config failed: %s",
936                                         strerror(errno));
937          }
938       }
939 
940       struct anv_bo *pass_batch_bo = perf_query_pool->bo;
941 
942       struct drm_i915_gem_exec_object2 query_pass_object = {
943          .handle = pass_batch_bo->gem_handle,
944          .offset = pass_batch_bo->offset,
945          .flags  = pass_batch_bo->flags,
946       };
947 
948       uint64_t exec_flags = 0;
949       uint32_t context_id;
950       get_context_and_exec_flags(queue, false, &exec_flags, &context_id);
951 
952       struct drm_i915_gem_execbuffer2 query_pass_execbuf = {
953          .buffers_ptr = (uintptr_t) &query_pass_object,
954          .buffer_count = 1,
955          .batch_start_offset = khr_perf_query_preamble_offset(perf_query_pool,
956                                                               perf_query_pass),
957          .flags = I915_EXEC_HANDLE_LUT | exec_flags,
958          .rsvd1 = context_id,
959       };
960 
961       int ret = queue->device->info->no_hw ? 0 :
962          anv_gem_execbuffer(queue->device, &query_pass_execbuf);
963       if (ret)
964          result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
965    }
966 
967    ANV_RMV(bos_gtt_map, device, execbuf.bos, execbuf.bo_count);
968 
969    int ret = queue->device->info->no_hw ? 0 :
970       anv_gem_execbuffer(queue->device, &execbuf.execbuf);
971    if (ret) {
972       anv_i915_debug_submit(&execbuf);
973       result = vk_queue_set_lost(&queue->vk, "execbuf2 failed: %m");
974    }
975 
976    if (cmd_buffer_count != 0 && cmd_buffers[0]->companion_rcs_cmd_buffer) {
977       struct anv_cmd_buffer *companion_rcs_cmd_buffer =
978          cmd_buffers[0]->companion_rcs_cmd_buffer;
979       assert(companion_rcs_cmd_buffer->is_companion_rcs_cmd_buffer);
980       assert(cmd_buffer_count == 1);
981       result = i915_companion_rcs_queue_exec_locked(queue,
982                                                     cmd_buffers[0]->companion_rcs_cmd_buffer, wait_count,
983                                                     waits);
984    }
985 
986    result = anv_queue_post_submit(queue, result);
987 
988  error:
989    anv_execbuf_finish(&execbuf);
990 
991    if (result == VK_SUCCESS && utrace_submit) {
992       struct vk_sync_signal signal = {
993          .sync = utrace_submit->base.signal.sync,
994          .signal_value = utrace_submit->base.signal.signal_value,
995       };
996       result = i915_queue_exec_async(&utrace_submit->base, 0, NULL, 1, &signal);
997    }
998 
999    return result;
1000 }
1001