/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_device.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "genxml/gen_macros.h"

#include "decode.h"

#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_event.h"
#include "panvk_image.h"
#include "panvk_image_view.h"
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "panvk_queue.h"

#include "vk_drm_syncobj.h"
#include "vk_framebuffer.h"

#include "drm-uapi/panfrost_drm.h"

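/* Submit one batch to the kernel: the non-fragment job chain is submitted
 * first, followed by the fragment job chain, both through
 * DRM_IOCTL_PANFROST_SUBMIT. Depending on the debug flags, each submission
 * is optionally traced, dumped and waited on synchronously.
 */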
static void
panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
                         uint32_t *bos, unsigned nr_bos, uint32_t *in_fences,
                         unsigned nr_in_fences)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   struct panvk_instance *instance =
      to_panvk_instance(dev->vk.physical->instance);
   unsigned debug = instance->debug_flags;
   int ret;

   /* Reset the batch if it's already been issued */
   if (batch->issued) {
      util_dynarray_foreach(&batch->jobs, void *, job)
         memset((*job), 0, 4 * 4);

      /* Reset the tiler before re-issuing the batch */
      if (batch->tiler.ctx_descs.cpu) {
         memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
                sizeof(batch->tiler.heap_templ));

         struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;

         for (uint32_t i = 0; i < batch->fb.layer_count; i++)
            memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
      }
   }

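   /* Submit the vertex/tiler/compute job chain, if any. It waits on the
    * caller-provided fences and signals the queue syncobj on completion.
    */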
   if (batch->vtc_jc.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .in_syncs = (uintptr_t)in_fences,
         .in_sync_count = nr_in_fences,
         .out_sync = queue->sync,
         .jc = batch->vtc_jc.first_job,
      };

      ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);

      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(dev->vk.drm_fd, &submit.out_sync, 1, INT64_MAX, 0,
                              NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE) {
         pandecode_jc(dev->debug.decode_ctx, batch->vtc_jc.first_job,
                      phys_dev->kmod.props.gpu_prod_id);
      }

      if (debug & PANVK_DEBUG_DUMP)
         pandecode_dump_mappings(dev->debug.decode_ctx);

      if (debug & PANVK_DEBUG_SYNC)
         pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
                                  phys_dev->kmod.props.gpu_prod_id);
   }

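   /* Submit the fragment job chain, if any. */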
   if (batch->frag_jc.first_job) {
      struct drm_panfrost_submit submit = {
         .bo_handles = (uintptr_t)bos,
         .bo_handle_count = nr_bos,
         .out_sync = queue->sync,
         .jc = batch->frag_jc.first_job,
         .requirements = PANFROST_JD_REQ_FS,
      };

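      /* If a non-fragment chain was submitted above, chain the fragment
       * jobs after it through the queue syncobj; otherwise wait on the
       * caller-provided fences directly.
       */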
      if (batch->vtc_jc.first_job) {
         submit.in_syncs = (uintptr_t)(&queue->sync);
         submit.in_sync_count = 1;
      } else {
         submit.in_syncs = (uintptr_t)in_fences;
         submit.in_sync_count = nr_in_fences;
      }

      ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
      assert(!ret);
      if (debug & (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC)) {
         ret = drmSyncobjWait(dev->vk.drm_fd, &submit.out_sync, 1, INT64_MAX, 0,
                              NULL);
         assert(!ret);
      }

      if (debug & PANVK_DEBUG_TRACE)
         pandecode_jc(dev->debug.decode_ctx, batch->frag_jc.first_job,
                      phys_dev->kmod.props.gpu_prod_id);

      if (debug & PANVK_DEBUG_DUMP)
         pandecode_dump_mappings(dev->debug.decode_ctx);

      if (debug & PANVK_DEBUG_SYNC)
         pandecode_abort_on_fault(dev->debug.decode_ctx, submit.jc,
                                  phys_dev->kmod.props.gpu_prod_id);
   }

   if (debug & PANVK_DEBUG_TRACE)
      pandecode_next_frame(dev->debug.decode_ctx);

   batch->issued = true;
}

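/* Copy the current payload of the queue syncobj into another syncobj by
 * exporting it as a sync file and re-importing that file into the
 * destination syncobj.
 */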
static void
panvk_queue_transfer_sync(struct panvk_queue *queue, uint32_t syncobj)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);
   int ret;

   struct drm_syncobj_handle handle = {
      .handle = queue->sync,
      .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
      .fd = -1,
   };

   ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD, &handle);
   assert(!ret);
   assert(handle.fd >= 0);

   handle.handle = syncobj;
   ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE, &handle);
   assert(!ret);

   close(handle.fd);
}

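/* Append the syncobjs of all event wait operations recorded in the batch to
 * the in_fences array. Set/reset operations are handled after submission in
 * panvk_signal_event_syncobjs().
 */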
static void
panvk_add_wait_event_syncobjs(struct panvk_batch *batch, uint32_t *in_fences,
                              unsigned *nr_in_fences)
{
   util_dynarray_foreach(&batch->event_ops, struct panvk_cmd_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_RESET:
         /* Nothing to do yet */
         break;
      case PANVK_EVENT_OP_WAIT:
         in_fences[(*nr_in_fences)++] = op->event->syncobj;
         break;
      default:
         unreachable("bad panvk_cmd_event_op type\n");
      }
   }
}

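/* Process set/reset event operations once the batch has been submitted:
 * set operations transfer the queue syncobj payload to the event syncobj,
 * reset operations reset the event syncobj.
 */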
static void
panvk_signal_event_syncobjs(struct panvk_queue *queue,
                            struct panvk_batch *batch)
{
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);

   util_dynarray_foreach(&batch->event_ops, struct panvk_cmd_event_op, op) {
      switch (op->type) {
      case PANVK_EVENT_OP_SET: {
         panvk_queue_transfer_sync(queue, op->event->syncobj);
         break;
      }
      case PANVK_EVENT_OP_RESET: {
         struct panvk_event *event = op->event;

         struct drm_syncobj_array objs = {
            .handles = (uint64_t)(uintptr_t)&event->syncobj,
            .count_handles = 1};

         int ret = drmIoctl(dev->vk.drm_fd, DRM_IOCTL_SYNCOBJ_RESET, &objs);
         assert(!ret);
         break;
      }
      case PANVK_EVENT_OP_WAIT:
         /* Nothing left to do */
         break;
      default:
         unreachable("bad panvk_cmd_event_op type\n");
      }
   }
}

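/* vk_queue::driver_submit implementation: walk all batches of all command
 * buffers in the submission, collect their BO handles and wait syncobjs,
 * submit them, then propagate the queue syncobj to the signal semaphores.
 */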
static VkResult
panvk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
{
   struct panvk_queue *queue = container_of(vk_queue, struct panvk_queue, vk);
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);

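   /* The queue syncobj always comes first so that every batch waits on work
    * previously submitted to this queue; the submission wait semaphores
    * follow.
    */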
   unsigned nr_semaphores = submit->wait_count + 1;
   uint32_t semaphores[nr_semaphores];

   semaphores[0] = queue->sync;
   for (unsigned i = 0; i < submit->wait_count; i++) {
      assert(vk_sync_type_is_drm_syncobj(submit->waits[i].sync->type));
      struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(submit->waits[i].sync);

      semaphores[i + 1] = syncobj->syncobj;
   }

   for (uint32_t j = 0; j < submit->command_buffer_count; ++j) {
      struct panvk_cmd_buffer *cmdbuf =
         container_of(submit->command_buffers[j], struct panvk_cmd_buffer, vk);

      list_for_each_entry(struct panvk_batch, batch, &cmdbuf->batches, node) {
         /* FIXME: should be done at the batch level */
         unsigned nr_bos = panvk_pool_num_bos(&cmdbuf->desc_pool) +
                           panvk_pool_num_bos(&cmdbuf->varying_pool) +
                           panvk_pool_num_bos(&cmdbuf->tls_pool) +
                           batch->fb.bo_count + (batch->blit.src ? 1 : 0) +
                           (batch->blit.dst ? 1 : 0) +
                           (batch->vtc_jc.first_tiler ? 1 : 0) + 1;
         unsigned bo_idx = 0;
         uint32_t bos[nr_bos];

         panvk_pool_get_bo_handles(&cmdbuf->desc_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->desc_pool);

         panvk_pool_get_bo_handles(&cmdbuf->varying_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->varying_pool);

         panvk_pool_get_bo_handles(&cmdbuf->tls_pool, &bos[bo_idx]);
         bo_idx += panvk_pool_num_bos(&cmdbuf->tls_pool);

         for (unsigned i = 0; i < batch->fb.bo_count; i++)
            bos[bo_idx++] = pan_kmod_bo_handle(batch->fb.bos[i]);

         if (batch->blit.src)
            bos[bo_idx++] = pan_kmod_bo_handle(batch->blit.src);

         if (batch->blit.dst)
            bos[bo_idx++] = pan_kmod_bo_handle(batch->blit.dst);

         if (batch->vtc_jc.first_tiler)
            bos[bo_idx++] = pan_kmod_bo_handle(dev->tiler_heap->bo);

         bos[bo_idx++] = pan_kmod_bo_handle(dev->sample_positions->bo);
         assert(bo_idx == nr_bos);

         /* Merge identical BO entries. */
         for (unsigned x = 0; x < nr_bos; x++) {
            for (unsigned y = x + 1; y < nr_bos;) {
               if (bos[x] == bos[y])
                  bos[y] = bos[--nr_bos];
               else
                  y++;
            }
         }

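         /* Wait on the queue/submission semaphores plus any event wait
          * syncobjs recorded in this batch.
          */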
         unsigned nr_in_fences = 0;
         unsigned max_wait_event_syncobjs = util_dynarray_num_elements(
            &batch->event_ops, struct panvk_cmd_event_op);
         uint32_t in_fences[nr_semaphores + max_wait_event_syncobjs];
         memcpy(in_fences, semaphores, nr_semaphores * sizeof(*in_fences));
         nr_in_fences += nr_semaphores;

         panvk_add_wait_event_syncobjs(batch, in_fences, &nr_in_fences);

         panvk_queue_submit_batch(queue, batch, bos, nr_bos, in_fences,
                                  nr_in_fences);

         panvk_signal_event_syncobjs(queue, batch);
      }
   }

   /* Transfer the out fence to signal semaphores */
   for (unsigned i = 0; i < submit->signal_count; i++) {
      assert(vk_sync_type_is_drm_syncobj(submit->signals[i].sync->type));
      struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(submit->signals[i].sync);

      panvk_queue_transfer_sync(queue, syncobj->syncobj);
   }

   return VK_SUCCESS;
}

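/* Initialize the queue object and create the syncobj used to track the last
 * submission on this queue.
 */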
VkResult
panvk_per_arch(queue_init)(struct panvk_device *device,
                           struct panvk_queue *queue, int idx,
                           const VkDeviceQueueCreateInfo *create_info)
{
   VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
   if (result != VK_SUCCESS)
      return result;

   int ret = drmSyncobjCreate(device->vk.drm_fd, DRM_SYNCOBJ_CREATE_SIGNALED,
                              &queue->sync);
   if (ret) {
      vk_queue_finish(&queue->vk);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   queue->vk.driver_submit = panvk_queue_submit;
   return VK_SUCCESS;
}

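/* Wait for all work previously submitted to the queue to complete by
 * waiting on the queue syncobj.
 */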
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(QueueWaitIdle)(VkQueue _queue)
{
   VK_FROM_HANDLE(panvk_queue, queue, _queue);
   struct panvk_device *dev = to_panvk_device(queue->vk.base.device);

   if (vk_device_is_lost(&dev->vk))
      return VK_ERROR_DEVICE_LOST;

   int ret = drmSyncobjWait(queue->vk.base.device->drm_fd, &queue->sync, 1,
                            INT64_MAX, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
   assert(!ret);

   return VK_SUCCESS;
}