/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_cmd_pool.h"
#include "panvk_cmd_push_constant.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"

#include "pan_blitter.h"
#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_props.h"
#include "pan_samples.h"

#include "vk_descriptor_update_template.h"
#include "vk_format.h"

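/* Allocate and emit a FRAGMENT job for one layer's framebuffer descriptor,
 * then append it to the batch's fragment job chain. Returns
 * VK_ERROR_OUT_OF_DEVICE_MEMORY if the job descriptor allocation fails.
 */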
static VkResult
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf, mali_ptr fbd)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr job_ptr = panvk_cmd_alloc_desc(cmdbuf, FRAGMENT_JOB);

   if (!job_ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   GENX(pan_emit_fragment_job_payload)(fbinfo, fbd, job_ptr.cpu);

   pan_section_pack(job_ptr.cpu, FRAGMENT_JOB, HEADER, header) {
      header.type = MALI_JOB_TYPE_FRAGMENT;
      header.index = 1;
   }

   pan_jc_add_job(&batch->frag_jc, MALI_JOB_TYPE_FRAGMENT, false, false, 0, 0,
                  &job_ptr, false);
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
   return VK_SUCCESS;
}

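/* Finish the current batch and queue it on the command buffer. A content-less
 * batch is dropped (or padded with a NULL job when it carries event
 * operations), TLS/WLS memory is allocated for the scratch requirements
 * accumulated so far, and, when a framebuffer descriptor is attached, one
 * FBD plus one fragment job is emitted per layer.
 */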
void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   if (!batch)
      return;

   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;

   assert(batch);

   if (!batch->fb.desc.gpu && !batch->vtc_jc.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops,
                                     struct panvk_cmd_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->vk.pool->alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, JOB_HEADER);

         if (ptr.gpu) {
            util_dynarray_append(&batch->jobs, void *, ptr.cpu);
            pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_NULL, false, false, 0,
                           0, &ptr, false);
         }

         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->cur_batch = NULL;
      return;
   }

   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->tlsinfo.tls.size) {
      unsigned thread_tls_alloc =
         panfrost_query_thread_tls_alloc(&phys_dev->kmod.props);
      unsigned core_id_range;

      panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);

      unsigned size = panfrost_get_total_stack_size(
         batch->tlsinfo.tls.size, thread_tls_alloc, core_id_range);
      batch->tlsinfo.tls.ptr =
         panvk_cmd_alloc_dev_mem(cmdbuf, tls, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         panvk_cmd_alloc_dev_mem(cmdbuf, tls, batch->wls_total_size, 4096).gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      fbinfo->sample_positions = dev->sample_positions->addr.dev +
                                 panfrost_sample_positions_offset(
                                    pan_sample_pattern(fbinfo->nr_samples));

      for (uint32_t i = 0; i < batch->fb.layer_count; i++) {
         VkResult result;

         mali_ptr fbd = batch->fb.desc.gpu + (batch->fb.desc_stride * i);
         if (batch->vtc_jc.first_tiler) {
            cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds.gpu = 0;

            ASSERTED unsigned num_preload_jobs = GENX(pan_preload_fb)(
               &dev->blitter.cache, &cmdbuf->desc_pool.base,
               &cmdbuf->state.gfx.render.fb.info, i, batch->tls.gpu, NULL);

            /* Bifrost GPUs use pre frame DCDs to preload the FB content. We
             * thus expect num_preload_jobs to be zero.
             */
            assert(!num_preload_jobs);
         }

         result = panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, i);
         if (result != VK_SUCCESS)
            break;

         fbd |= GENX(pan_emit_fbd)(
            &cmdbuf->state.gfx.render.fb.info, i, &batch->tlsinfo,
            &batch->tiler.ctx,
            batch->fb.desc.cpu + (batch->fb.desc_stride * i));

         result = panvk_cmd_prepare_fragment_job(cmdbuf, fbd);
         if (result != VK_SUCCESS)
            break;
      }
   }

   cmdbuf->cur_batch = NULL;
}

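/* Allocate the per-layer framebuffer descriptors for the current batch.
 * The per-layer stride accounts for the optional ZS/CRC extension and one
 * render-target descriptor per color attachment (at least one). No-op if
 * the batch already has a framebuffer descriptor.
 */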
VkResult
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   if (batch->fb.desc.gpu)
      return VK_SUCCESS;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   batch->fb.layer_count = cmdbuf->state.gfx.render.layer_count;
   unsigned fbd_size = pan_size(FRAMEBUFFER);

   if (has_zs_ext)
      fbd_size = ALIGN_POT(fbd_size, pan_alignment(ZS_CRC_EXTENSION)) +
                 pan_size(ZS_CRC_EXTENSION);

   fbd_size = ALIGN_POT(fbd_size, pan_alignment(RENDER_TARGET)) +
              (MAX2(fbinfo->rt_count, 1) * pan_size(RENDER_TARGET));

   batch->fb.bo_count = cmdbuf->state.gfx.render.fb.bo_count;
   memcpy(batch->fb.bos, cmdbuf->state.gfx.render.fb.bos,
          batch->fb.bo_count * sizeof(batch->fb.bos[0]));

   batch->fb.desc =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, fbd_size * batch->fb.layer_count,
                              pan_alignment(FRAMEBUFFER));
   batch->fb.desc_stride = fbd_size;

   memset(&cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.gfx.render.fb.info.bifrost.pre_post.dcds));

   return batch->fb.desc.gpu ? VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
}

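/* Allocate the LOCAL_STORAGE (TLS) descriptor for the current batch if it
 * doesn't have one yet. The descriptor contents are emitted later, at
 * cmd_close_batch() time, once the final TLS/WLS sizes are known.
 */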
VkResult
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->cur_batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
      if (!batch->tls.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   return VK_SUCCESS;
}

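/* Set up the tiler heap descriptor and the per-layer tiler context
 * descriptors for the current batch (allocated lazily on first call), then
 * point the batch's tiler context at the descriptor for layer_idx.
 */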
VkResult
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                          uint32_t layer_idx)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_batch *batch = cmdbuf->cur_batch;
   mali_ptr tiler_desc;

   if (batch->tiler.ctx_descs.gpu) {
      tiler_desc =
         batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);
      goto out_set_layer_ctx;
   }

   const struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
   batch->tiler.heap_desc = panvk_cmd_alloc_desc(cmdbuf, TILER_HEAP);
   batch->tiler.ctx_descs =
      panvk_cmd_alloc_desc_array(cmdbuf, layer_count, TILER_CONTEXT);
   if (!batch->tiler.heap_desc.gpu || !batch->tiler.ctx_descs.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   tiler_desc =
      batch->tiler.ctx_descs.gpu + (pan_size(TILER_CONTEXT) * layer_idx);

   pan_pack(&batch->tiler.heap_templ, TILER_HEAP, cfg) {
      cfg.size = pan_kmod_bo_size(dev->tiler_heap->bo);
      cfg.base = dev->tiler_heap->addr.dev;
      cfg.bottom = dev->tiler_heap->addr.dev;
      cfg.top = cfg.base + cfg.size;
   }

   pan_pack(&batch->tiler.ctx_templ, TILER_CONTEXT, cfg) {
      cfg.hierarchy_mask = 0x28;
      cfg.fb_width = fbinfo->width;
      cfg.fb_height = fbinfo->height;
      cfg.heap = batch->tiler.heap_desc.gpu;
      cfg.sample_pattern = pan_sample_pattern(fbinfo->nr_samples);
   }

   memcpy(batch->tiler.heap_desc.cpu, &batch->tiler.heap_templ,
          sizeof(batch->tiler.heap_templ));

   struct mali_tiler_context_packed *ctxs = batch->tiler.ctx_descs.cpu;

   assert(layer_count > 0);
   for (uint32_t i = 0; i < layer_count; i++) {
      STATIC_ASSERT(
         !(pan_size(TILER_CONTEXT) & (pan_alignment(TILER_CONTEXT) - 1)));

      memcpy(&ctxs[i], &batch->tiler.ctx_templ, sizeof(*ctxs));
   }

out_set_layer_ctx:
   if (PAN_ARCH >= 9)
      batch->tiler.ctx.valhall.desc = tiler_desc;
   else
      batch->tiler.ctx.bifrost.desc = tiler_desc;

   return VK_SUCCESS;
}

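/* Start a new batch on the command buffer. The caller is expected to have
 * closed the previous batch first.
 */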
struct panvk_batch *
panvk_per_arch(cmd_open_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   assert(!cmdbuf->cur_batch);
   cmdbuf->cur_batch =
      vk_zalloc(&cmdbuf->vk.pool->alloc, sizeof(*cmdbuf->cur_batch), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   util_dynarray_init(&cmdbuf->cur_batch->jobs, NULL);
   util_dynarray_init(&cmdbuf->cur_batch->event_ops, NULL);
   assert(cmdbuf->cur_batch);
   return cmdbuf->cur_batch;
}

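/* vkEndCommandBuffer: close the pending batch (if any) and finalize the
 * command buffer through the common runtime.
 */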
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   return vk_command_buffer_end(&cmdbuf->vk);
}

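/* vkCmdPipelineBarrier2: implemented as a batch split; see the comment
 * below for the rationale.
 */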
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, nothing to do
    * for memory barriers assuming we implement barriers with the creation of a
    * new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->cur_batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
      panvk_per_arch(cmd_open_batch)(cmdbuf);
   }
}

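/* vk_command_buffer_ops::reset: free all recorded batches, reset the
 * descriptor/TLS/varying pools and wipe the cached command buffer state.
 */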
static void
panvk_reset_cmdbuf(struct vk_command_buffer *vk_cmdbuf,
                   VkCommandBufferResetFlags flags)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);

   vk_command_buffer_reset(&cmdbuf->vk);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   panvk_cmd_buffer_obj_list_reset(cmdbuf, push_sets);

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));
}

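/* vk_command_buffer_ops::destroy: same batch teardown as reset, but the
 * pools are fully cleaned up and the command buffer object itself is freed.
 */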
static void
panvk_destroy_cmdbuf(struct vk_command_buffer *vk_cmdbuf)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->vk.pool->alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   panvk_cmd_buffer_obj_list_cleanup(cmdbuf, push_sets);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&dev->vk.alloc, cmdbuf);
}

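/* vk_command_buffer_ops::create: allocate a command buffer and initialize
 * its descriptor, TLS and varying upload pools, which recycle BOs through
 * the parent command pool.
 */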
static VkResult
panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
                    struct vk_command_buffer **cmdbuf_out)
{
   struct panvk_device *device =
      container_of(vk_pool->base.device, struct panvk_device, vk);
   struct panvk_cmd_pool *pool =
      container_of(vk_pool, struct panvk_cmd_pool, vk);
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(
      &pool->vk, &cmdbuf->vk, &panvk_per_arch(cmd_buffer_ops), level);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   panvk_cmd_buffer_obj_list_init(cmdbuf, push_sets);
   cmdbuf->vk.dynamic_graphics_state.vi = &cmdbuf->state.gfx.dynamic.vi;
   cmdbuf->vk.dynamic_graphics_state.ms.sample_locations =
      &cmdbuf->state.gfx.dynamic.sl;

   struct panvk_pool_properties desc_pool_props = {
      .create_flags = 0,
      .slab_size = 64 * 1024,
      .label = "Command buffer descriptor pool",
      .prealloc = true,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool,
                   &desc_pool_props);

   struct panvk_pool_properties tls_pool_props = {
      .create_flags =
         panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
      .slab_size = 64 * 1024,
      .label = "TLS pool",
      .prealloc = false,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
                   &tls_pool_props);

   struct panvk_pool_properties var_pool_props = {
      .create_flags =
         panvk_device_adjust_bo_flags(device, PAN_KMOD_BO_FLAG_NO_MMAP),
      .slab_size = 64 * 1024,
      .label = "Varying pool",
      .prealloc = false,
      .owns_bos = true,
      .needs_locking = false,
   };
   panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool,
                   &var_pool_props);

   list_inithead(&cmdbuf->batches);
   *cmdbuf_out = &cmdbuf->vk;
   return VK_SUCCESS;
}

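/* Command buffer vtable plugged into the common Vulkan runtime. */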
const struct vk_command_buffer_ops panvk_per_arch(cmd_buffer_ops) = {
   .create = panvk_create_cmdbuf,
   .reset = panvk_reset_cmdbuf,
   .destroy = panvk_destroy_cmdbuf,
};

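/* vkBeginCommandBuffer: only the common runtime bookkeeping is needed here;
 * no batch is opened at this point.
 */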
VKAPI_ATTR VkResult VKAPI_CALL
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   vk_command_buffer_begin(&cmdbuf->vk, pBeginInfo);

   return VK_SUCCESS;
}