xref: /aosp_15_r20/external/mesa3d/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  *
4  * Derived from tu_cmd_buffer.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include "genxml/gen_macros.h"
13 
14 #include "panvk_cmd_alloc.h"
15 #include "panvk_cmd_buffer.h"
16 #include "panvk_cmd_desc_state.h"
17 #include "panvk_device.h"
18 #include "panvk_entrypoints.h"
19 #include "panvk_meta.h"
20 #include "panvk_physical_device.h"
21 
22 #include "pan_desc.h"
23 #include "pan_encoder.h"
24 #include "pan_jc.h"
25 #include "pan_props.h"
26 
27 #include <vulkan/vulkan_core.h>
28 
29 struct panvk_dispatch_info {
30    struct pan_compute_dim wg_count;
31    mali_ptr tsd;
32    mali_ptr push_uniforms;
33 };
34 
35 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchBase)36 panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
37                                 uint32_t baseGroupX, uint32_t baseGroupY,
38                                 uint32_t baseGroupZ, uint32_t groupCountX,
39                                 uint32_t groupCountY, uint32_t groupCountZ)
40 {
41    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
42    const struct panvk_shader *shader = cmdbuf->state.compute.shader;
43    VkResult result;
44 
45    if (groupCountX == 0 || groupCountY == 0 || groupCountZ == 0)
46       return;
47 
48    /* If there's no compute shader, we can skip the dispatch. */
49    if (!panvk_priv_mem_dev_addr(shader->rsd))
50       return;
51 
52    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
53    struct panvk_physical_device *phys_dev =
54       to_panvk_physical_device(dev->vk.physical);
55    struct panvk_dispatch_info dispatch = {
56       .wg_count = {groupCountX, groupCountY, groupCountZ},
57    };
58 
59    panvk_per_arch(cmd_close_batch)(cmdbuf);
60    struct panvk_batch *batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
61 
62    struct panvk_descriptor_state *desc_state =
63       &cmdbuf->state.compute.desc_state;
64    struct panvk_shader_desc_state *cs_desc_state =
65       &cmdbuf->state.compute.cs.desc;
66 
67    panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
68    dispatch.tsd = batch->tls.gpu;
69 
70    result = panvk_per_arch(cmd_prepare_push_descs)(
71       cmdbuf, desc_state, shader->desc_info.used_set_mask);
72    if (result != VK_SUCCESS)
73       return;
74 
75    struct panvk_compute_sysvals *sysvals = &cmdbuf->state.compute.sysvals;
76    sysvals->base.x = baseGroupX;
77    sysvals->base.y = baseGroupY;
78    sysvals->base.z = baseGroupZ;
79    sysvals->num_work_groups.x = groupCountX;
80    sysvals->num_work_groups.y = groupCountY;
81    sysvals->num_work_groups.z = groupCountZ;
82    sysvals->local_group_size.x = shader->local_size.x;
83    sysvals->local_group_size.y = shader->local_size.y;
84    sysvals->local_group_size.z = shader->local_size.z;
85 
86    result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, shader,
87                                                   cs_desc_state);
88    if (result != VK_SUCCESS)
89       return;
90 
91    sysvals->desc.dyn_ssbos = cs_desc_state->dyn_ssbos;
92 
93    for (uint32_t i = 0; i < MAX_SETS; i++) {
94       if (shader->desc_info.used_set_mask & BITFIELD_BIT(i))
95          sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev;
96    }
97 
98    cmdbuf->state.compute.push_uniforms = 0;
99 
100    if (!cmdbuf->state.compute.push_uniforms) {
101       cmdbuf->state.compute.push_uniforms = panvk_per_arch(
102          cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.compute.sysvals,
103                                     sizeof(cmdbuf->state.compute.sysvals));
104       if (!cmdbuf->state.compute.push_uniforms)
105          return;
106    }
107 
108    dispatch.push_uniforms = cmdbuf->state.compute.push_uniforms;
109 
110    result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
111       cmdbuf, desc_state, shader, cs_desc_state);
112 
113    struct panfrost_ptr copy_desc_job;
114    result = panvk_per_arch(meta_get_copy_desc_job)(
115       cmdbuf, shader, &cmdbuf->state.compute.desc_state, cs_desc_state, 0,
116       &copy_desc_job);
117    if (result != VK_SUCCESS)
118       return;
119 
120    if (copy_desc_job.cpu)
121       util_dynarray_append(&batch->jobs, void *, copy_desc_job.cpu);
122 
123    struct panfrost_ptr job = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
124    if (!job.gpu)
125       return;
126 
127    util_dynarray_append(&batch->jobs, void *, job.cpu);
128 
129    panfrost_pack_work_groups_compute(
130       pan_section_ptr(job.cpu, COMPUTE_JOB, INVOCATION), dispatch.wg_count.x,
131       dispatch.wg_count.y, dispatch.wg_count.z, shader->local_size.x,
132       shader->local_size.y, shader->local_size.z, false, false);
133 
134    pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
135       cfg.job_task_split = util_logbase2_ceil(shader->local_size.x + 1) +
136                            util_logbase2_ceil(shader->local_size.y + 1) +
137                            util_logbase2_ceil(shader->local_size.z + 1);
138    }
139 
140    pan_section_pack(job.cpu, COMPUTE_JOB, DRAW, cfg) {
141       cfg.state = panvk_priv_mem_dev_addr(shader->rsd);
142       cfg.attributes = cs_desc_state->img_attrib_table;
143       cfg.attribute_buffers =
144          cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
145       cfg.thread_storage = dispatch.tsd;
146       cfg.uniform_buffers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
147       cfg.push_uniforms = dispatch.push_uniforms;
148       cfg.textures = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
149       cfg.samplers = cs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
150    }
151 
152    unsigned copy_desc_dep =
153       copy_desc_job.gpu
154          ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
155                           0, 0, &copy_desc_job, false)
156          : 0;
157 
158    pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
159                   copy_desc_dep, &job, false);
160 
161    batch->tlsinfo.tls.size = shader->info.tls_size;
162    batch->tlsinfo.wls.size = shader->info.wls_size;
163    if (batch->tlsinfo.wls.size) {
164       unsigned core_id_range;
165 
166       panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
167       batch->tlsinfo.wls.instances = pan_wls_instances(&dispatch.wg_count);
168       batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
169                               batch->tlsinfo.wls.instances * core_id_range;
170    }
171 
172    panvk_per_arch(cmd_close_batch)(cmdbuf);
173 }
174 
175 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDispatchIndirect)176 panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer,
177                                     VkBuffer _buffer, VkDeviceSize offset)
178 {
179    panvk_stub();
180 }
181