/*
 * Copyright © 2024 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "genxml/gen_macros.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_desc_state.h"
#include "panvk_entrypoints.h"

#include "pan_pool.h"

#include "util/rounding.h"

#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"

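/*
 * Bind descriptor sets to a descriptor state and record the dynamic
 * buffer offsets. If an incoming set replaces the push set bound at the
 * same index, the push set's device copy is invalidated so it gets
 * re-uploaded the next time push descriptors are prepared.
 */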
static void
cmd_desc_state_bind_sets(struct panvk_descriptor_state *desc_state,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   unsigned dynoffset_idx = 0;
   for (unsigned i = 0; i < info->descriptorSetCount; ++i) {
      unsigned set_idx = i + info->firstSet;
      VK_FROM_HANDLE(panvk_descriptor_set, set, info->pDescriptorSets[i]);

      /* Invalidate the push set. */
      if (desc_state->sets[set_idx] &&
          desc_state->sets[set_idx] == desc_state->push_sets[set_idx])
         desc_state->push_sets[set_idx]->descs.dev = 0;

      desc_state->sets[set_idx] = set;

      if (!set || !set->layout->dyn_buf_count)
         continue;

      for (unsigned b = 0; b < set->layout->binding_count; b++) {
         VkDescriptorType type = set->layout->bindings[b].type;

         if (type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
             type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
            continue;

         unsigned dyn_buf_idx = set->layout->bindings[b].desc_idx;
         for (unsigned e = 0; e < set->layout->bindings[b].desc_count; e++) {
            desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx++] =
               info->pDynamicOffsets[dynoffset_idx++];
         }
      }
   }

   assert(dynoffset_idx == info->dynamicOffsetCount);
}

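/*
 * Get the push descriptor set for set_idx, allocating one if needed.
 * Push sets are recycled from the command pool free-list when possible,
 * and heap-allocated otherwise; either way they end up on the command
 * buffer's push_sets list.
 */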
static struct panvk_descriptor_set *
cmd_get_push_desc_set(struct vk_command_buffer *vk_cmdbuf,
                      struct panvk_descriptor_state *desc_state,
                      uint32_t set_idx)
{
   struct panvk_cmd_buffer *cmdbuf =
      container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
   struct panvk_cmd_pool *pool =
      container_of(cmdbuf->vk.pool, struct panvk_cmd_pool, vk);
   struct panvk_push_set *push_set;

   assert(set_idx < MAX_SETS);

   if (likely(desc_state->push_sets[set_idx])) {
      push_set = container_of(desc_state->push_sets[set_idx],
                              struct panvk_push_set, set);
   } else if (!list_is_empty(&pool->push_sets)) {
      push_set =
         list_first_entry(&pool->push_sets, struct panvk_push_set, base.node);
      list_del(&push_set->base.node);
      list_addtail(&push_set->base.node, &cmdbuf->push_sets);
      memset(push_set->descs, 0, sizeof(push_set->descs));
   } else {
      push_set = vk_zalloc(&pool->vk.alloc, sizeof(*push_set), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      /* Don't touch the list node if the allocation failed. */
      if (push_set)
         list_addtail(&push_set->base.node, &cmdbuf->push_sets);
   }

   if (unlikely(!push_set)) {
      vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   if (desc_state->push_sets[set_idx] == NULL) {
      desc_state->push_sets[set_idx] = &push_set->set;
      push_set->set.descs.host = push_set->descs;
   }

   struct panvk_descriptor_set *set = desc_state->push_sets[set_idx];

   /* Pushing descriptors replaces whatever sets are bound */
   desc_state->sets[set_idx] = set;
   return set;
}

#if PAN_ARCH <= 7
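/*
 * Allocate and fill the dynamic-SSBO table for a shader. Each entry
 * resolves to the set's dynamic buffer address plus the offset recorded
 * at bind time. Skipped if the table was already prepared.
 */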
VkResult
panvk_per_arch(cmd_prepare_dyn_ssbos)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   if (!shader || !shader->desc_info.dyn_ssbos.count ||
       shader_desc_state->dyn_ssbos)
      return VK_SUCCESS;

   struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
      cmdbuf, desc, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE,
      PANVK_DESCRIPTOR_SIZE);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct panvk_ssbo_addr *ssbos = ptr.cpu;
   for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_ssbos.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);

      ssbos[i] = (struct panvk_ssbo_addr){
         .base_addr = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset,
         .size = set->dyn_bufs[dyn_buf_idx].size,
      };
   }

   shader_desc_state->dyn_ssbos = ptr.gpu;
   return VK_SUCCESS;
}

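/*
 * Append the dynamic UBO descriptors after the regular UBOs in the UBO
 * table, applying the dynamic offsets recorded at bind time.
 */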
static void
panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state,
                        const struct panvk_shader *shader,
                        struct mali_uniform_buffer_packed *ubos,
                        uint32_t ubo_count)
{
   for (uint32_t i = 0; i < shader->desc_info.dyn_ubos.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_ubos.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      uint32_t ubo_idx =
         i + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO];
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);
      assert(ubo_idx < ubo_count);

      pan_pack(&ubos[ubo_idx], UNIFORM_BUFFER, cfg) {
         cfg.pointer = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
         cfg.entries = DIV_ROUND_UP(set->dyn_bufs[dyn_buf_idx].size, 16);
      }
   }
}

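/*
 * Allocate and populate the per-type descriptor tables (UBO, image,
 * texture, sampler, ...) consumed by a shader. Tables that were already
 * prepared, or that the shader doesn't use, are skipped. If the shader
 * uses textures but no samplers, a dummy sampler is emitted.
 */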
VkResult
panvk_per_arch(cmd_prepare_shader_desc_tables)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   if (!shader)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
      uint32_t desc_count =
         shader->desc_info.others.count[i] +
         (i == PANVK_BIFROST_DESC_TABLE_UBO ? shader->desc_info.dyn_ubos.count
                                            : 0);
      uint32_t desc_size =
         i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE;

      if (!desc_count || shader_desc_state->tables[i])
         continue;

      struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE);
      if (!ptr.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      shader_desc_state->tables[i] = ptr.gpu;

      if (i == PANVK_BIFROST_DESC_TABLE_UBO)
         panvk_cmd_fill_dyn_ubos(desc_state, shader, ptr.cpu, desc_count);

      /* The image table is actually the attribute table. Vertex shaders
       * get theirs emitted elsewhere, so only handle the other stages
       * here. */
      if (i == PANVK_BIFROST_DESC_TABLE_IMG &&
          shader->info.stage != MESA_SHADER_VERTEX) {
         assert(!shader_desc_state->img_attrib_table);

         ptr = panvk_cmd_alloc_desc_array(cmdbuf, desc_count, ATTRIBUTE);
         if (!ptr.gpu)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         shader_desc_state->img_attrib_table = ptr.gpu;
      }
   }

   uint32_t tex_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_TEXTURE];
   uint32_t sampler_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER];

   if (tex_count && !sampler_count) {
      struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
      if (!sampler.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      /* Emit a dummy sampler if we have to. */
      pan_pack(sampler.cpu, SAMPLER, _) {
      }

      shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu;
   }

   return VK_SUCCESS;
}
#else
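/*
 * Fill the dynamic buffer descriptors for a shader, resolving each
 * handle to the set's dynamic buffer address plus the offset recorded
 * at bind time.
 */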
void
panvk_per_arch(cmd_fill_dyn_bufs)(
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader, struct mali_buffer_packed *buffers)
{
   if (!shader)
      return;

   for (uint32_t i = 0; i < shader->desc_info.dyn_bufs.count; i++) {
      uint32_t src_handle = shader->desc_info.dyn_bufs.map[i];
      uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
      uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
      const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
      const uint32_t dyn_buf_offset =
         desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];

      assert(set_idx < MAX_SETS);
      assert(set);

      pan_pack(&buffers[i], BUFFER, cfg) {
         cfg.size = set->dyn_bufs[dyn_buf_idx].size;
         cfg.address = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
      }
   }
}

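/*
 * Build the resource table for a shader: entry 0 is the driver set, and
 * the following entries mirror the application descriptor sets up to the
 * last set the shader uses. The entry count is OR'ed into the low bits
 * of the returned table address.
 */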
VkResult
panvk_per_arch(cmd_prepare_shader_res_table)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   if (!shader || shader_desc_state->res_table)
      return VK_SUCCESS;

   uint32_t first_unused_set = util_last_bit(shader->desc_info.used_set_mask);
   uint32_t res_count = 1 + first_unused_set;
   struct panfrost_ptr ptr =
      panvk_cmd_alloc_desc_array(cmdbuf, res_count, RESOURCE);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_resource_packed *res_table = ptr.cpu;

   /* First entry is the driver set table, where we store the vertex
    * attributes, the dummy sampler, the dynamic buffers and the vertex
    * buffers. */
   pan_pack(&res_table[0], RESOURCE, cfg) {
      cfg.address = shader_desc_state->driver_set.dev_addr;
      cfg.size = shader_desc_state->driver_set.size;
      cfg.contains_descriptors = cfg.size > 0;
   }

   for (uint32_t i = 0; i < first_unused_set; i++) {
      const struct panvk_descriptor_set *set = desc_state->sets[i];

      pan_pack(&res_table[i + 1], RESOURCE, cfg) {
         if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) {
            cfg.address = set->descs.dev;
            cfg.contains_descriptors = true;
            cfg.size = set->desc_count * PANVK_DESCRIPTOR_SIZE;
         } else {
            cfg.address = 0;
            cfg.contains_descriptors = false;
            cfg.size = 0;
         }
      }
   }

   shader_desc_state->res_table = ptr.gpu | res_count;
   return VK_SUCCESS;
}
#endif

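/*
 * Upload any push descriptor set that is bound, referenced by
 * used_set_mask, and not yet resident in device memory.
 */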
VkResult
panvk_per_arch(cmd_prepare_push_descs)(struct panvk_cmd_buffer *cmdbuf,
                                       struct panvk_descriptor_state *desc_state,
                                       uint32_t used_set_mask)
{
   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->push_sets); i++) {
      struct panvk_descriptor_set *push_set = desc_state->push_sets[i];

      if (!(used_set_mask & BITFIELD_BIT(i)) || !push_set ||
          desc_state->sets[i] != push_set || push_set->descs.dev)
         continue;

      struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, push_set->desc_count * PANVK_DESCRIPTOR_SIZE,
         PANVK_DESCRIPTOR_SIZE);
      if (!ptr.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(ptr.cpu, push_set->descs.host,
             push_set->desc_count * PANVK_DESCRIPTOR_SIZE);
      push_set->descs.dev = ptr.gpu;
   }

   return VK_SUCCESS;
}

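/*
 * Binding sets also zeroes the per-stage shader descriptor state, so the
 * descriptor tables get rebuilt on the next draw/dispatch.
 */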
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindDescriptorSets2KHR)(
   VkCommandBuffer commandBuffer,
   const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* TODO: Invalidate only if the shader tables are disturbed */
   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      cmd_desc_state_bind_sets(&cmdbuf->state.gfx.desc_state,
                               pBindDescriptorSetsInfo);

      memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
      memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      cmd_desc_state_bind_sets(&cmdbuf->state.compute.desc_state,
                               pBindDescriptorSetsInfo);

      memset(&cmdbuf->state.compute.cs.desc, 0,
             sizeof(cmdbuf->state.compute.cs.desc));
   }
}

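/*
 * Write a batch of push descriptors into the push set for info->set.
 * The set layout is only attached for the duration of the writes, and
 * the device copy is invalidated so cmd_prepare_push_descs() re-uploads
 * it.
 */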
static void
push_desc_set_write(struct panvk_cmd_buffer *cmd,
                    struct panvk_descriptor_state *desc,
                    const VkPushDescriptorSetInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, playout, info->layout);

   const struct panvk_descriptor_set_layout *set_layout =
      to_panvk_descriptor_set_layout(playout->set_layouts[info->set]);

   struct panvk_descriptor_set *push_set =
      cmd_get_push_desc_set(&cmd->vk, desc, info->set);
   if (!push_set)
      return;

   push_set->layout = set_layout;
   push_set->desc_count = set_layout->desc_count;

   for (uint32_t i = 0; i < info->descriptorWriteCount; i++)
      panvk_per_arch(descriptor_set_write)(push_set,
                                           &info->pDescriptorWrites[i], true);

   push_set->descs.dev = 0;
   push_set->layout = NULL;
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSet2KHR)(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
      push_desc_set_write(cmdbuf, &cmdbuf->state.gfx.desc_state,
                          pPushDescriptorSetInfo);

      memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
      memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      push_desc_set_write(cmdbuf, &cmdbuf->state.compute.desc_state,
                          pPushDescriptorSetInfo);

      memset(&cmdbuf->state.compute.cs.desc, 0,
             sizeof(cmdbuf->state.compute.cs.desc));
   }
}

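/*
 * Template variant of push descriptors: same flow as
 * CmdPushDescriptorSet2KHR, but the writes come from a descriptor update
 * template and the bind point is taken from the template.
 */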
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSetWithTemplate2KHR)(
   VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
                                     *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, playout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;
   const struct panvk_descriptor_set_layout *set_layout =
      to_panvk_descriptor_set_layout(playout->set_layouts[set]);
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, template->bind_point);
   struct panvk_descriptor_set *push_set =
      cmd_get_push_desc_set(&cmdbuf->vk, desc_state, set);
   if (!push_set)
      return;

   push_set->layout = set_layout;
   push_set->desc_count = set_layout->desc_count;

   panvk_per_arch(descriptor_set_write_template)(
      push_set, template, pPushDescriptorSetWithTemplateInfo->pData, true);

   push_set->descs.dev = 0;
   push_set->layout = NULL;

   if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
      memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
   } else {
      memset(&cmdbuf->state.compute.cs.desc, 0,
             sizeof(cmdbuf->state.compute.cs.desc));
   }
}