/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "nvkmd/nvkmd.h"

#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "nv_push_cl906f.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cla097.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clc597.h"

static void
nvk_descriptor_state_fini(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc)
{
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   for (unsigned i = 0; i < NVK_MAX_SETS; i++) {
      vk_free(&pool->vk.alloc, desc->sets[i].push);
      desc->sets[i].push = NULL;
   }
}

static void
nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   util_dynarray_fini(&cmd->pushes);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

static VkResult
nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
                      VkCommandBufferLevel level,
                      struct vk_command_buffer **cmd_buffer_out)
{
   struct nvk_cmd_pool *pool = container_of(vk_pool, struct nvk_cmd_pool, vk);
   struct nvk_device *dev = nvk_cmd_pool_device(pool);
   struct nvk_cmd_buffer *cmd;
   VkResult result;

   cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&pool->vk, &cmd->vk,
                                   &nvk_cmd_buffer_ops, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->vk.alloc, cmd);
      return result;
   }

   cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
   cmd->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd->state.gfx._dynamic_sl;

   list_inithead(&cmd->owned_mem);
   list_inithead(&cmd->owned_gart_mem);
   util_dynarray_init(&cmd->pushes, NULL);

   *cmd_buffer_out = &cmd->vk;

   return VK_SUCCESS;
}

static void
nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   vk_command_buffer_reset(&cmd->vk);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   cmd->upload_mem = NULL;
   cmd->push_mem = NULL;
   cmd->push_mem_limit = NULL;
   cmd->push = (struct nv_push) {0};

   util_dynarray_clear(&cmd->pushes);

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
   .create = nvk_create_cmd_buffer,
   .reset = nvk_reset_cmd_buffer,
   .destroy = nvk_destroy_cmd_buffer,
};

/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];

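/* Allocate a block of command memory from the pool and track it on the
 * command buffer's owned list (GART or regular, depending on force_gart) so
 * it is returned to the pool when the command buffer is reset or destroyed.
 */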
static VkResult
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
                         struct nvk_cmd_mem **mem_out)
{
   VkResult result = nvk_cmd_pool_alloc_mem(nvk_cmd_buffer_pool(cmd),
                                            force_gart, mem_out);
   if (result != VK_SUCCESS)
      return result;

   if (force_gart)
      list_addtail(&(*mem_out)->link, &cmd->owned_gart_mem);
   else
      list_addtail(&(*mem_out)->link, &cmd->owned_mem);

   return VK_SUCCESS;
}

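/* Close out the currently recording push range, if any, and append it to
 * cmd->pushes.  The next push starts where this one ended.
 */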
static void
nvk_cmd_buffer_flush_push(struct nvk_cmd_buffer *cmd)
{
   if (likely(cmd->push_mem != NULL)) {
      const uint32_t mem_offset =
         (char *)cmd->push.start - (char *)cmd->push_mem->mem->map;

      struct nvk_cmd_push push = {
         .map = cmd->push.start,
         .addr = cmd->push_mem->mem->va->addr + mem_offset,
         .range = nv_push_dw_count(&cmd->push) * 4,
      };
      util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
   }

   cmd->push.start = cmd->push.end;
}

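/* Flush the current push and start a new one in freshly allocated command
 * memory.  If allocation fails, recording falls back to the static
 * push_runout scratch buffer; since push_mem is NULL in that case,
 * nvk_cmd_buffer_flush_push() will drop anything recorded there rather
 * than submit it.
 */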
void
nvk_cmd_buffer_new_push(struct nvk_cmd_buffer *cmd)
{
   nvk_cmd_buffer_flush_push(cmd);

   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &cmd->push_mem);
   if (unlikely(result != VK_SUCCESS)) {
      STATIC_ASSERT(NVK_CMD_BUFFER_MAX_PUSH <= NVK_CMD_MEM_SIZE / 4);
      cmd->push_mem = NULL;
      nv_push_init(&cmd->push, push_runout, 0);
      cmd->push_mem_limit = &push_runout[NVK_CMD_BUFFER_MAX_PUSH];
   } else {
      nv_push_init(&cmd->push, cmd->push_mem->mem->map, 0);
      cmd->push_mem_limit =
         (uint32_t *)((char *)cmd->push_mem->mem->map + NVK_CMD_MEM_SIZE);
   }
}

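/* Record a push that executes directly from an externally owned GPU address
 * (e.g. an indirect buffer) instead of from command-buffer-owned memory.
 * The range is marked no_prefetch, presumably because its contents may
 * still be written (for example by the GPU) after recording.
 */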
void
nvk_cmd_buffer_push_indirect(struct nvk_cmd_buffer *cmd,
                             uint64_t addr, uint32_t range)
{
   nvk_cmd_buffer_flush_push(cmd);

   struct nvk_cmd_push push = {
      .addr = addr,
      .range = range,
      .no_prefetch = true,
   };

   util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
}

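/* Sub-allocate `size` bytes of upload space for small command-buffer
 * lifetime data (e.g. push descriptor contents) and return both a GPU
 * address and a CPU mapping.  Allocations are served from the current
 * upload block when they fit; otherwise a new block is pulled from the
 * pool.
 */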
VkResult
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
                            uint32_t size, uint32_t alignment,
                            uint64_t *addr, void **ptr)
{
   assert(size % 4 == 0);
   assert(size <= NVK_CMD_MEM_SIZE);

   uint32_t offset = cmd->upload_offset;
   if (alignment > 0)
      offset = align(offset, alignment);

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->upload_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->upload_mem->mem->va->addr + offset;
      *ptr = (char *)cmd->upload_mem->mem->map + offset;

      cmd->upload_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;
   *ptr = mem->mem->map;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->upload_mem == NULL || size < cmd->upload_offset) {
      cmd->upload_mem = mem;
      cmd->upload_offset = size;
   }

   return VK_SUCCESS;
}

VkResult
nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
                           const void *data, uint32_t size,
                           uint32_t alignment, uint64_t *addr)
{
   VkResult result;
   void *map;

   result = nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, &map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(map, data, size);

   return VK_SUCCESS;
}

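/* Allocate a small (64 B) chunk of GART-backed memory for conditional
 * rendering state.  This mirrors nvk_cmd_buffer_upload_alloc() but always
 * takes the force_gart path and only hands back a GPU address.
 */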
VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   uint32_t offset = cmd->cond_render_gart_offset;
   uint32_t size = 64;

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->cond_render_gart_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->cond_render_gart_mem->mem->va->addr + offset;

      cmd->cond_render_gart_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, true, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->cond_render_gart_mem == NULL || size < cmd->cond_render_gart_offset) {
      cmd->cond_render_gart_mem = mem;
      cmd->cond_render_gart_offset = size;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_reset_cmd_buffer(&cmd->vk, 0);

   /* Start with a nop so we have at least something to submit */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_MTHD(p, NV90B5, NOP);
   P_NV90B5_NOP(p, 0);

   nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   nvk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_buffer_flush_push(cmd);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
                       const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (commandBufferCount == 0)
      return;

   nvk_cmd_buffer_flush_push(cmd);

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(nvk_cmd_buffer, other, pCommandBuffers[i]);

      /* We only need to copy the pushes.  We do not copy the
       * nvk_cmd_buffer::owned_mem lists because those track ownership.
       * Instead, we depend on the app to not discard secondaries while they
       * are used by a primary.  The Vulkan 1.3.227 spec for
       * vkFreeCommandBuffers() says:
       *
       *    "Any primary command buffer that is in the recording or executable
       *    state and has any element of pCommandBuffers recorded into it,
       *    becomes invalid."
       *
       * In other words, if the secondary command buffer ever goes away, this
       * command buffer is invalid and the only thing the client can validly
       * do with it is reset it.  vkResetCommandPool() has similar language.
       */
      util_dynarray_append_dynarray(&cmd->pushes, &other->pushes);
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes.  However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   nvk_cmd_invalidate_graphics_state(cmd);
   nvk_cmd_invalidate_compute_state(cmd);
}

enum nvk_barrier {
   NVK_BARRIER_RENDER_WFI              = 1 << 0,
   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
};

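/* Map the source half of a dependency (what was written, and by which
 * stages) onto the flushes and wait-for-idles needed before later work can
 * consume that data.
 */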
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;

      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         barriers |= NVK_BARRIER_RENDER_WFI;

      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         barriers |= NVK_BARRIER_COMPUTE_WFI;
   }

   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT |
                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
      barriers |= NVK_BARRIER_RENDER_WFI;

   return barriers;
}

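/* Map the destination half of a dependency (what will be read, and by which
 * stages) onto the caches that need to be invalidated before that read.
 */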
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;

   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
                  NVK_BARRIER_INVALIDATE_CONSTANT;

   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return barriers;
}

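/* Emit the flush/WFI side of a dependency.  When `wait` is false (the
 * vkCmdSetEvent path), cache flushes are still emitted but the render
 * wait-for-idle is skipped.
 */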
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                       const VkDependencyInfo *dep,
                       bool wait)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);

   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
      if (barriers & NVK_BARRIER_RENDER_WFI) {
         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }

      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }
   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
      /* If this comes from a vkCmdSetEvent, we don't need to wait */
      if (wait)
         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
   } else {
      /* Compute WFI only happens when shader data is flushed */
      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
   }
}

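/* Emit the invalidate side of one or more dependencies: texture and shader
 * cache invalidates plus, for NVK_BARRIER_INVALIDATE_MME_DATA, a
 * SET_REFERENCE and (on Turing and later) an MME_DMA_SYSMEMBAR.
 */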
void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}

void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
                     uint32_t stage_count,
                     const gl_shader_stage *stages,
                     struct vk_shader ** const shaders)
{
   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct nvk_shader *shader =
         container_of(shaders[i], struct nvk_shader, vk);

      if (shader != NULL) {
         nvk_device_ensure_slm(dev, shader->info.slm_size,
                               shader->info.crs_size);
      }

      if (stages[i] == MESA_SHADER_COMPUTE ||
          stages[i] == MESA_SHADER_KERNEL)
         nvk_cmd_bind_compute_shader(cmd, shader);
      else
         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS

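/* Flag any graphics cbuf bindings that source their data from the given
 * descriptor set range (or dynamic buffer range) as dirty so the binding
 * code re-emits them.
 */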
void
nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
                                    VkShaderStageFlags stages,
                                    uint32_t sets_start, uint32_t sets_end,
                                    uint32_t dyn_start, uint32_t dyn_end)
{
   if (!(stages & NVK_VK_GRAPHICS_STAGE_BITS))
      return;

   uint32_t groups = 0;
   u_foreach_bit(i, stages & NVK_VK_GRAPHICS_STAGE_BITS) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << i);
      uint32_t g = nvk_cbuf_binding_for_stage(stage);
      groups |= BITFIELD_BIT(g);
   }

   u_foreach_bit(g, groups) {
      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];

      for (uint32_t i = 0; i < ARRAY_SIZE(group->cbufs); i++) {
         const struct nvk_cbuf *cbuf = &group->cbufs[i];
         switch (cbuf->type) {
         case NVK_CBUF_TYPE_INVALID:
         case NVK_CBUF_TYPE_ROOT_DESC:
         case NVK_CBUF_TYPE_SHADER_DATA:
            break;

         case NVK_CBUF_TYPE_DESC_SET:
         case NVK_CBUF_TYPE_UBO_DESC:
            if (cbuf->desc_set >= sets_start && cbuf->desc_set < sets_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            if (cbuf->dynamic_idx >= dyn_start && cbuf->dynamic_idx < dyn_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         default:
            unreachable("Invalid cbuf type");
         }
      }
   }
}

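/* Common implementation of vkCmdBindDescriptorSets2KHR for one bind point.
 * Writes the bound set addresses into the root descriptor table, applies
 * dynamic offsets to the sets' dynamic buffer descriptors, and keeps the
 * per-set dynamic buffer start indices consistent for later binds.
 */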
static void
nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
   uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];

   /* Read off the current dynamic buffer start array so we can use it to
    * determine where we should start binding dynamic buffers.
    */
   nvk_descriptor_state_get_root_array(desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated.  Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   const uint8_t dyn_buffer_start = set_dynamic_buffer_start[info->firstSet];
   uint8_t dyn_buffer_end = dyn_buffer_start;

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
      unsigned s = i + info->firstSet;
      VK_FROM_HANDLE(nvk_descriptor_set, set, info->pDescriptorSets[i]);

      if (desc->sets[s].type != NVK_DESCRIPTOR_SET_TYPE_SET ||
          desc->sets[s].set != set) {
         struct nvk_buffer_address set_addr;
         if (set != NULL) {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_SET;
            desc->sets[s].set = set;
            set_addr = nvk_descriptor_set_addr(set);
         } else {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_NONE;
            desc->sets[s].set = NULL;
            set_addr = NVK_BUFFER_ADDRESS_NULL;
         }
         nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
      }

      set_dynamic_buffer_start[s] = dyn_buffer_end;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               union nvk_buffer_descriptor db = set->dynamic_buffers[j];
               uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
               if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
                   nvk_use_bindless_cbuf(&pdev->info)) {
                  assert((offset & 0xf) == 0);
                  db.cbuf.base_addr_shift_4 += offset >> 4;
               } else {
                  db.addr.base_addr += offset;
               }
               dynamic_buffers[dyn_buffer_end + j] = db;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_end += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= info->dynamicOffsetCount);

   nvk_descriptor_state_set_root_array(cmd, desc, dynamic_buffers,
                                       dyn_buffer_start, dyn_buffer_end,
                                       &dynamic_buffers[dyn_buffer_start]);

   /* We need to set everything above first_set because later calls to
    * nvk_bind_descriptor_sets() depend on it for knowing where to start and
    * they may not be called on the next consecutive set.
    */
   for (uint32_t s = info->firstSet + info->descriptorSetCount;
        s < NVK_MAX_SETS; s++)
      set_dynamic_buffer_start[s] = dyn_buffer_end;

   /* We need to at least sync everything from first_set to NVK_MAX_SETS.
    * However, we only save anything if firstSet >= 4 so we may as well sync
    * everything just to be safe.
    */
   nvk_descriptor_state_set_root_array(cmd, desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
                                       info->firstSet + info->descriptorSetCount,
                                       dyn_buffer_start, dyn_buffer_end);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
                              const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                               pBindDescriptorSetsInfo);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
                               pBindDescriptorSetsInfo);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer,
                                uint32_t bufferCount,
                                const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   for (uint32_t i = 0; i < bufferCount; i++)
      cmd->state.descriptor_buffers[i] = pBindingInfos[i].address;
}

static void
nvk_set_descriptor_buffer_offsets(struct nvk_cmd_buffer *cmd,
                                  struct nvk_descriptor_state *desc,
                                  const VkSetDescriptorBufferOffsetsInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   for (uint32_t i = 0; i < info->setCount; ++i) {
      const uint32_t s = i + info->firstSet;

      desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_BUFFER;
      desc->sets[s].set = NULL;

      struct nvk_buffer_address set_addr;
      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
         assert(set_layout->flags &
                VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT);

         const uint64_t buffer_base_addr =
            cmd->state.descriptor_buffers[info->pBufferIndices[i]];

         set_addr = (struct nvk_buffer_address) {
            .base_addr = buffer_base_addr + info->pOffsets[i],
            .size = set_layout->max_buffer_size,
         };
      } else {
         set_addr = NVK_BUFFER_ADDRESS_NULL;
      }
      nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
   }

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->firstSet,
                                       info->firstSet + info->setCount,
                                       0, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdSetDescriptorBufferOffsets2EXT(VkCommandBuffer commandBuffer,
                                      const VkSetDescriptorBufferOffsetsInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.gfx.descriptors,
                                        pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.cs.descriptors,
                                        pInfo);
   }
}

static void
nvk_bind_embedded_samplers(struct nvk_cmd_buffer *cmd,
                           struct nvk_descriptor_state *desc,
                           const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   struct nvk_buffer_address set_addr = {
      .base_addr = set_layout->embedded_samplers_addr,
      .size = set_layout->non_variable_descriptor_buffer_size,
   };
   nvk_descriptor_state_set_root(cmd, desc, sets[info->set], set_addr);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
   VkCommandBuffer commandBuffer,
   const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.gfx.descriptors, pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.cs.descriptors, pInfo);
   }
}

static void
nvk_push_constants(UNUSED struct nvk_cmd_buffer *cmd,
                   struct nvk_descriptor_state *desc,
                   const VkPushConstantsInfoKHR *info)
{
   nvk_descriptor_state_set_root_array(cmd, desc, push,
                                       info->offset, info->size,
                                       (char *)info->pValues);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
                         const VkPushConstantsInfoKHR *pPushConstantsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
      nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      nvk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
}

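/* Get (lazily allocating) the CPU-side push descriptor set for `set` and
 * mark it dirty.  Returns NULL and records VK_ERROR_OUT_OF_HOST_MEMORY on
 * the command buffer if the allocation fails.
 */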
static struct nvk_push_descriptor_set *
nvk_cmd_push_descriptors(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         uint32_t set)
{
   assert(set < NVK_MAX_SETS);
   if (unlikely(desc->sets[set].push == NULL)) {
      desc->sets[set].push = vk_zalloc(&cmd->vk.pool->alloc,
                                       sizeof(*desc->sets[set].push), 8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->sets[set].push == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set].type = NVK_DESCRIPTOR_SET_TYPE_PUSH;
   desc->sets[set].set = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->sets[set].push;
}

static void
nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
                        struct nvk_descriptor_state *desc,
                        const VkPushDescriptorSetInfoKHR *info)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, info->set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   nvk_push_descriptor_set_update(dev, push_set, set_layout,
                                  info->descriptorWriteCount,
                                  info->pDescriptorWrites);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->set, info->set + 1, 0, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
                             const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                              pPushDescriptorSetInfo);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
                              pPushDescriptorSetInfo);
   }
}

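/* Upload any dirty push descriptor sets to GPU-visible memory and point the
 * root descriptor table at the uploaded copies, presumably as part of
 * flushing graphics or compute state.
 */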
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                      struct nvk_descriptor_state *desc)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
   VkResult result;

   u_foreach_bit(set_idx, desc->push_dirty) {
      if (desc->sets[set_idx].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         continue;

      struct nvk_push_descriptor_set *push_set = desc->sets[set_idx].push;
      uint64_t push_set_addr;
      result = nvk_cmd_buffer_upload_data(cmd, push_set->data,
                                          sizeof(push_set->data),
                                          min_cbuf_alignment,
                                          &push_set_addr);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }

      struct nvk_buffer_address set_addr = {
         .base_addr = push_set_addr,
         .size = sizeof(push_set->data),
      };
      nvk_descriptor_state_set_root(cmd, desc, sets[set_idx], set_addr);
   }
}

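/* Resolve an nvk_cbuf binding to a GPU address range, if it can be resolved
 * on the CPU.  Returns false for UBO descriptors that live in a bound
 * descriptor set/buffer, in which case the caller presumably fetches the
 * descriptor on the GPU (see nvk_cmd_buffer_get_cbuf_descriptor_addr()).
 */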
bool
nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                             const struct nvk_descriptor_state *desc,
                             const struct nvk_shader *shader,
                             const struct nvk_cbuf *cbuf,
                             struct nvk_buffer_address *addr_out)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   switch (cbuf->type) {
   case NVK_CBUF_TYPE_INVALID:
      *addr_out = (struct nvk_buffer_address) { .size = 0 };
      return true;

   case NVK_CBUF_TYPE_ROOT_DESC:
      unreachable("The caller should handle root descriptors");
      return false;

   case NVK_CBUF_TYPE_SHADER_DATA:
      *addr_out = (struct nvk_buffer_address) {
         .base_addr = shader->data_addr,
         .size = shader->data_size,
      };
      return true;

   case NVK_CBUF_TYPE_DESC_SET:
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], addr_out);
      return true;

   case NVK_CBUF_TYPE_DYNAMIC_UBO: {
      uint8_t dyn_idx;
      nvk_descriptor_state_get_root(
         desc, set_dynamic_buffer_start[cbuf->desc_set], &dyn_idx);
      dyn_idx += cbuf->dynamic_idx;
      union nvk_buffer_descriptor ubo_desc;
      nvk_descriptor_state_get_root(desc, dynamic_buffers[dyn_idx], &ubo_desc);
      *addr_out = nvk_ubo_descriptor_addr(pdev, ubo_desc);
      return true;
   }

   case NVK_CBUF_TYPE_UBO_DESC: {
      if (desc->sets[cbuf->desc_set].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         return false;

      struct nvk_push_descriptor_set *push = desc->sets[cbuf->desc_set].push;
      if (push == NULL)
         return false;

      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
      union nvk_buffer_descriptor ubo_desc;
      memcpy(&ubo_desc, &push->data[cbuf->desc_offset], sizeof(ubo_desc));
      *addr_out = nvk_ubo_descriptor_addr(pdev, ubo_desc);
      return true;
   }

   default:
      unreachable("Invalid cbuf type");
   }
}

uint64_t
nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
                                        const struct nvk_descriptor_state *desc,
                                        const struct nvk_cbuf *cbuf)
{
   assert(cbuf->type == NVK_CBUF_TYPE_UBO_DESC);
   switch (desc->sets[cbuf->desc_set].type) {
   case NVK_DESCRIPTOR_SET_TYPE_SET:
   case NVK_DESCRIPTOR_SET_TYPE_BUFFER: {
      struct nvk_buffer_address set_addr;
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], &set_addr);

      assert(cbuf->desc_offset < set_addr.size);
      return set_addr.base_addr + cbuf->desc_offset;
   }

   default:
      unreachable("Unknown descriptor set type");
   }
}

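/* Debug helper: decode and print every recorded push to `fp`.  Indirect
 * pushes have no CPU mapping, so only their GPU address and size are
 * printed.
 */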
void
nvk_cmd_buffer_dump(struct nvk_cmd_buffer *cmd, FILE *fp)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, p) {
      if (p->map) {
         struct nv_push push = {
            .start = (uint32_t *)p->map,
            .end = (uint32_t *)((char *)p->map + p->range),
         };
         vk_push_print(fp, &push, &pdev->info);
      } else {
         const uint64_t addr = p->addr;
         fprintf(fp, "<%u B of INDIRECT DATA at 0x%" PRIx64 ">\n",
                 p->range, addr);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetWithTemplate2KHR(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetWithTemplateInfoKHR *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;

   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, template->bind_point);
   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update_template(dev, push_set, set_layout, template,
                                           pPushDescriptorSetWithTemplateInfo->pData);

   /* We don't know the actual set of stages here so assume everything */
   nvk_cmd_dirty_cbufs_for_descriptors(cmd, NVK_VK_GRAPHICS_STAGE_BITS |
                                       VK_SHADER_STAGE_COMPUTE_BIT,
                                       set, set + 1, 0, 0);
}