/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"

#include "nvk_buffer.h"
#include "nvk_cmd_pool.h"
#include "nvk_descriptor_set_layout.h"
#include "nvk_device.h"
#include "nvk_device_memory.h"
#include "nvk_entrypoints.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"
#include "nvkmd/nvkmd.h"

#include "vk_pipeline_layout.h"
#include "vk_synchronization.h"

#include "nv_push_cl906f.h"
#include "nv_push_cl90b5.h"
#include "nv_push_cla097.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clc597.h"

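/* Releases the push descriptor set storage attached to a descriptor state.
 * Used from both command buffer destroy and reset.
 */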
static void
nvk_descriptor_state_fini(struct nvk_cmd_buffer *cmd,
                          struct nvk_descriptor_state *desc)
{
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   for (unsigned i = 0; i < NVK_MAX_SETS; i++) {
      vk_free(&pool->vk.alloc, desc->sets[i].push);
      desc->sets[i].push = NULL;
   }
}

static void
nvk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_gart_mem);
   util_dynarray_fini(&cmd->pushes);
   vk_command_buffer_finish(&cmd->vk);
   vk_free(&pool->vk.alloc, cmd);
}

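/* vk_command_buffer_ops::create hook: allocates the nvk_cmd_buffer from the
 * pool's allocator and initializes its owned memory lists and push dynarray.
 */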
static VkResult
nvk_create_cmd_buffer(struct vk_command_pool *vk_pool,
                      VkCommandBufferLevel level,
                      struct vk_command_buffer **cmd_buffer_out)
{
   struct nvk_cmd_pool *pool = container_of(vk_pool, struct nvk_cmd_pool, vk);
   struct nvk_device *dev = nvk_cmd_pool_device(pool);
   struct nvk_cmd_buffer *cmd;
   VkResult result;

   cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cmd == NULL)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&pool->vk, &cmd->vk,
                                   &nvk_cmd_buffer_ops, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->vk.alloc, cmd);
      return result;
   }

   cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
   cmd->vk.dynamic_graphics_state.ms.sample_locations =
      &cmd->state.gfx._dynamic_sl;

   list_inithead(&cmd->owned_mem);
   list_inithead(&cmd->owned_gart_mem);
   util_dynarray_init(&cmd->pushes, NULL);

   *cmd_buffer_out = &cmd->vk;

   return VK_SUCCESS;
}

static void
nvk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
                     UNUSED VkCommandBufferResetFlags flags)
{
   struct nvk_cmd_buffer *cmd =
      container_of(vk_cmd_buffer, struct nvk_cmd_buffer, vk);
   struct nvk_cmd_pool *pool = nvk_cmd_buffer_pool(cmd);

   vk_command_buffer_reset(&cmd->vk);

   nvk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
   nvk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);

   nvk_cmd_pool_free_mem_list(pool, &cmd->owned_mem);
   nvk_cmd_pool_free_gart_mem_list(pool, &cmd->owned_gart_mem);
   cmd->upload_mem = NULL;
   cmd->push_mem = NULL;
   cmd->push_mem_limit = NULL;
   cmd->push = (struct nv_push) {0};

   util_dynarray_clear(&cmd->pushes);

   memset(&cmd->state, 0, sizeof(cmd->state));
}

const struct vk_command_buffer_ops nvk_cmd_buffer_ops = {
   .create = nvk_create_cmd_buffer,
   .reset = nvk_reset_cmd_buffer,
   .destroy = nvk_destroy_cmd_buffer,
};

/* If we ever fail to allocate a push, we use this */
static uint32_t push_runout[NVK_CMD_BUFFER_MAX_PUSH];

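/* Pulls a memory block from the command pool and adds it to the command
 * buffer's owned list (GART-backed if force_gart is set) so it gets recycled
 * on reset or destroy.
 */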
static VkResult
nvk_cmd_buffer_alloc_mem(struct nvk_cmd_buffer *cmd, bool force_gart,
                         struct nvk_cmd_mem **mem_out)
{
   VkResult result = nvk_cmd_pool_alloc_mem(nvk_cmd_buffer_pool(cmd),
                                            force_gart, mem_out);
   if (result != VK_SUCCESS)
      return result;

   if (force_gart)
      list_addtail(&(*mem_out)->link, &cmd->owned_gart_mem);
   else
      list_addtail(&(*mem_out)->link, &cmd->owned_mem);

   return VK_SUCCESS;
}

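/* Closes out the currently recording push: if it has backing memory, it is
 * appended to cmd->pushes, then the push start is moved up to the current
 * write pointer.
 */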
static void
nvk_cmd_buffer_flush_push(struct nvk_cmd_buffer *cmd)
{
   if (likely(cmd->push_mem != NULL)) {
      const uint32_t mem_offset =
         (char *)cmd->push.start - (char *)cmd->push_mem->mem->map;

      struct nvk_cmd_push push = {
         .map = cmd->push.start,
         .addr = cmd->push_mem->mem->va->addr + mem_offset,
         .range = nv_push_dw_count(&cmd->push) * 4,
      };
      util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
   }

   cmd->push.start = cmd->push.end;
}

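/* Starts recording into a fresh push memory block.  On allocation failure we
 * fall back to the static push_runout scratch so encoding can continue;
 * runout contents never reach cmd->pushes because nvk_cmd_buffer_flush_push
 * skips pushes without backing memory.
 */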
void
nvk_cmd_buffer_new_push(struct nvk_cmd_buffer *cmd)
{
   nvk_cmd_buffer_flush_push(cmd);

   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &cmd->push_mem);
   if (unlikely(result != VK_SUCCESS)) {
      STATIC_ASSERT(NVK_CMD_BUFFER_MAX_PUSH <= NVK_CMD_MEM_SIZE / 4);
      cmd->push_mem = NULL;
      nv_push_init(&cmd->push, push_runout, 0);
      cmd->push_mem_limit = &push_runout[NVK_CMD_BUFFER_MAX_PUSH];
   } else {
      nv_push_init(&cmd->push, cmd->push_mem->mem->map, 0);
      cmd->push_mem_limit =
         (uint32_t *)((char *)cmd->push_mem->mem->map + NVK_CMD_MEM_SIZE);
   }
}

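/* Records a push entry that points at caller-provided GPU memory (marked
 * no_prefetch) instead of data written through cmd->push.
 */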
void
nvk_cmd_buffer_push_indirect(struct nvk_cmd_buffer *cmd,
                             uint64_t addr, uint32_t range)
{
   nvk_cmd_buffer_flush_push(cmd);

   struct nvk_cmd_push push = {
      .addr = addr,
      .range = range,
      .no_prefetch = true,
   };

   util_dynarray_append(&cmd->pushes, struct nvk_cmd_push, push);
}

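/* Sub-allocates size bytes of GPU-visible memory that lives as long as the
 * command buffer.  Allocations are carved out of NVK_CMD_MEM_SIZE blocks; a
 * new block is pulled from the pool when the current one can't fit the
 * request.
 */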
VkResult
nvk_cmd_buffer_upload_alloc(struct nvk_cmd_buffer *cmd,
                            uint32_t size, uint32_t alignment,
                            uint64_t *addr, void **ptr)
{
   assert(size % 4 == 0);
   assert(size <= NVK_CMD_MEM_SIZE);

   uint32_t offset = cmd->upload_offset;
   if (alignment > 0)
      offset = align(offset, alignment);

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->upload_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->upload_mem->mem->va->addr + offset;
      *ptr = (char *)cmd->upload_mem->mem->map + offset;

      cmd->upload_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, false, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;
   *ptr = mem->mem->map;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->upload_mem == NULL || size < cmd->upload_offset) {
      cmd->upload_mem = mem;
      cmd->upload_offset = size;
   }

   return VK_SUCCESS;
}

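/* Copies data into freshly allocated upload memory and returns its GPU
 * address.  Illustrative use (the names below are hypothetical):
 *
 *    uint64_t params_addr;
 *    VkResult result =
 *       nvk_cmd_buffer_upload_data(cmd, &params, sizeof(params), 0x100,
 *                                  &params_addr);
 */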
VkResult
nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
                           const void *data, uint32_t size,
                           uint32_t alignment, uint64_t *addr)
{
   VkResult result;
   void *map;

   result = nvk_cmd_buffer_upload_alloc(cmd, size, alignment, addr, &map);
   if (unlikely(result != VK_SUCCESS))
      return result;

   memcpy(map, data, size);

   return VK_SUCCESS;
}

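/* Allocates a 64-byte slot for a conditional rendering predicate.  Unlike
 * the regular upload path, this memory is GART-backed (force_gart = true).
 */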
VkResult
nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                 uint64_t *addr)
{
   uint32_t offset = cmd->cond_render_gart_offset;
   uint32_t size = 64;

   assert(offset <= NVK_CMD_MEM_SIZE);
   if (cmd->cond_render_gart_mem != NULL && size <= NVK_CMD_MEM_SIZE - offset) {
      *addr = cmd->cond_render_gart_mem->mem->va->addr + offset;

      cmd->cond_render_gart_offset = offset + size;

      return VK_SUCCESS;
   }

   struct nvk_cmd_mem *mem;
   VkResult result = nvk_cmd_buffer_alloc_mem(cmd, true, &mem);
   if (unlikely(result != VK_SUCCESS))
      return result;

   *addr = mem->mem->va->addr;

   /* Pick whichever of the current upload BO and the new BO will have more
    * room left to be the BO for the next upload.  If our upload size is
    * bigger than the old offset, we're better off burning the whole new
    * upload BO on this one allocation and continuing on the current upload
    * BO.
    */
   if (cmd->cond_render_gart_mem == NULL || size < cmd->cond_render_gart_offset) {
      cmd->cond_render_gart_mem = mem;
      cmd->cond_render_gart_offset = size;
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                       const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_reset_cmd_buffer(&cmd->vk, 0);

   /* Start with a nop so we have at least something to submit */
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 2);
   P_MTHD(p, NV90B5, NOP);
   P_NV90B5_NOP(p, 0);

   nvk_cmd_buffer_begin_compute(cmd, pBeginInfo);
   nvk_cmd_buffer_begin_graphics(cmd, pBeginInfo);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_buffer_flush_push(cmd);

   return vk_command_buffer_get_record_result(&cmd->vk);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                       uint32_t commandBufferCount,
                       const VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (commandBufferCount == 0)
      return;

   nvk_cmd_buffer_flush_push(cmd);

   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(nvk_cmd_buffer, other, pCommandBuffers[i]);

      /* We only need to copy the pushes.  We do not copy the
       * nvk_cmd_buffer::bos because that tracks ownership.  Instead, we
       * depend on the app to not discard secondaries while they are used by a
       * primary.  The Vulkan 1.3.227 spec for vkFreeCommandBuffers() says:
       *
       *    "Any primary command buffer that is in the recording or executable
       *    state and has any element of pCommandBuffers recorded into it,
       *    becomes invalid."
       *
       * In other words, if the secondary command buffer ever goes away, this
       * command buffer is invalid and the only thing the client can validly
       * do with it is reset it.  vkResetCommandPool() has similar language.
       */
      util_dynarray_append_dynarray(&cmd->pushes, &other->pushes);
   }

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When secondary command buffer(s) are recorded to execute on a
    *    primary command buffer, the secondary command buffer inherits no
    *    state from the primary command buffer, and all state of the primary
    *    command buffer is undefined after an execute secondary command buffer
    *    command is recorded. There is one exception to this rule - if the
    *    primary command buffer is inside a render pass instance, then the
    *    render pass and subpass state is not disturbed by executing secondary
    *    command buffers. For state dependent commands (such as draws and
    *    dispatches), any state consumed by those commands must not be
    *    undefined."
    *
    * Therefore, it's the client's job to reset all the state in the primary
    * after the secondary executes.  However, if we're doing any internal
    * dirty tracking, we may miss the fact that a secondary has messed with
    * GPU state if we don't invalidate all our internal tracking.
    */
   nvk_cmd_invalidate_graphics_state(cmd);
   nvk_cmd_invalidate_compute_state(cmd);
}

enum nvk_barrier {
   NVK_BARRIER_RENDER_WFI              = 1 << 0,
   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
};

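/* Maps the source stage/access masks of a barrier onto the flush and
 * wait-for-idle work required before the dependency can be satisfied.
 */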
static enum nvk_barrier
nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
                          VkAccessFlags2 access)
{
   stages = vk_expand_src_stage_flags2(stages);
   access = vk_filter_src_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;

      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
         barriers |= NVK_BARRIER_RENDER_WFI;

      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
         barriers |= NVK_BARRIER_COMPUTE_WFI;
   }

   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
      barriers |= NVK_BARRIER_RENDER_WFI;

   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT |
                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
      barriers |= NVK_BARRIER_RENDER_WFI;

   return barriers;
}

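/* Maps the destination stage/access masks of a barrier onto the caches that
 * must be invalidated before subsequent work consumes the data.
 */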
static enum nvk_barrier
nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
                        VkAccessFlags2 access)
{
   stages = vk_expand_dst_stage_flags2(stages);
   access = vk_filter_dst_access_flags2(stages, access);

   enum nvk_barrier barriers = 0;

   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;

   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
                  NVK_BARRIER_INVALIDATE_CONSTANT;

   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;

   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;

   return barriers;
}

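/* Emits the flush/WFI half of a dependency.  wait is false when this comes
 * from a vkCmdSetEvent-style call, in which case the wait-for-idle itself is
 * skipped.
 */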
void
nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
                       const VkDependencyInfo *dep,
                       bool wait)
{
   enum nvk_barrier barriers = 0;

   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
                                            bar->srcAccessMask);
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);

   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
      if (barriers & NVK_BARRIER_RENDER_WFI) {
         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }

      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
            .data = DATA_TRUE,
            .flush_data = FLUSH_DATA_TRUE,
         });
      }
   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
      /* If this comes from a vkCmdSetEvent, we don't need to wait */
      if (wait)
         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
   } else {
      /* Compute WFI only happens when shader data is flushed */
      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
   }
}

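/* Emits the cache-invalidate half of a set of dependencies: texture, shader
 * data, constant, and MME caches are invalidated according to the
 * destination masks.  Takes an array of VkDependencyInfo so callers can
 * batch multiple dependencies into one walk.
 */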
void
nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
                        uint32_t dep_count,
                        const VkDependencyInfo *deps)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   enum nvk_barrier barriers = 0;

   for (uint32_t d = 0; d < dep_count; d++) {
      const VkDependencyInfo *dep = &deps[d];

      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }

      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
                                             bar->dstAccessMask);
      }
   }

   if (!barriers)
      return;

   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);

   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
         .lines = LINES_ALL,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
      });
   }

   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);

      if (pdev->info.cls_eng3d >= TURING_A)
         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
                        const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
}

void
nvk_cmd_bind_shaders(struct vk_command_buffer *vk_cmd,
                     uint32_t stage_count,
                     const gl_shader_stage *stages,
                     struct vk_shader ** const shaders)
{
   struct nvk_cmd_buffer *cmd = container_of(vk_cmd, struct nvk_cmd_buffer, vk);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   for (uint32_t i = 0; i < stage_count; i++) {
      struct nvk_shader *shader =
         container_of(shaders[i], struct nvk_shader, vk);

      if (shader != NULL) {
         nvk_device_ensure_slm(dev, shader->info.slm_size,
                                    shader->info.crs_size);
      }

      if (stages[i] == MESA_SHADER_COMPUTE ||
          stages[i] == MESA_SHADER_KERNEL)
         nvk_cmd_bind_compute_shader(cmd, shader);
      else
         nvk_cmd_bind_graphics_shader(cmd, stages[i], shader);
   }
}

#define NVK_VK_GRAPHICS_STAGE_BITS VK_SHADER_STAGE_ALL_GRAPHICS

void
nvk_cmd_dirty_cbufs_for_descriptors(struct nvk_cmd_buffer *cmd,
                                    VkShaderStageFlags stages,
                                    uint32_t sets_start, uint32_t sets_end,
                                    uint32_t dyn_start, uint32_t dyn_end)
{
   if (!(stages & NVK_VK_GRAPHICS_STAGE_BITS))
      return;

   uint32_t groups = 0;
   u_foreach_bit(i, stages & NVK_VK_GRAPHICS_STAGE_BITS) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(1 << i);
      uint32_t g = nvk_cbuf_binding_for_stage(stage);
      groups |= BITFIELD_BIT(g);
   }

   u_foreach_bit(g, groups) {
      struct nvk_cbuf_group *group = &cmd->state.gfx.cbuf_groups[g];

      for (uint32_t i = 0; i < ARRAY_SIZE(group->cbufs); i++) {
         const struct nvk_cbuf *cbuf = &group->cbufs[i];
         switch (cbuf->type) {
         case NVK_CBUF_TYPE_INVALID:
         case NVK_CBUF_TYPE_ROOT_DESC:
         case NVK_CBUF_TYPE_SHADER_DATA:
            break;

         case NVK_CBUF_TYPE_DESC_SET:
         case NVK_CBUF_TYPE_UBO_DESC:
            if (cbuf->desc_set >= sets_start && cbuf->desc_set < sets_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         case NVK_CBUF_TYPE_DYNAMIC_UBO:
            if (cbuf->dynamic_idx >= dyn_start && cbuf->dynamic_idx < dyn_end)
               group->dirty |= BITFIELD_BIT(i);
            break;

         default:
            unreachable("Invalid cbuf type");
         }
      }
   }
}

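/* Implements vkCmdBindDescriptorSets2KHR for one bind point: writes the set
 * addresses and offset-adjusted dynamic buffer descriptors into the root
 * descriptor table and dirties the affected cbuf bindings.
 */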
static void
nvk_bind_descriptor_sets(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         const VkBindDescriptorSetsInfoKHR *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   union nvk_buffer_descriptor dynamic_buffers[NVK_MAX_DYNAMIC_BUFFERS];
   uint8_t set_dynamic_buffer_start[NVK_MAX_SETS];

   /* Read off the current dynamic buffer start array so we can use it to
    * determine where we should start binding dynamic buffers.
    */
   nvk_descriptor_state_get_root_array(desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   /* From the Vulkan 1.3.275 spec:
    *
    *    "When binding a descriptor set (see Descriptor Set Binding) to
    *    set number N...
    *
    *    If, additionally, the previously bound descriptor set for set
    *    N was bound using a pipeline layout not compatible for set N,
    *    then all bindings in sets numbered greater than N are
    *    disturbed."
    *
    * This means that, if some earlier set gets bound in such a way that
    * it changes set_dynamic_buffer_start[s], this binding is implicitly
    * invalidated.  Therefore, we can always look at the current value
    * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
    * range and it's only our responsibility to adjust all
    * set_dynamic_buffer_start[p] for p > s as needed.
    */
   const uint8_t dyn_buffer_start = set_dynamic_buffer_start[info->firstSet];
   uint8_t dyn_buffer_end = dyn_buffer_start;

   uint32_t next_dyn_offset = 0;
   for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
      unsigned s = i + info->firstSet;
      VK_FROM_HANDLE(nvk_descriptor_set, set, info->pDescriptorSets[i]);

      if (desc->sets[s].type != NVK_DESCRIPTOR_SET_TYPE_SET ||
          desc->sets[s].set != set) {
         struct nvk_buffer_address set_addr;
         if (set != NULL) {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_SET;
            desc->sets[s].set = set;
            set_addr = nvk_descriptor_set_addr(set);
         } else {
            desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_NONE;
            desc->sets[s].set = NULL;
            set_addr = NVK_BUFFER_ADDRESS_NULL;
         }
         nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
      }

      set_dynamic_buffer_start[s] = dyn_buffer_end;

      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);

         if (set != NULL && set_layout->dynamic_buffer_count > 0) {
            for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
               union nvk_buffer_descriptor db = set->dynamic_buffers[j];
               uint32_t offset = info->pDynamicOffsets[next_dyn_offset + j];
               if (BITSET_TEST(set_layout->dynamic_ubos, j) &&
                   nvk_use_bindless_cbuf(&pdev->info)) {
                  assert((offset & 0xf) == 0);
                  db.cbuf.base_addr_shift_4 += offset >> 4;
               } else {
                  db.addr.base_addr += offset;
               }
               dynamic_buffers[dyn_buffer_end + j] = db;
            }
            next_dyn_offset += set->layout->dynamic_buffer_count;
         }

         dyn_buffer_end += set_layout->dynamic_buffer_count;
      } else {
         assert(set == NULL);
      }
   }
   assert(dyn_buffer_end <= NVK_MAX_DYNAMIC_BUFFERS);
   assert(next_dyn_offset <= info->dynamicOffsetCount);

   nvk_descriptor_state_set_root_array(cmd, desc, dynamic_buffers,
                                       dyn_buffer_start, dyn_buffer_end,
                                       &dynamic_buffers[dyn_buffer_start]);

   /* We need to set everything above first_set because later calls to
    * nvk_bind_descriptor_sets() depend on it for knowing where to start and
    * they may not be called on the next consecutive set.
    */
   for (uint32_t s = info->firstSet + info->descriptorSetCount;
        s < NVK_MAX_SETS; s++)
      set_dynamic_buffer_start[s] = dyn_buffer_end;

   /* We need to at least sync everything from first_set to NVK_MAX_SETS.
    * However, we only save anything if firstSet >= 4 so we may as well sync
    * everything just to be safe.
    */
   nvk_descriptor_state_set_root_array(cmd, desc, set_dynamic_buffer_start,
                                       0, NVK_MAX_SETS,
                                       set_dynamic_buffer_start);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags, info->firstSet,
                                       info->firstSet + info->descriptorSetCount,
                                       dyn_buffer_start, dyn_buffer_end);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorSets2KHR(VkCommandBuffer commandBuffer,
                              const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pBindDescriptorSetsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.gfx.descriptors,
                               pBindDescriptorSetsInfo);
   }

   if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_descriptor_sets(cmd, &cmd->state.cs.descriptors,
                               pBindDescriptorSetsInfo);
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBuffersEXT(VkCommandBuffer commandBuffer,
                                uint32_t bufferCount,
                                const VkDescriptorBufferBindingInfoEXT *pBindingInfos)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   for (uint32_t i = 0; i < bufferCount; i++)
      cmd->state.descriptor_buffers[i] = pBindingInfos[i].address;
}

static void
nvk_set_descriptor_buffer_offsets(struct nvk_cmd_buffer *cmd,
                                  struct nvk_descriptor_state *desc,
                                  const VkSetDescriptorBufferOffsetsInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   for (uint32_t i = 0; i < info->setCount; ++i) {
      const uint32_t s = i + info->firstSet;

      desc->sets[s].type = NVK_DESCRIPTOR_SET_TYPE_BUFFER;
      desc->sets[s].set = NULL;

      struct nvk_buffer_address set_addr;
      if (pipeline_layout->set_layouts[s] != NULL) {
         const struct nvk_descriptor_set_layout *set_layout =
            vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
         assert(set_layout->flags &
                VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT);

         const uint64_t buffer_base_addr =
            cmd->state.descriptor_buffers[info->pBufferIndices[i]];

         set_addr = (struct nvk_buffer_address) {
            .base_addr = buffer_base_addr + info->pOffsets[i],
            .size = set_layout->max_buffer_size,
         };
      } else {
         set_addr = NVK_BUFFER_ADDRESS_NULL;
      }
      nvk_descriptor_state_set_root(cmd, desc, sets[s], set_addr);
   }

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->firstSet,
                                       info->firstSet + info->setCount,
                                       0, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdSetDescriptorBufferOffsets2EXT(VkCommandBuffer commandBuffer,
                                      const VkSetDescriptorBufferOffsetsInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.gfx.descriptors,
                                        pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_set_descriptor_buffer_offsets(cmd, &cmd->state.cs.descriptors,
                                        pInfo);
   }
}

static void
nvk_bind_embedded_samplers(struct nvk_cmd_buffer *cmd,
                           struct nvk_descriptor_state *desc,
                           const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *info)
{
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
   const struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   struct nvk_buffer_address set_addr = {
      .base_addr = set_layout->embedded_samplers_addr,
      .size = set_layout->non_variable_descriptor_buffer_size,
   };
   nvk_descriptor_state_set_root(cmd, desc, sets[info->set], set_addr);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBindDescriptorBufferEmbeddedSamplers2EXT(
    VkCommandBuffer commandBuffer,
    const VkBindDescriptorBufferEmbeddedSamplersInfoEXT *pInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.gfx.descriptors, pInfo);
   }

   if (pInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_bind_embedded_samplers(cmd, &cmd->state.cs.descriptors, pInfo);
   }
}

static void
nvk_push_constants(UNUSED struct nvk_cmd_buffer *cmd,
                   struct nvk_descriptor_state *desc,
                   const VkPushConstantsInfoKHR *info)
{
   nvk_descriptor_state_set_root_array(cmd, desc, push,
                                       info->offset, info->size,
                                       (char *)info->pValues);
}


VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
                         const VkPushConstantsInfoKHR *pPushConstantsInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushConstantsInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS)
      nvk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);

   if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
      nvk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
}

static struct nvk_push_descriptor_set *
nvk_cmd_push_descriptors(struct nvk_cmd_buffer *cmd,
                         struct nvk_descriptor_state *desc,
                         uint32_t set)
{
   assert(set < NVK_MAX_SETS);
   if (unlikely(desc->sets[set].push == NULL)) {
      desc->sets[set].push = vk_zalloc(&cmd->vk.pool->alloc,
                                       sizeof(*desc->sets[set].push), 8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (unlikely(desc->sets[set].push == NULL)) {
         vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
         return NULL;
      }
   }

   /* Pushing descriptors replaces whatever sets are bound */
   desc->sets[set].type = NVK_DESCRIPTOR_SET_TYPE_PUSH;
   desc->sets[set].set = NULL;
   desc->push_dirty |= BITFIELD_BIT(set);

   return desc->sets[set].push;
}

static void
nvk_push_descriptor_set(struct nvk_cmd_buffer *cmd,
                        struct nvk_descriptor_state *desc,
                        const VkPushDescriptorSetInfoKHR *info)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);

   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, info->set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);

   nvk_push_descriptor_set_update(dev, push_set, set_layout,
                                  info->descriptorWriteCount,
                                  info->pDescriptorWrites);

   nvk_cmd_dirty_cbufs_for_descriptors(cmd, info->stageFlags,
                                       info->set, info->set + 1, 0, 0);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSet2KHR(VkCommandBuffer commandBuffer,
                             const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);

   if (pPushDescriptorSetInfo->stageFlags & NVK_VK_GRAPHICS_STAGE_BITS) {
      nvk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
                              pPushDescriptorSetInfo);
   }

   if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
      nvk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
                              pPushDescriptorSetInfo);
   }
}

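/* Uploads any dirty push descriptor sets into command-buffer-lifetime memory
 * and points the root descriptor table at the uploaded copies.
 */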
void
nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                      struct nvk_descriptor_state *desc)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   const uint32_t min_cbuf_alignment = nvk_min_cbuf_alignment(&pdev->info);
   VkResult result;

   u_foreach_bit(set_idx, desc->push_dirty) {
      if (desc->sets[set_idx].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         continue;

      struct nvk_push_descriptor_set *push_set = desc->sets[set_idx].push;
      uint64_t push_set_addr;
      result = nvk_cmd_buffer_upload_data(cmd, push_set->data,
                                          sizeof(push_set->data),
                                          min_cbuf_alignment,
                                          &push_set_addr);
      if (unlikely(result != VK_SUCCESS)) {
         vk_command_buffer_set_error(&cmd->vk, result);
         return;
      }

      struct nvk_buffer_address set_addr = {
         .base_addr = push_set_addr,
         .size = sizeof(push_set->data),
      };
      nvk_descriptor_state_set_root(cmd, desc, sets[set_idx], set_addr);
   }
}

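/* Resolves an nvk_cbuf binding to a GPU address range.  Returns false when
 * the address can't be resolved on the CPU here (a UBO descriptor that lives
 * in a bound set or descriptor buffer); in that case
 * nvk_cmd_buffer_get_cbuf_descriptor_addr below gives the address of the
 * descriptor itself.
 */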
bool
nvk_cmd_buffer_get_cbuf_addr(struct nvk_cmd_buffer *cmd,
                             const struct nvk_descriptor_state *desc,
                             const struct nvk_shader *shader,
                             const struct nvk_cbuf *cbuf,
                             struct nvk_buffer_address *addr_out)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   switch (cbuf->type) {
   case NVK_CBUF_TYPE_INVALID:
      *addr_out = (struct nvk_buffer_address) { .size = 0 };
      return true;

   case NVK_CBUF_TYPE_ROOT_DESC:
      unreachable("The caller should handle root descriptors");
      return false;

   case NVK_CBUF_TYPE_SHADER_DATA:
      *addr_out = (struct nvk_buffer_address) {
         .base_addr = shader->data_addr,
         .size = shader->data_size,
      };
      return true;

   case NVK_CBUF_TYPE_DESC_SET:
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], addr_out);
      return true;

   case NVK_CBUF_TYPE_DYNAMIC_UBO: {
      uint8_t dyn_idx;
      nvk_descriptor_state_get_root(
         desc, set_dynamic_buffer_start[cbuf->desc_set], &dyn_idx);
      dyn_idx += cbuf->dynamic_idx;
      union nvk_buffer_descriptor ubo_desc;
      nvk_descriptor_state_get_root(desc, dynamic_buffers[dyn_idx], &ubo_desc);
      *addr_out = nvk_ubo_descriptor_addr(pdev, ubo_desc);
      return true;
   }

   case NVK_CBUF_TYPE_UBO_DESC: {
      if (desc->sets[cbuf->desc_set].type != NVK_DESCRIPTOR_SET_TYPE_PUSH)
         return false;

      struct nvk_push_descriptor_set *push = desc->sets[cbuf->desc_set].push;
      if (push == NULL)
         return false;

      assert(cbuf->desc_offset < NVK_PUSH_DESCRIPTOR_SET_SIZE);
      union nvk_buffer_descriptor desc;
      memcpy(&desc, &push->data[cbuf->desc_offset], sizeof(desc));
      *addr_out = nvk_ubo_descriptor_addr(pdev, desc);
      return true;
   }

   default:
      unreachable("Invalid cbuf type");
   }
}

uint64_t
nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
                                        const struct nvk_descriptor_state *desc,
                                        const struct nvk_cbuf *cbuf)
{
   assert(cbuf->type == NVK_CBUF_TYPE_UBO_DESC);
   switch (desc->sets[cbuf->desc_set].type) {
   case NVK_DESCRIPTOR_SET_TYPE_SET:
   case NVK_DESCRIPTOR_SET_TYPE_BUFFER: {
      struct nvk_buffer_address set_addr;
      nvk_descriptor_state_get_root(desc, sets[cbuf->desc_set], &set_addr);

      assert(cbuf->desc_offset < set_addr.size);
      return set_addr.base_addr + cbuf->desc_offset;
   }

   default:
      unreachable("Unknown descriptor set type");
   }
}

void
nvk_cmd_buffer_dump(struct nvk_cmd_buffer *cmd, FILE *fp)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);

   util_dynarray_foreach(&cmd->pushes, struct nvk_cmd_push, p) {
      if (p->map) {
         struct nv_push push = {
            .start = (uint32_t *)p->map,
            .end = (uint32_t *)((char *)p->map + p->range),
         };
         vk_push_print(fp, &push, &pdev->info);
      } else {
         const uint64_t addr = p->addr;
         fprintf(fp, "<%u B of INDIRECT DATA at 0x%" PRIx64 ">\n",
                 p->range, addr);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdPushDescriptorSetWithTemplate2KHR(
   VkCommandBuffer commandBuffer,
   const VkPushDescriptorSetWithTemplateInfoKHR *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;

   struct nvk_descriptor_state *desc =
      nvk_get_descriptors_state(cmd, template->bind_point);
   struct nvk_push_descriptor_set *push_set =
      nvk_cmd_push_descriptors(cmd, desc, set);
   if (unlikely(push_set == NULL))
      return;

   struct nvk_descriptor_set_layout *set_layout =
      vk_to_nvk_descriptor_set_layout(pipeline_layout->set_layouts[set]);

   nvk_push_descriptor_set_update_template(dev, push_set, set_layout, template,
                                           pPushDescriptorSetWithTemplateInfo->pData);

   /* We don't know the actual set of stages here so assume everything */
   nvk_cmd_dirty_cbufs_for_descriptors(cmd, NVK_VK_GRAPHICS_STAGE_BITS |
                                            VK_SHADER_STAGE_COMPUTE_BIT,
                                       set, set + 1, 0, 0);
}