/*
 * Copyright © 2022 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

/* When using dynamic rendering with the suspend/resume functionality, we
 * sometimes need to merge together multiple suspended render passes
 * dynamically at submit time. This involves combining all the saved-up IBs,
 * emitting the rendering commands usually emitted by
 * CmdEndRenderPass()/CmdEndRendering(), and inserting them in between the
 * user command buffers. This gets tricky, because the same command buffer can
 * be submitted multiple times, each time with a different other set of
 * command buffers, and with VK_COMMAND_BUFFER_SIMULTANEOUS_USE_BIT, this can
 * happen before the previous submission of the same command buffer has
 * finished. At some point we have to free these commands and the BOs they are
 * contained in, and we can't do that when resubmitting the last command
 * buffer in the sequence because it may still be in use. This means we have
 * to make the commands owned by the device and roll our own memory tracking.
 */

#include "tu_dynamic_rendering.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"

struct dynamic_rendering_entry {
   struct tu_cmd_buffer *cmd_buffer;
   uint32_t fence; /* The fence value when cmd_buffer becomes available */
};

static VkResult
get_cmd_buffer(struct tu_device *dev, struct tu_cmd_buffer **cmd_buffer_out)
{
   struct tu6_global *global = dev->global_bo_map;

   /* Note: because QueueSubmit is serialized, we don't need any locks here.
    */
   uint32_t fence = global->dynamic_rendering_fence;

   /* Go through the entries and return the finished ones to the pool,
    * shrinking the array of pending entries.
    */
   struct dynamic_rendering_entry *new_entry =
      (struct dynamic_rendering_entry *) util_dynarray_begin(
         &dev->dynamic_rendering_pending);
   uint32_t entries = 0;
   util_dynarray_foreach(&dev->dynamic_rendering_pending,
                         struct dynamic_rendering_entry, entry) {
      if (entry->fence <= fence) {
         VkCommandBuffer vk_buf = tu_cmd_buffer_to_handle(entry->cmd_buffer);
         vk_common_FreeCommandBuffers(tu_device_to_handle(dev),
                                      dev->dynamic_rendering_pool, 1,
                                      &vk_buf);
      } else {
         *new_entry = *entry;
         new_entry++;
         entries++;
      }
   }
   UNUSED void *dummy =
      util_dynarray_resize(&dev->dynamic_rendering_pending,
                           struct dynamic_rendering_entry, entries);

   VkCommandBuffer vk_buf;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = dev->dynamic_rendering_pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkResult result =
      vk_common_AllocateCommandBuffers(tu_device_to_handle(dev), &info,
                                       &vk_buf);
   if (result != VK_SUCCESS)
      return result;

   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, vk_buf);

   struct dynamic_rendering_entry entry = {
      .cmd_buffer = cmd_buffer,
      .fence = ++dev->dynamic_rendering_fence,
   };

   util_dynarray_append(&dev->dynamic_rendering_pending,
                        struct dynamic_rendering_entry, entry);
   *cmd_buffer_out = cmd_buffer;

   return VK_SUCCESS;
}

VkResult
tu_init_dynamic_rendering(struct tu_device *dev)
{
   util_dynarray_init(&dev->dynamic_rendering_pending, NULL);
   dev->dynamic_rendering_fence = 0;

   const VkCommandPoolCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .queueFamilyIndex = 0,
   };

   return vk_common_CreateCommandPool(tu_device_to_handle(dev), &create_info,
                                      &dev->vk.alloc,
                                      &dev->dynamic_rendering_pool);
}

void
tu_destroy_dynamic_rendering(struct tu_device *dev)
{
   vk_common_DestroyCommandPool(tu_device_to_handle(dev),
                                dev->dynamic_rendering_pool,
                                &dev->vk.alloc);
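
   /* Per the Vulkan spec, destroying the pool above implicitly frees any
    * command buffers still tracked in dynamic_rendering_pending, so only the
    * array itself is left to clean up.
    */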
   util_dynarray_fini(&dev->dynamic_rendering_pending);
}

VkResult
tu_insert_dynamic_cmdbufs(struct tu_device *dev,
                          struct tu_cmd_buffer ***cmds_ptr,
                          uint32_t *size)
{
   struct tu_cmd_buffer **old_cmds = *cmds_ptr;

   bool has_dynamic = false;
   for (unsigned i = 0; i < *size; i++) {
      if (old_cmds[i]->state.suspend_resume != SR_NONE) {
         has_dynamic = true;
         break;
      }
   }

   if (!has_dynamic)
      return VK_SUCCESS;

   struct util_dynarray cmds = {0};
   struct tu_cmd_buffer *cmd_buffer = NULL;
   for (unsigned i = 0; i < *size; i++) {
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_IN_CHAIN:
      case SR_IN_PRE_CHAIN:
         break;

      case SR_AFTER_PRE_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN:
         tu_append_pre_chain(cmd_buffer, old_cmds[i]);

         if (!(old_cmds[i]->usage_flags &
               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
            u_trace_disable_event_range(
               old_cmds[i]->pre_chain.trace_renderpass_start,
               old_cmds[i]->pre_chain.trace_renderpass_end);
         }

         TU_CALLX(dev, tu_cmd_render)(cmd_buffer);

         tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3);
         tu_cs_emit_qw(&cmd_buffer->cs,
                       global_iova(cmd_buffer, dynamic_rendering_fence));
         tu_cs_emit(&cmd_buffer->cs, dev->dynamic_rendering_fence);

         TU_CALLX(dev, tu_EndCommandBuffer)(tu_cmd_buffer_to_handle(cmd_buffer));

         util_dynarray_append(&cmds, struct tu_cmd_buffer *, cmd_buffer);
         cmd_buffer = NULL;
         break;
      }

      util_dynarray_append(&cmds, struct tu_cmd_buffer *, old_cmds[i]);

      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_AFTER_PRE_CHAIN:
         break;

      case SR_IN_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN: {
         assert(!cmd_buffer);
         VkResult result = get_cmd_buffer(dev, &cmd_buffer);
         if (result != VK_SUCCESS) {
            util_dynarray_fini(&cmds);
            return result;
         }

         const VkCommandBufferBeginInfo begin = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
         };
         tu_cmd_buffer_begin(cmd_buffer, &begin);

         /* Setup the render pass using the first command buffer involved in
          * the chain, so that it will look like we're inside a render pass
          * for tu_cmd_render().
          */
         tu_restore_suspended_pass(cmd_buffer, old_cmds[i]);

         FALLTHROUGH;
      }
      case SR_IN_PRE_CHAIN:
         assert(cmd_buffer);

         tu_append_pre_post_chain(cmd_buffer, old_cmds[i]);

         /* As in the pre-chain case above, trace events in a command buffer
          * that may be resubmitted cannot be consumed in place, so disable
          * them.
          */
         if (!(old_cmds[i]->usage_flags &
               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
            u_trace_disable_event_range(old_cmds[i]->trace_renderpass_start,
                                        old_cmds[i]->trace_renderpass_end);
         }

         /* When the command buffer is finally recorded, we need its state
          * to be the state of the command buffer before it. We need this
          * because we skip tu6_emit_hw().
          */
         cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state;
         break;
      }
   }

   struct tu_cmd_buffer **new_cmds = (struct tu_cmd_buffer **)
      vk_alloc(&dev->vk.alloc, cmds.size, alignof(struct tu_cmd_buffer *),
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!new_cmds) {
      util_dynarray_fini(&cmds);
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }
   memcpy(new_cmds, cmds.data, cmds.size);
   *cmds_ptr = new_cmds;
   *size = util_dynarray_num_elements(&cmds, struct tu_cmd_buffer *);
   util_dynarray_fini(&cmds);

   return VK_SUCCESS;
}
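
/* Usage sketch (illustrative only, not part of the driver): the queue-submit
 * path is expected to run the user's command buffer list through
 * tu_insert_dynamic_cmdbufs() before building the kernel submission. The
 * names "submit_cmds" and "nr_cmds" below are hypothetical.
 *
 *    struct tu_cmd_buffer **cmds = submit_cmds;
 *    uint32_t nr_cmds = submit_cmd_count;
 *    VkResult result = tu_insert_dynamic_cmdbufs(dev, &cmds, &nr_cmds);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 *    ... build and send the submission from cmds/nr_cmds ...
 *
 *    if (cmds != submit_cmds)
 *       vk_free(&dev->vk.alloc, cmds);
 *
 * When no command buffer in the list uses suspend/resume, the function
 * returns early and leaves *cmds_ptr untouched, so the caller only frees the
 * array when it was actually replaced.
 */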