/* * Copyright © 2022 Collabora Ltd. and Red Hat Inc. * SPDX-License-Identifier: MIT */ #include "nvk_event.h" #include "nvk_cmd_buffer.h" #include "nvk_device.h" #include "nvk_entrypoints.h" #include "nvk_mme.h" #include "nv_push_cl906f.h" #include "nv_push_cl9097.h" #define NVK_EVENT_MEM_SIZE sizeof(VkResult) VKAPI_ATTR VkResult VKAPI_CALL nvk_CreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkEvent *pEvent) { VK_FROM_HANDLE(nvk_device, dev, device); struct nvk_event *event; VkResult result; event = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*event), VK_OBJECT_TYPE_EVENT); if (!event) return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); result = nvk_heap_alloc(dev, &dev->event_heap, NVK_EVENT_MEM_SIZE, NVK_EVENT_MEM_SIZE, &event->addr, (void **)&event->status); if (result != VK_SUCCESS) { vk_object_free(&dev->vk, pAllocator, event); return result; } *event->status = VK_EVENT_RESET; *pEvent = nvk_event_to_handle(event); return VK_SUCCESS; } VKAPI_ATTR void VKAPI_CALL nvk_DestroyEvent(VkDevice device, VkEvent _event, const VkAllocationCallbacks *pAllocator) { VK_FROM_HANDLE(nvk_device, dev, device); VK_FROM_HANDLE(nvk_event, event, _event); if (!event) return; nvk_heap_free(dev, &dev->event_heap, event->addr, NVK_EVENT_MEM_SIZE); vk_object_free(&dev->vk, pAllocator, event); } VKAPI_ATTR VkResult VKAPI_CALL nvk_GetEventStatus(VkDevice device, VkEvent _event) { VK_FROM_HANDLE(nvk_event, event, _event); return *event->status; } VKAPI_ATTR VkResult VKAPI_CALL nvk_SetEvent(VkDevice device, VkEvent _event) { VK_FROM_HANDLE(nvk_event, event, _event); *event->status = VK_EVENT_SET; return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL nvk_ResetEvent(VkDevice device, VkEvent _event) { VK_FROM_HANDLE(nvk_event, event, _event); *event->status = VK_EVENT_RESET; return VK_SUCCESS; } static bool clear_bits64(uint64_t *bitfield, uint64_t bits) { bool has_bits = (*bitfield & bits) != 0; *bitfield &= ~bits; return has_bits; } uint32_t vk_stage_flags_to_nv9097_pipeline_location(VkPipelineStageFlags2 flags) { if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT| VK_PIPELINE_STAGE_2_COPY_BIT| VK_PIPELINE_STAGE_2_RESOLVE_BIT| VK_PIPELINE_STAGE_2_BLIT_BIT| VK_PIPELINE_STAGE_2_CLEAR_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_ALL; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_DEPTH_TEST; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_PIXEL_SHADER; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_ZCULL; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_STREAMING_OUTPUT; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_GEOMETRY_SHADER; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_TESSELATION_SHADER; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_TESSELATION_INIT_SHADER; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_VERTEX_SHADER; if (clear_bits64(&flags, VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT | VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT)) return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_DATA_ASSEMBLER; clear_bits64(&flags, VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_HOST_BIT | VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT); /* TODO: Doing this on 3D will likely cause a WFI which is probably ok but, * if we tracked which subchannel we've used most recently, we can probably * do better than that. */ clear_bits64(&flags, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT); assert(flags == 0); return NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_NONE; } VKAPI_ATTR void VKAPI_CALL nvk_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, const VkDependencyInfo *pDependencyInfo) { VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(nvk_event, event, _event); nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, false); VkPipelineStageFlags2 stages = 0; for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) stages |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask; for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) stages |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask; struct nv_push *p = nvk_cmd_buffer_push(cmd, 5); P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A); P_NV9097_SET_REPORT_SEMAPHORE_A(p, event->addr >> 32); P_NV9097_SET_REPORT_SEMAPHORE_B(p, event->addr); P_NV9097_SET_REPORT_SEMAPHORE_C(p, VK_EVENT_SET); P_NV9097_SET_REPORT_SEMAPHORE_D(p, { .operation = OPERATION_RELEASE, .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE, .pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stages), .structure_size = STRUCTURE_SIZE_ONE_WORD, }); } VKAPI_ATTR void VKAPI_CALL nvk_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags2 stageMask) { VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); VK_FROM_HANDLE(nvk_event, event, _event); struct nv_push *p = nvk_cmd_buffer_push(cmd, 5); P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A); P_NV9097_SET_REPORT_SEMAPHORE_A(p, event->addr >> 32); P_NV9097_SET_REPORT_SEMAPHORE_B(p, event->addr); P_NV9097_SET_REPORT_SEMAPHORE_C(p, VK_EVENT_RESET); P_NV9097_SET_REPORT_SEMAPHORE_D(p, { .operation = OPERATION_RELEASE, .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE, .pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stageMask), .structure_size = STRUCTURE_SIZE_ONE_WORD, }); } VKAPI_ATTR void VKAPI_CALL nvk_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, const VkDependencyInfo *pDependencyInfos) { VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer); for (uint32_t i = 0; i < eventCount; i++) { VK_FROM_HANDLE(nvk_event, event, pEvents[i]); struct nv_push *p = nvk_cmd_buffer_push(cmd, 5); __push_mthd(p, SUBC_NV9097, NV906F_SEMAPHOREA); P_NV906F_SEMAPHOREA(p, event->addr >> 32); P_NV906F_SEMAPHOREB(p, (event->addr & UINT32_MAX) >> 2); P_NV906F_SEMAPHOREC(p, VK_EVENT_SET); P_NV906F_SEMAPHORED(p, { .operation = OPERATION_ACQUIRE, .acquire_switch = ACQUIRE_SWITCH_ENABLED, .release_size = RELEASE_SIZE_4BYTE, }); } nvk_cmd_invalidate_deps(cmd, eventCount, pDependencyInfos); }