/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * 'pvr_write_query_to_buffer()' and 'pvr_wait_for_available()' based on anv:
 * Copyright © 2015 Intel Corporation
 */

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_device_info.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "vk_log.h"
#include "vk_object.h"

VkResult pvr_CreateQueryPool(VkDevice _device,
                             const VkQueryPoolCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkQueryPool *pQueryPool)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   const uint32_t query_size = pCreateInfo->queryCount * sizeof(uint32_t);
   struct pvr_query_pool *pool;
   uint64_t alloc_size;
   VkResult result;

   /* Vulkan 1.0 supports only occlusion, timestamp, and pipeline statistics
    * queries.
    * We don't currently support timestamp queries:
    *    VkQueueFamilyProperties->timestampValidBits = 0.
    * We don't currently support pipeline statistics queries:
    *    VkPhysicalDeviceFeatures->pipelineStatisticsQuery = false.
    */
   assert(!device->vk.enabled_features.pipelineStatisticsQuery);
   assert(pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION);

   pool = vk_object_alloc(&device->vk,
                          pAllocator,
                          sizeof(*pool),
                          VK_OBJECT_TYPE_QUERY_POOL);
   if (!pool)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pool->result_stride =
      ALIGN_POT(query_size, PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT));

   pool->query_count = pCreateInfo->queryCount;

   /* Each Phantom writes to a separate offset within the vis test heap so
    * allocate space for the total number of Phantoms.
    */
   alloc_size = (uint64_t)pool->result_stride * core_count;
   result = pvr_bo_suballoc(&device->suballoc_vis_test,
                            alloc_size,
                            PVRX(CR_ISP_OCLQRY_BASE_ADDR_ALIGNMENT),
                            false,
                            &pool->result_buffer);
   if (result != VK_SUCCESS)
      goto err_free_pool;

   result = pvr_bo_suballoc(&device->suballoc_general,
                            query_size,
                            sizeof(uint32_t),
                            false,
                            &pool->availability_buffer);
   if (result != VK_SUCCESS)
      goto err_free_result_buffer;

   *pQueryPool = pvr_query_pool_to_handle(pool);

   return VK_SUCCESS;

err_free_result_buffer:
   pvr_bo_suballoc_free(pool->result_buffer);

err_free_pool:
   vk_object_free(&device->vk, pAllocator, pool);

   return result;
}

void pvr_DestroyQueryPool(VkDevice _device,
                          VkQueryPool queryPool,
                          const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!pool)
      return;

   pvr_bo_suballoc_free(pool->availability_buffer);
   pvr_bo_suballoc_free(pool->result_buffer);

   vk_object_free(&device->vk, pAllocator, pool);
}

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usage of this function while the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
static inline bool pvr_query_is_available(const struct pvr_query_pool *pool,
                                          uint32_t query_idx)
{
   volatile uint32_t *available =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);
   return !!available[query_idx];
}

#define NSEC_PER_SEC UINT64_C(1000000000)
#define PVR_WAIT_TIMEOUT UINT64_C(5)

/* Note: make sure to make the availability buffer's memory defined in
 * accordance with how the device is expected to fill it. We don't make it
 * defined here since that would cover up usage of this function while the
 * underlying buffer region being accessed wasn't expected to have been
 * written by the device.
 */
/* TODO: Handle device loss scenario properly. */
static VkResult pvr_wait_for_available(struct pvr_device *device,
                                       const struct pvr_query_pool *pool,
                                       uint32_t query_idx)
{
   const uint64_t abs_timeout =
      os_time_get_absolute_timeout(PVR_WAIT_TIMEOUT * NSEC_PER_SEC);

   /* From the Vulkan 1.0 spec:
    *
    *    Commands that wait indefinitely for device execution (namely
    *    vkDeviceWaitIdle, vkQueueWaitIdle, vkWaitForFences or
    *    vkAcquireNextImageKHR with a maximum timeout, and
    *    vkGetQueryPoolResults with the VK_QUERY_RESULT_WAIT_BIT bit set in
    *    flags) must return in finite time even in the case of a lost device,
    *    and return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    */
   while (os_time_get_nano() < abs_timeout) {
      if (pvr_query_is_available(pool, query_idx) != 0)
         return VK_SUCCESS;
   }

   return vk_error(device, VK_ERROR_DEVICE_LOST);
}

#undef NSEC_PER_SEC
#undef PVR_WAIT_TIMEOUT

static inline void pvr_write_query_to_buffer(uint8_t *buffer,
                                             VkQueryResultFlags flags,
                                             uint32_t idx,
                                             uint64_t value)
{
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *query_data = (uint64_t *)buffer;
      query_data[idx] = value;
   } else {
      uint32_t *query_data = (uint32_t *)buffer;
      query_data[idx] = value;
   }
}

VkResult pvr_GetQueryPoolResults(VkDevice _device,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 size_t dataSize,
                                 void *pData,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   VG(volatile uint32_t *available =
         pvr_bo_suballoc_get_map_addr(pool->availability_buffer));
   volatile uint32_t *query_results =
      pvr_bo_suballoc_get_map_addr(pool->result_buffer);
   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
   uint8_t *data = (uint8_t *)pData;
   VkResult result = VK_SUCCESS;

   /* TODO: Instead of making the memory defined here for Valgrind, to better
    * catch out of bounds accesses and other memory errors we should move this
    * to where the query buffers are changed by the driver or device (e.g.
    * "vkCmdResetQueryPool()", "vkGetQueryPoolResults()", etc.).
    */
   VG(VALGRIND_MAKE_MEM_DEFINED(&available[firstQuery],
                                queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_DEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   for (uint32_t i = 0; i < queryCount; i++) {
      bool is_available = pvr_query_is_available(pool, firstQuery + i);
      uint64_t count = 0;

      if (flags & VK_QUERY_RESULT_WAIT_BIT && !is_available) {
         result = pvr_wait_for_available(device, pool, firstQuery + i);
         if (result != VK_SUCCESS)
            return result;

         is_available = true;
      }

      for (uint32_t j = 0; j < core_count; j++)
         count += query_results[pool->result_stride * j + firstQuery + i];

      if (is_available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
         pvr_write_query_to_buffer(data, flags, 0, count);
      else
         result = VK_NOT_READY;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         pvr_write_query_to_buffer(data, flags, 1, is_available);

      data += stride;
   }

   VG(VALGRIND_MAKE_MEM_UNDEFINED(&available[firstQuery],
                                  queryCount * sizeof(uint32_t)));

   for (uint32_t i = 0; i < core_count; i++) {
      VG(VALGRIND_MAKE_MEM_UNDEFINED(
         &query_results[firstQuery + i * pool->result_stride],
         queryCount * sizeof(uint32_t)));
   }

   return result;
}

void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                           VkQueryPool queryPool,
                           uint32_t firstQuery,
                           uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   query_info.type = PVR_QUERY_TYPE_RESET_QUERY_POOL;
   query_info.reset_query_pool.query_pool = queryPool;
   query_info.reset_query_pool.first_query = firstQuery;
   query_info.reset_query_pool.query_count = queryCount;

   pvr_add_query_program(cmd_buffer, &query_info);
}

void pvr_ResetQueryPool(VkDevice _device,
                        VkQueryPool queryPool,
                        uint32_t firstQuery,
                        uint32_t queryCount)
{
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);
   uint32_t *availability =
      pvr_bo_suballoc_get_map_addr(pool->availability_buffer);

   memset(availability + firstQuery, 0, sizeof(uint32_t) * queryCount);
}
void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 VkBuffer dstBuffer,
                                 VkDeviceSize dstOffset,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_query_info query_info;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   query_info.type = PVR_QUERY_TYPE_COPY_QUERY_RESULTS;
   query_info.copy_query_results.query_pool = queryPool;
   query_info.copy_query_results.first_query = firstQuery;
   query_info.copy_query_results.query_count = queryCount;
   query_info.copy_query_results.dst_buffer = dstBuffer;
   query_info.copy_query_results.dst_offset = dstOffset;
   query_info.copy_query_results.stride = stride;
   query_info.copy_query_results.flags = flags;

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

   /* The Vulkan 1.3.231 spec says:
    *
    *    "vkCmdCopyQueryPoolResults is considered to be a transfer operation,
    *    and its writes to buffer memory must be synchronized using
    *    VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT
    *    before using the results."
    */
   /* We record barrier event sub commands to sync the compute job used for
    * the copy query results program with transfer jobs, to prevent a transfer
    * job from overlapping with the compute job.
    */
   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
      },
   };

   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   pvr_add_query_program(cmd_buffer, &query_info);

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_EVENT);
   if (result != VK_SUCCESS)
      return;

   cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
         .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
      },
   };
}

void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query,
                       VkQueryControlFlags flags)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   PVR_FROM_HANDLE(pvr_query_pool, pool, queryPool);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Occlusion queries can't be nested. */
   assert(!state->vis_test_enabled);

   if (state->current_sub_cmd) {
      assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

      if (!state->current_sub_cmd->gfx.query_pool) {
         state->current_sub_cmd->gfx.query_pool = pool;
      } else if (state->current_sub_cmd->gfx.query_pool != pool) {
         VkResult result;

         /* Kick render. */
         state->current_sub_cmd->gfx.barrier_store = true;

         result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
         if (result != VK_SUCCESS)
            return;

         result =
            pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
         if (result != VK_SUCCESS)
            return;

         /* Use existing render setup, but load color attachments from HW
          * BGOBJ.
          */
         state->current_sub_cmd->gfx.barrier_load = true;
         state->current_sub_cmd->gfx.barrier_store = false;
         state->current_sub_cmd->gfx.query_pool = pool;
      }
   }

   state->query_pool = pool;
   state->vis_test_enabled = true;
   state->vis_reg = query;
   state->dirty.vis_test = true;

   /* Add the index to the list for this render. */
   util_dynarray_append(&state->query_indices, __typeof__(query), query);
}

void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
                     VkQueryPool queryPool,
                     uint32_t query)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   state->vis_test_enabled = false;
   state->dirty.vis_test = true;
}