/*
 * Copyright 2019 Google LLC
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv and radv which are:
 * Copyright © 2015 Intel Corporation
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 */

#include "vn_device.h"

#include "util/disk_cache.h"
#include "util/hex.h"
#include "venus-protocol/vn_protocol_driver_device.h"

#include "vn_android.h"
#include "vn_instance.h"
#include "vn_physical_device.h"
#include "vn_queue.h"

/* device commands */

static void
vn_queue_fini(struct vn_queue *queue)
{
   VkDevice dev_handle = vk_device_to_handle(queue->base.base.base.device);

   if (queue->wait_fence != VK_NULL_HANDLE) {
      vn_DestroyFence(dev_handle, queue->wait_fence, NULL);
   }
   if (queue->sparse_semaphore != VK_NULL_HANDLE) {
      vn_DestroySemaphore(dev_handle, queue->sparse_semaphore, NULL);
   }
   vn_cached_storage_fini(&queue->storage);
   vn_queue_base_fini(&queue->base);
}

static VkResult
vn_queue_init(struct vn_device *dev,
              struct vn_queue *queue,
              const VkDeviceQueueCreateInfo *queue_info,
              uint32_t queue_index)
{
   VkResult result =
      vn_queue_base_init(&queue->base, &dev->base, queue_info, queue_index);
   if (result != VK_SUCCESS)
      return result;

   vn_cached_storage_init(&queue->storage, &dev->base.base.alloc);

   const int ring_idx = vn_instance_acquire_ring_idx(dev->instance);
   if (ring_idx < 0) {
      vn_log(dev->instance, "failed binding VkQueue to renderer timeline");
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   queue->ring_idx = (uint32_t)ring_idx;

   const VkDeviceQueueTimelineInfoMESA timeline_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_TIMELINE_INFO_MESA,
      .ringIdx = queue->ring_idx,
   };
   const VkDeviceQueueInfo2 device_queue_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
      .pNext = &timeline_info,
      .flags = queue_info->flags,
      .queueFamilyIndex = queue_info->queueFamilyIndex,
      .queueIndex = queue_index,
   };

   VkQueue queue_handle = vn_queue_to_handle(queue);
   vn_async_vkGetDeviceQueue2(dev->primary_ring, vn_device_to_handle(dev),
                              &device_queue_info, &queue_handle);

   return VK_SUCCESS;
}

static VkResult
vn_device_init_queues(struct vn_device *dev,
                      const VkDeviceCreateInfo *create_info)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   uint32_t count = 0;
   for (uint32_t i = 0; i < create_info->queueCreateInfoCount; i++)
      count += create_info->pQueueCreateInfos[i].queueCount;

   struct vn_queue *queues =
      vk_zalloc(alloc, sizeof(*queues) * count, VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!queues)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   count = 0;
   for (uint32_t i = 0; i < create_info->queueCreateInfoCount; i++) {
      VkResult result;

      const VkDeviceQueueCreateInfo *queue_info =
         &create_info->pQueueCreateInfos[i];
      for (uint32_t j = 0; j < queue_info->queueCount; j++) {
         result = vn_queue_init(dev, &queues[count], queue_info, j);
         if (result != VK_SUCCESS) {
            for (uint32_t k = 0; k < count; k++)
               vn_queue_fini(&queues[k]);
            vk_free(alloc, queues);
            return result;
         }
         count++;
      }
   }

   dev->queues = queues;
   dev->queue_count = count;

   return VK_SUCCESS;
}

static bool
vn_device_queue_family_init(struct vn_device *dev,
                            const VkDeviceCreateInfo *create_info)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   uint32_t *queue_families = NULL;
   uint32_t count = 0;

   queue_families = vk_zalloc(
      alloc, sizeof(*queue_families) * create_info->queueCreateInfoCount,
      VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!queue_families)
      return false;

   for (uint32_t i = 0; i < create_info->queueCreateInfoCount; i++) {
      const uint32_t index =
         create_info->pQueueCreateInfos[i].queueFamilyIndex;
      bool new_index = true;
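      /* record each queue family index only once, even when multiple
       * pQueueCreateInfos entries reference the same family
       */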
      for (uint32_t j = 0; j < count; j++) {
         if (queue_families[j] == index) {
            new_index = false;
            break;
         }
      }
      if (new_index)
         queue_families[count++] = index;
   }

   dev->queue_families = queue_families;
   dev->queue_family_count = count;

   return true;
}

static inline void
vn_device_queue_family_fini(struct vn_device *dev)
{
   vk_free(&dev->base.base.alloc, dev->queue_families);
}

static VkResult
vn_device_memory_report_init(struct vn_device *dev,
                             const VkDeviceCreateInfo *create_info)
{
   const struct vk_features *app_feats = &dev->base.base.enabled_features;
   if (!app_feats->deviceMemoryReport)
      return VK_SUCCESS;

   uint32_t count = 0;
   vk_foreach_struct_const(pnext, create_info->pNext) {
      if (pnext->sType ==
          VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT)
         count++;
   }

   struct vn_device_memory_report *mem_reports = NULL;
   if (count) {
      mem_reports =
         vk_alloc(&dev->base.base.alloc, sizeof(*mem_reports) * count,
                  VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!mem_reports)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   count = 0;
   vk_foreach_struct_const(pnext, create_info->pNext) {
      if (pnext->sType ==
          VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT) {
         const struct VkDeviceDeviceMemoryReportCreateInfoEXT *report =
            (void *)pnext;
         mem_reports[count].callback = report->pfnUserCallback;
         mem_reports[count].data = report->pUserData;
         count++;
      }
   }

   dev->memory_report_count = count;
   dev->memory_reports = mem_reports;

   return VK_SUCCESS;
}

static inline void
vn_device_memory_report_fini(struct vn_device *dev)
{
   vk_free(&dev->base.base.alloc, dev->memory_reports);
}

static bool
find_extension_names(const char *const *exts,
                     uint32_t ext_count,
                     const char *name)
{
   for (uint32_t i = 0; i < ext_count; i++) {
      if (!strcmp(exts[i], name))
         return true;
   }
   return false;
}
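
/* Merges the app-enabled extension list with driver-required extras while
 * filtering out blocked names: names from exts not found in block_exts are
 * kept, then names from extra_exts not already in exts are appended. The
 * merged array only borrows the string pointers and is allocated with
 * COMMAND scope, so the caller must free it once it has been consumed.
 */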
static bool
merge_extension_names(const char *const *exts,
                      uint32_t ext_count,
                      const char *const *extra_exts,
                      uint32_t extra_count,
                      const char *const *block_exts,
                      uint32_t block_count,
                      const VkAllocationCallbacks *alloc,
                      const char *const **out_exts,
                      uint32_t *out_count)
{
   const char **merged =
      vk_alloc(alloc, sizeof(*merged) * (ext_count + extra_count),
               VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!merged)
      return false;

   uint32_t count = 0;
   for (uint32_t i = 0; i < ext_count; i++) {
      if (!find_extension_names(block_exts, block_count, exts[i]))
         merged[count++] = exts[i];
   }
   for (uint32_t i = 0; i < extra_count; i++) {
      if (!find_extension_names(exts, ext_count, extra_exts[i]))
         merged[count++] = extra_exts[i];
   }

   *out_exts = merged;
   *out_count = count;
   return true;
}

static const VkDeviceCreateInfo *
vn_device_fix_create_info(const struct vn_device *dev,
                          const VkDeviceCreateInfo *dev_info,
                          const VkAllocationCallbacks *alloc,
                          VkDeviceCreateInfo *local_info)
{
   const struct vn_physical_device *physical_dev = dev->physical_device;
   const struct vk_device_extension_table *app_exts =
      &dev->base.base.enabled_extensions;
   /* extra_exts and block_exts must not overlap */
   const char *extra_exts[16];
   const char *block_exts[16];
   uint32_t extra_count = 0;
   uint32_t block_count = 0;

   /* fix for WSI (treat AHB as WSI extension for simplicity) */
   const bool has_wsi =
      app_exts->KHR_swapchain || app_exts->ANDROID_native_buffer ||
      app_exts->ANDROID_external_memory_android_hardware_buffer;
   if (has_wsi) {
      if (!app_exts->EXT_image_drm_format_modifier) {
         extra_exts[extra_count++] =
            VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME;

         if (physical_dev->renderer_version < VK_API_VERSION_1_2 &&
             !app_exts->KHR_image_format_list) {
            extra_exts[extra_count++] =
               VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME;
         }
      }

      if (!app_exts->EXT_queue_family_foreign) {
         extra_exts[extra_count++] =
            VK_EXT_QUEUE_FAMILY_FOREIGN_EXTENSION_NAME;
      }

      if (app_exts->KHR_swapchain) {
         /* see vn_physical_device_get_native_extensions */
         block_exts[block_count++] = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
         block_exts[block_count++] =
            VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME;
         block_exts[block_count++] =
            VK_KHR_INCREMENTAL_PRESENT_EXTENSION_NAME;
      }

      if (app_exts->ANDROID_native_buffer) {
         /* see vn_QueueSignalReleaseImageANDROID */
         if (!app_exts->KHR_external_fence_fd) {
            assert(physical_dev->renderer_sync_fd.fence_exportable);
            extra_exts[extra_count++] =
               VK_KHR_EXTERNAL_FENCE_FD_EXTENSION_NAME;
         }

         block_exts[block_count++] = VK_ANDROID_NATIVE_BUFFER_EXTENSION_NAME;
      }

      if (app_exts->ANDROID_external_memory_android_hardware_buffer) {
         block_exts[block_count++] =
            VK_ANDROID_EXTERNAL_MEMORY_ANDROID_HARDWARE_BUFFER_EXTENSION_NAME;
      }
   }

   if (app_exts->KHR_external_memory_fd ||
       app_exts->EXT_external_memory_dma_buf || has_wsi) {
      if (physical_dev->external_memory.renderer_handle_type ==
          VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT) {
         if (!app_exts->EXT_external_memory_dma_buf) {
            extra_exts[extra_count++] =
               VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME;
         }

         if (!app_exts->KHR_external_memory_fd) {
            extra_exts[extra_count++] =
               VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME;
         }
      }
   }

   /* see vn_queue_submission_count_batch_semaphores */
   if (!app_exts->KHR_external_semaphore_fd && has_wsi) {
      assert(physical_dev->renderer_sync_fd.semaphore_importable);
      extra_exts[extra_count++] = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME;
   }

   if (app_exts->EXT_device_memory_report) {
      /* see vn_physical_device_get_native_extensions */
      block_exts[block_count++] = VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME;
   }

   if (app_exts->EXT_physical_device_drm) {
      /* see vn_physical_device_get_native_extensions */
      block_exts[block_count++] = VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME;
   }

   if (app_exts->EXT_tooling_info) {
      /* see vn_physical_device_get_native_extensions */
      block_exts[block_count++] = VK_EXT_TOOLING_INFO_EXTENSION_NAME;
   }

   if (app_exts->EXT_pci_bus_info) {
      /* always filter for simplicity */
      block_exts[block_count++] = VK_EXT_PCI_BUS_INFO_EXTENSION_NAME;
   }

   assert(extra_count <= ARRAY_SIZE(extra_exts));
   assert(block_count <= ARRAY_SIZE(block_exts));

   if (!extra_count && (!block_count || !dev_info->enabledExtensionCount))
      return dev_info;

   *local_info = *dev_info;
   if (!merge_extension_names(dev_info->ppEnabledExtensionNames,
                              dev_info->enabledExtensionCount, extra_exts,
                              extra_count, block_exts, block_count, alloc,
                              &local_info->ppEnabledExtensionNames,
                              &local_info->enabledExtensionCount))
      return NULL;

   return local_info;
}
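
/* Note: when vn_device_fix_create_info above returns local_info, the caller
 * owns the merged ppEnabledExtensionNames array and must free it once the
 * create info has been forwarded (vn_device_init frees it right after
 * vn_call_vkCreateDevice).
 */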

/* The feedback pool defaults to suballocating slots of 8 bytes each. An
 * initial pool size of 4096 bytes corresponds to a total of 512 fences,
 * semaphores and events, which covers the common scenarios well. The pool
 * can grow beyond that anyway.
 */
static inline VkResult
vn_device_feedback_pool_init(struct vn_device *dev)
{
   static const uint32_t pool_size = 4096;
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   if (VN_PERF(NO_EVENT_FEEDBACK) && VN_PERF(NO_FENCE_FEEDBACK) &&
       VN_PERF(NO_SEMAPHORE_FEEDBACK))
      return VK_SUCCESS;

   return vn_feedback_pool_init(dev, &dev->feedback_pool, pool_size, alloc);
}

static inline void
vn_device_feedback_pool_fini(struct vn_device *dev)
{
   if (VN_PERF(NO_EVENT_FEEDBACK) && VN_PERF(NO_FENCE_FEEDBACK) &&
       VN_PERF(NO_SEMAPHORE_FEEDBACK))
      return;

   vn_feedback_pool_fini(&dev->feedback_pool);
}

/* venus relies on the host-side shader cache. This is a workaround to
 * generate shader cache files whose headers carry a unique cache id that
 * changes with the host driver identifiers. This allows fossilize replay to
 * detect when the host-side shader cache is no longer up to date. The disk
 * cache is destroyed right after creating the necessary files and is not
 * otherwise used by venus.
 */
static void
vn_device_update_shader_cache_id(struct vn_device *dev)
{
#if !DETECT_OS_ANDROID && defined(ENABLE_SHADER_CACHE)
   const uint8_t *device_uuid =
      dev->physical_device->base.base.properties.pipelineCacheUUID;

   char uuid[VK_UUID_SIZE * 2 + 1];
   mesa_bytes_to_hex(uuid, device_uuid, VK_UUID_SIZE);

   struct disk_cache *cache = disk_cache_create("venus", uuid, 0);
   if (!cache)
      return;

   /* The entry header is what contains the cache id / timestamp, so we
    * need to create a fake entry.
    */
   uint8_t key[20];
   char data[] = "Fake Shader";

   disk_cache_compute_key(cache, data, sizeof(data), key);
   disk_cache_put(cache, key, data, sizeof(data), NULL);

   disk_cache_destroy(cache);
#endif
}
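
/* vn_device_init forwards the (possibly fixed-up) VkDeviceCreateInfo to the
 * renderer via the primary ring and then brings up the driver-side state:
 * memory report callbacks, queue families, the feedback pool and command
 * pools, the queues, and the buffer/image requirements caches. Failure
 * paths unwind in reverse order and destroy the renderer-side device last.
 */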
static VkResult
vn_device_init(struct vn_device *dev,
               struct vn_physical_device *physical_dev,
               const VkDeviceCreateInfo *create_info,
               const VkAllocationCallbacks *alloc)
{
   struct vn_instance *instance = physical_dev->instance;
   VkPhysicalDevice physical_dev_handle =
      vn_physical_device_to_handle(physical_dev);
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkDeviceCreateInfo local_create_info;
   VkResult result;

   dev->instance = instance;
   dev->physical_device = physical_dev;
   dev->device_mask = 1;
   dev->renderer = instance->renderer;
   dev->primary_ring = instance->ring.ring;

   create_info =
      vn_device_fix_create_info(dev, create_info, alloc, &local_create_info);
   if (!create_info)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   const VkDeviceGroupDeviceCreateInfo *group = vk_find_struct_const(
      create_info->pNext, DEVICE_GROUP_DEVICE_CREATE_INFO);
   if (group && group->physicalDeviceCount)
      dev->device_mask = (1 << group->physicalDeviceCount) - 1;

   result = vn_call_vkCreateDevice(dev->primary_ring, physical_dev_handle,
                                   create_info, NULL, &dev_handle);

   /* free the fixed extension list here since it is no longer needed below */
   if (create_info == &local_create_info)
      vk_free(alloc, (void *)create_info->ppEnabledExtensionNames);

   if (result != VK_SUCCESS)
      return result;

   result = vn_device_memory_report_init(dev, create_info);
   if (result != VK_SUCCESS)
      goto out_destroy_device;

   if (!vn_device_queue_family_init(dev, create_info)) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto out_memory_report_fini;
   }

   result = vn_device_feedback_pool_init(dev);
   if (result != VK_SUCCESS)
      goto out_queue_family_fini;

   result = vn_feedback_cmd_pools_init(dev);
   if (result != VK_SUCCESS)
      goto out_feedback_pool_fini;

   result = vn_device_init_queues(dev, create_info);
   if (result != VK_SUCCESS)
      goto out_feedback_cmd_pools_fini;

   vn_buffer_reqs_cache_init(dev);
   vn_image_reqs_cache_init(dev);

   /* This is a workaround to allow fossilize replay to detect if the
    * host-side shader cache is no longer up to date.
    */
   vn_device_update_shader_cache_id(dev);

   return VK_SUCCESS;

out_feedback_cmd_pools_fini:
   vn_feedback_cmd_pools_fini(dev);

out_feedback_pool_fini:
   vn_device_feedback_pool_fini(dev);

out_queue_family_fini:
   vn_device_queue_family_fini(dev);

out_memory_report_fini:
   vn_device_memory_report_fini(dev);

out_destroy_device:
   vn_call_vkDestroyDevice(dev->primary_ring, dev_handle, NULL);

   return result;
}

VkResult
vn_CreateDevice(VkPhysicalDevice physicalDevice,
                const VkDeviceCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkDevice *pDevice)
{
   VN_TRACE_FUNC();
   struct vn_physical_device *physical_dev =
      vn_physical_device_from_handle(physicalDevice);
   struct vn_instance *instance = physical_dev->instance;
   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &instance->base.base.alloc;
   struct vn_device *dev;
   VkResult result;

   dev = vk_zalloc(alloc, sizeof(*dev), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!dev)
      return vn_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &vn_device_entrypoints, true);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);
   result = vn_device_base_init(&dev->base, &physical_dev->base,
                                &dispatch_table, pCreateInfo, alloc);
   if (result != VK_SUCCESS) {
      vk_free(alloc, dev);
      return vn_error(instance, result);
   }

   result = vn_device_init(dev, physical_dev, pCreateInfo, alloc);
   if (result != VK_SUCCESS) {
      vn_device_base_fini(&dev->base);
      vk_free(alloc, dev);
      return vn_error(instance, result);
   }

   if (VN_DEBUG(LOG_CTX_INFO)) {
      vn_log(instance, "%s", physical_dev->base.base.properties.deviceName);
      vn_log(instance, "%s", physical_dev->base.base.properties.driverInfo);
   }

   vn_tls_set_async_pipeline_create();

   *pDevice = vn_device_to_handle(dev);

   return VK_SUCCESS;
}
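
/* Teardown mirrors vn_device_init in reverse: caches and queues first, then
 * the feedback and queue-family state, and the renderer-side device before
 * the queue ring_idx values are released (see the ordering note below).
 */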
void
vn_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator)
{
   VN_TRACE_FUNC();
   struct vn_device *dev = vn_device_from_handle(device);
   if (!dev)
      return;

   const VkAllocationCallbacks *alloc =
      pAllocator ? pAllocator : &dev->base.base.alloc;

   vn_image_reqs_cache_fini(dev);
   vn_buffer_reqs_cache_fini(dev);

   for (uint32_t i = 0; i < dev->queue_count; i++)
      vn_queue_fini(&dev->queues[i]);

   vn_feedback_cmd_pools_fini(dev);
   vn_device_feedback_pool_fini(dev);
   vn_device_queue_family_fini(dev);
   vn_device_memory_report_fini(dev);

   vn_async_vkDestroyDevice(dev->primary_ring, device, NULL);

   /* vkDestroyDevice must be emitted to the renderer before the bound
    * ring_idx values are released. Otherwise, another thread might reuse a
    * ring_idx while it is still bound to a queue in the renderer.
    */
   for (uint32_t i = 0; i < dev->queue_count; i++)
      vn_instance_release_ring_idx(dev->instance, dev->queues[i].ring_idx);

   vk_free(alloc, dev->queues);

   vn_device_base_fini(&dev->base);
   vk_free(alloc, dev);
}

PFN_vkVoidFunction
vn_GetDeviceProcAddr(VkDevice device, const char *pName)
{
   struct vn_device *dev = vn_device_from_handle(device);
   return vk_device_get_proc_addr(&dev->base.base, pName);
}

void
vn_GetDeviceGroupPeerMemoryFeatures(
   VkDevice device,
   uint32_t heapIndex,
   uint32_t localDeviceIndex,
   uint32_t remoteDeviceIndex,
   VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   struct vn_device *dev = vn_device_from_handle(device);

   /* TODO get and cache the values in vkCreateDevice */
   vn_call_vkGetDeviceGroupPeerMemoryFeatures(
      dev->primary_ring, device, heapIndex, localDeviceIndex,
      remoteDeviceIndex, pPeerMemoryFeatures);
}

VkResult
vn_GetCalibratedTimestampsEXT(
   VkDevice device,
   uint32_t timestampCount,
   const VkCalibratedTimestampInfoEXT *pTimestampInfos,
   uint64_t *pTimestamps,
   uint64_t *pMaxDeviation)
{
   struct vn_device *dev = vn_device_from_handle(device);
   uint64_t begin, end, max_clock_period = 0;
   VkResult ret;

   /* Sample the host clock around the loop: begin also serves as the
    * CLOCK_MONOTONIC_RAW timestamp, and begin/end bound the deviation.
    */
#ifdef CLOCK_MONOTONIC_RAW
   begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = vk_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (uint32_t domain = 0; domain < timestampCount; domain++) {
      switch (pTimestampInfos[domain].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT: {
         uint64_t device_max_deviation = 0;
         ret = vn_call_vkGetCalibratedTimestampsEXT(
            dev->primary_ring, device, 1, &pTimestampInfos[domain],
            &pTimestamps[domain], &device_max_deviation);

         if (ret != VK_SUCCESS)
            return vn_error(dev->instance, ret);

         max_clock_period = MAX2(max_clock_period, device_max_deviation);
         break;
      }
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[domain] = vk_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;
#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[domain] = begin;
         break;
#endif
      default:
         pTimestamps[domain] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = vk_clock_gettime(CLOCK_MONOTONIC);
#endif

   *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);

   return VK_SUCCESS;
}