/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "vk_alloc.h"
#include "vk_common_entrypoints.h"
#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_debug_report.h"
#include "vk_format.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

#include "git_sha1.h"

#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_dl.h"
#include "util/driconf.h"

#include "glsl_types.h"

#include "dxil_validator.h"

/* The bracketed header names below were lost to angle-bracket stripping;
 * they are reconstructed here from what this file actually uses (snprintf,
 * getenv/atoi, strlen/memcpy, fixed-width integer types, Win32 path APIs,
 * and the D3D12 SDK-layers interfaces). */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifdef _WIN32
#include <windows.h>
#include <shlobj.h>
#include "dzn_dxgi.h"
#endif

#include <directx/d3d12sdklayers.h>

#define DZN_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)

#define MAX_TIER2_MEMORY_TYPES 4

const VkExternalMemoryHandleTypeFlags opaque_external_flag =
#ifdef _WIN32
   VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
#else
   VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
#endif

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_get_physical_device_properties2 = true,
   .KHR_device_group_creation = true,
#ifdef DZN_USE_WSI_PLATFORM
   .KHR_surface = true,
   .KHR_get_surface_capabilities2 = true,
#endif
#ifdef VK_USE_PLATFORM_WIN32_KHR
   .KHR_win32_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};
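/* Unlike the static instance table above, the device extension set depends
 * on runtime capabilities (shader model, D3D12 options caps), so it is
 * computed per physical device. */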
static void
dzn_physical_device_get_extensions(struct dzn_physical_device *pdev)
{
   pdev->vk.supported_extensions = (struct vk_device_extension_table) {
      .KHR_16bit_storage = pdev->options4.Native16BitShaderOpsSupported,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = pdev->shader_model >= D3D_SHADER_MODEL_6_6,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_memory = true,
      .KHR_external_semaphore = true,
#ifdef _WIN32
      .KHR_external_memory_win32 = true,
      .KHR_external_semaphore_win32 = true,
#else
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore_fd = true,
#endif
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_multiview = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = pdev->options4.Native16BitShaderOpsSupported,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef DZN_USE_WSI_PLATFORM
      .KHR_swapchain = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_buffer_device_address = pdev->shader_model >= D3D_SHADER_MODEL_6_6,
      .EXT_descriptor_indexing = pdev->shader_model >= D3D_SHADER_MODEL_6_6,
#if defined(_WIN32)
      .EXT_external_memory_host = pdev->dev13,
#endif
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_subgroup_size_control = true,
      .EXT_vertex_attribute_divisor = true,
      .MSFT_layered_driver = true,
   };
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   /* We don't support any layers */
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

static const struct debug_control dzn_debug_options[] = {
   { "sync", DZN_DEBUG_SYNC },
   { "nir", DZN_DEBUG_NIR },
   { "dxil", DZN_DEBUG_DXIL },
   { "warp", DZN_DEBUG_WARP },
   { "internal", DZN_DEBUG_INTERNAL },
   { "signature", DZN_DEBUG_SIG },
   { "gbv", DZN_DEBUG_GBV },
   { "d3d12", DZN_DEBUG_D3D12 },
   { "debugger", DZN_DEBUG_DEBUGGER },
   { "redirects", DZN_DEBUG_REDIRECTS },
   { "bindless", DZN_DEBUG_BINDLESS },
   { "nobindless", DZN_DEBUG_NO_BINDLESS },
   { "experimental", DZN_DEBUG_EXPERIMENTAL },
   { "multiview", DZN_DEBUG_MULTIVIEW },
   { NULL, 0 }
};

static void
dzn_physical_device_destroy(struct vk_physical_device *physical)
{
   struct dzn_physical_device *pdev =
      container_of(physical, struct dzn_physical_device, vk);
   struct dzn_instance *instance =
      container_of(pdev->vk.instance, struct dzn_instance, vk);

   if (pdev->dev)
      ID3D12Device1_Release(pdev->dev);

   if (pdev->dev10)
      ID3D12Device1_Release(pdev->dev10);

   if (pdev->dev11)
      ID3D12Device1_Release(pdev->dev11);

   if (pdev->dev12)
      ID3D12Device1_Release(pdev->dev12);

   if (pdev->dev13)
      ID3D12Device1_Release(pdev->dev13);

   if (pdev->adapter)
      IUnknown_Release(pdev->adapter);

   dzn_wsi_finish(pdev);
   vk_physical_device_finish(&pdev->vk);
   vk_free(&instance->vk.alloc, pdev);
}

static void
dzn_instance_destroy(struct dzn_instance *instance, const VkAllocationCallbacks *alloc)
{
   if (!instance)
      return;

   vk_instance_finish(&instance->vk);

#ifdef _WIN32
   dxil_destroy_validator(instance->dxil_validator);
#endif

   if (instance->factory)
      ID3D12DeviceFactory_Release(instance->factory);

   if (instance->d3d12_mod)
      util_dl_close(instance->d3d12_mod);

   vk_free2(vk_default_allocator(), alloc, instance);
}
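/* __ImageBase is a pseudo-symbol the MSVC linker defines at the base address
 * of the current module; passing it as an HINSTANCE to GetModuleFileNameA()
 * yields the path of this DLL itself rather than the host executable's. */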
#ifdef _WIN32
extern IMAGE_DOS_HEADER __ImageBase;
static const char *
try_find_d3d12core_next_to_self(char *path, size_t path_arr_size)
{
   uint32_t path_size = GetModuleFileNameA((HINSTANCE)&__ImageBase,
                                           path, path_arr_size);
   if (!path_arr_size || path_size == path_arr_size) {
      mesa_loge("Unable to get path to self\n");
      return NULL;
   }

   char *last_slash = strrchr(path, '\\');
   if (!last_slash) {
      mesa_loge("Unable to get path to self\n");
      return NULL;
   }

   *(last_slash + 1) = '\0';
   if (strcat_s(path, path_arr_size, "D3D12Core.dll") != 0) {
      mesa_loge("Unable to get path to D3D12Core.dll next to self\n");
      return NULL;
   }

   if (GetFileAttributesA(path) == INVALID_FILE_ATTRIBUTES)
      return NULL;

   *(last_slash + 1) = '\0';
   return path;
}
#endif

static ID3D12DeviceFactory *
try_create_device_factory(struct util_dl_library *d3d12_mod)
{
   /* A device factory allows us to isolate things like debug layer enablement
    * from other callers, and can potentially even refer to a different D3D12
    * redist implementation from others. */
   ID3D12DeviceFactory *factory = NULL;

   PFN_D3D12_GET_INTERFACE D3D12GetInterface =
      (PFN_D3D12_GET_INTERFACE)util_dl_get_proc_address(d3d12_mod, "D3D12GetInterface");
   if (!D3D12GetInterface) {
      mesa_loge("Failed to retrieve D3D12GetInterface\n");
      return NULL;
   }

#ifdef _WIN32
   /* First, try to create a device factory from a DLL-parallel D3D12Core.dll */
   ID3D12SDKConfiguration *sdk_config = NULL;
   if (SUCCEEDED(D3D12GetInterface(&CLSID_D3D12SDKConfiguration, &IID_ID3D12SDKConfiguration, (void **)&sdk_config))) {
      ID3D12SDKConfiguration1 *sdk_config1 = NULL;
      if (SUCCEEDED(IUnknown_QueryInterface(sdk_config, &IID_ID3D12SDKConfiguration1, (void **)&sdk_config1))) {
         char self_path[MAX_PATH];
         const char *d3d12core_path = try_find_d3d12core_next_to_self(self_path, sizeof(self_path));
         if (d3d12core_path) {
            if (SUCCEEDED(ID3D12SDKConfiguration1_CreateDeviceFactory(sdk_config1, D3D12_PREVIEW_SDK_VERSION, d3d12core_path,
                                                                      &IID_ID3D12DeviceFactory, (void **)&factory)) ||
                SUCCEEDED(ID3D12SDKConfiguration1_CreateDeviceFactory(sdk_config1, D3D12_SDK_VERSION, d3d12core_path,
                                                                      &IID_ID3D12DeviceFactory, (void **)&factory))) {
               ID3D12SDKConfiguration_Release(sdk_config);
               ID3D12SDKConfiguration1_Release(sdk_config1);
               return factory;
            }
         }

         /* Nope, seems we don't have a matching D3D12Core.dll next to ourselves */
         ID3D12SDKConfiguration1_Release(sdk_config1);
      }

      /* It's possible there's a D3D12Core.dll next to the .exe, for
       * development/testing purposes. If so, we'll be notified by environment
       * variables what the relative path is and the version to use. */
      const char *d3d12core_relative_path = getenv("DZN_AGILITY_RELATIVE_PATH");
      const char *d3d12core_sdk_version = getenv("DZN_AGILITY_SDK_VERSION");
      if (d3d12core_relative_path && d3d12core_sdk_version) {
         ID3D12SDKConfiguration_SetSDKVersion(sdk_config, atoi(d3d12core_sdk_version), d3d12core_relative_path);
      }
      ID3D12SDKConfiguration_Release(sdk_config);
   }
#endif

   (void)D3D12GetInterface(&CLSID_D3D12DeviceFactory, &IID_ID3D12DeviceFactory, (void **)&factory);
   return factory;
}
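/* Net effect of the above: prefer a D3D12Core.dll sitting next to this DLL
 * (tried with the preview SDK version first, then the release version), then
 * honor the DZN_AGILITY_RELATIVE_PATH / DZN_AGILITY_SDK_VERSION override, and
 * finally fall back to whatever factory the loaded D3D12 module exposes. */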
VKAPI_ATTR void VKAPI_CALL
dzn_DestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator)
{
   dzn_instance_destroy(dzn_instance_from_handle(instance), pAllocator);
}

static void
dzn_physical_device_init_uuids(struct dzn_physical_device *pdev)
{
   const char *mesa_version = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1;

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[SHA1_DIGEST_LENGTH];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. Our cache is device-agnostic, but it does depend on the features
    * provided by the D3D12 driver, so let's hash the build ID plus some
    * caps that might impact our NIR lowering passes.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, mesa_version, strlen(mesa_version));
   disk_cache_get_function_identifier(dzn_physical_device_init_uuids, &sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, &pdev->options,
                     offsetof(struct dzn_physical_device, options21) + sizeof(pdev->options21) -
                     offsetof(struct dzn_physical_device, options));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pdev->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   /* The driver UUID is used for determining sharability of images and memory
    * between two Vulkan instances in separate processes. People who want to
    * share memory need to also check the device UUID (below) so all this
    * needs to be is the build-id.
    */
   _mesa_sha1_compute(mesa_version, strlen(mesa_version), sha1);
   memcpy(pdev->driver_uuid, sha1, VK_UUID_SIZE);

   /* The device UUID uniquely identifies the given device within the machine. */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, &pdev->desc.vendor_id, sizeof(pdev->desc.vendor_id));
   _mesa_sha1_update(&sha1_ctx, &pdev->desc.device_id, sizeof(pdev->desc.device_id));
   _mesa_sha1_update(&sha1_ctx, &pdev->desc.subsys_id, sizeof(pdev->desc.subsys_id));
   _mesa_sha1_update(&sha1_ctx, &pdev->desc.revision, sizeof(pdev->desc.revision));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pdev->device_uuid, sha1, VK_UUID_SIZE);
}

const struct vk_pipeline_cache_object_ops *const dzn_pipeline_cache_import_ops[] = {
   &dzn_cached_blob_ops,
   NULL,
};

static void
dzn_physical_device_cache_caps(struct dzn_physical_device *pdev)
{
   D3D_FEATURE_LEVEL checklist[] = {
      D3D_FEATURE_LEVEL_11_0,
      D3D_FEATURE_LEVEL_11_1,
      D3D_FEATURE_LEVEL_12_0,
      D3D_FEATURE_LEVEL_12_1,
      D3D_FEATURE_LEVEL_12_2,
   };

   D3D12_FEATURE_DATA_FEATURE_LEVELS levels = {
      .NumFeatureLevels = ARRAY_SIZE(checklist),
      .pFeatureLevelsRequested = checklist,
   };

   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels));
   pdev->feature_level = levels.MaxSupportedFeatureLevel;

   static const D3D_SHADER_MODEL valid_shader_models[] = {
      D3D_SHADER_MODEL_6_8, D3D_SHADER_MODEL_6_7, D3D_SHADER_MODEL_6_6, D3D_SHADER_MODEL_6_5,
      D3D_SHADER_MODEL_6_4, D3D_SHADER_MODEL_6_3, D3D_SHADER_MODEL_6_2, D3D_SHADER_MODEL_6_1,
   };
   for (UINT i = 0; i < ARRAY_SIZE(valid_shader_models); ++i) {
      D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { valid_shader_models[i] };
      if (SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)))) {
         pdev->shader_model = shader_model.HighestShaderModel;
         break;
      }
   }

   D3D_ROOT_SIGNATURE_VERSION root_sig_versions[] = {
      D3D_ROOT_SIGNATURE_VERSION_1_2,
      D3D_ROOT_SIGNATURE_VERSION_1_1
   };
   for (UINT i = 0; i < ARRAY_SIZE(root_sig_versions); ++i) {
      D3D12_FEATURE_DATA_ROOT_SIGNATURE root_sig = { root_sig_versions[i] };
      if (SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ROOT_SIGNATURE, &root_sig, sizeof(root_sig)))) {
         pdev->root_sig_version = root_sig.HighestVersion;
         break;
      }
   }

   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_ARCHITECTURE1, &pdev->architecture, sizeof(pdev->architecture));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS, &pdev->options, sizeof(pdev->options));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS1, &pdev->options1, sizeof(pdev->options1));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS2, &pdev->options2, sizeof(pdev->options2));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS3, &pdev->options3, sizeof(pdev->options3));
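   /* pdev is zero-allocated, so when one of these queries fails on an older
    * runtime the corresponding options struct simply stays zeroed (reading
    * as "not supported"), except where explicit fallbacks are applied below
    * for OPTIONS19 and OPTIONS21. */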
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS4, &pdev->options4, sizeof(pdev->options4));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS12, &pdev->options12, sizeof(pdev->options12));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS13, &pdev->options13, sizeof(pdev->options13));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS14, &pdev->options14, sizeof(pdev->options14));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS15, &pdev->options15, sizeof(pdev->options15));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS16, &pdev->options16, sizeof(pdev->options16));
   ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS17, &pdev->options17, sizeof(pdev->options17));
   if (FAILED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS19, &pdev->options19, sizeof(pdev->options19)))) {
      pdev->options19.MaxSamplerDescriptorHeapSize = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
      pdev->options19.MaxSamplerDescriptorHeapSizeWithStaticSamplers = pdev->options19.MaxSamplerDescriptorHeapSize;
      pdev->options19.MaxViewDescriptorHeapSize = D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
   }
   if (FAILED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_D3D12_OPTIONS21, &pdev->options21, sizeof(pdev->options21)))) {
      pdev->options21.ExecuteIndirectTier = D3D12_EXECUTE_INDIRECT_TIER_1_0;
   }

   {
      D3D12_FEATURE_DATA_FORMAT_SUPPORT a4b4g4r4_support = { .Format = DXGI_FORMAT_A4B4G4R4_UNORM };
      pdev->support_a4b4g4r4 =
         SUCCEEDED(ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT, &a4b4g4r4_support, sizeof(a4b4g4r4_support)));
   }

   pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) {
      .props = {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 4,
         .timestampValidBits = 64,
         .minImageTransferGranularity = { 0, 0, 0 },
      },
      .desc = {
         .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
      },
   };

   pdev->queue_families[pdev->queue_family_count++] = (struct dzn_queue_family) {
      .props = {
         .queueFlags = VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 8,
         .timestampValidBits = 64,
         .minImageTransferGranularity = { 0, 0, 0 },
      },
      .desc = {
         .Type = D3D12_COMMAND_LIST_TYPE_COMPUTE,
      },
   };

   assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));

   D3D12_COMMAND_QUEUE_DESC queue_desc = {
      .Type = D3D12_COMMAND_LIST_TYPE_DIRECT,
      .Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL,
      .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE,
      .NodeMask = 0,
   };

   ID3D12CommandQueue *cmdqueue;
   ID3D12Device1_CreateCommandQueue(pdev->dev, &queue_desc,
                                    &IID_ID3D12CommandQueue,
                                    (void **)&cmdqueue);

   uint64_t ts_freq;
   ID3D12CommandQueue_GetTimestampFrequency(cmdqueue, &ts_freq);
   pdev->timestamp_period = 1000000000.0f / ts_freq;
   ID3D12CommandQueue_Release(cmdqueue);
}
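/* Rough sketch of the layout built below:
 *   UMA:     a single shared-system-memory heap (flagged DEVICE_LOCAL) with
 *            three types: DEVICE_LOCAL only, DEVICE_LOCAL | HOST_VISIBLE |
 *            HOST_COHERENT, and DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED |
 *            HOST_COHERENT.
 *   non-UMA: the same three types without DEVICE_LOCAL on a system-memory
 *            heap, plus a dedicated-VRAM heap exposing a DEVICE_LOCAL-only
 *            type. */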
static void
dzn_physical_device_init_memory(struct dzn_physical_device *pdev)
{
   VkPhysicalDeviceMemoryProperties *mem = &pdev->memory;

   /* For each pair of elements X and Y returned in memoryTypes, X must be placed at a lower index position than Y if:
    * - the set of bit flags returned in the propertyFlags member of X is a strict subset of the set of bit flags
    *   returned in the propertyFlags member of Y; or
    * - the propertyFlags members of X and Y are equal, and X belongs to a memory heap with greater performance
    *   (as determined in an implementation-specific manner); or
    * - the propertyFlags member of Y includes VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD or
    *   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD and X does not
    * See: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceMemoryProperties.html
    */
   mem->memoryHeapCount = 0;
   mem->memoryTypeCount = 0;

   VkMemoryPropertyFlags ram_device_local_property = 0;
   VkMemoryHeapFlags ram_device_local_heap_flag = 0;
   if (pdev->architecture.UMA) {
      /* All memory is considered device-local for UMA even though it's just RAM */
      ram_device_local_property = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
      ram_device_local_heap_flag = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
   }

   mem->memoryHeaps[mem->memoryHeapCount++] = (VkMemoryHeap) {
      .size = pdev->desc.shared_system_memory,
      .flags = ram_device_local_heap_flag,
   };

   /* Three non-device-local memory types: host non-visible, host write-combined, and host cached */
   mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) {
      .propertyFlags = ram_device_local_property,
      .heapIndex = mem->memoryHeapCount - 1,
   };
   mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) {
      .propertyFlags = ram_device_local_property |
                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
      .heapIndex = mem->memoryHeapCount - 1,
   };
   mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) {
      .propertyFlags = ram_device_local_property |
                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
      .heapIndex = mem->memoryHeapCount - 1,
   };

   if (!pdev->architecture.UMA) {
      /* Add a device-local memory heap/type */
      mem->memoryHeaps[mem->memoryHeapCount++] = (VkMemoryHeap) {
         .size = pdev->desc.dedicated_video_memory,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
      mem->memoryTypes[mem->memoryTypeCount++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = mem->memoryHeapCount - 1,
      };
   }

   assert(mem->memoryTypeCount <= MAX_TIER2_MEMORY_TYPES);
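   /* On RESOURCE_HEAP_TIER_1, a D3D12 heap can only hold one class of
    * resource, so each memory type above is fanned out into three variants
    * keyed by heap flags (buffers / RT-DS textures / other textures); the
    * per-type flags recorded here drive
    * dzn_physical_device_get_mem_type_mask_for_resource() below. */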
   if (pdev->options.ResourceHeapTier == D3D12_RESOURCE_HEAP_TIER_1) {
      unsigned oldMemoryTypeCount = mem->memoryTypeCount;
      VkMemoryType oldMemoryTypes[MAX_TIER2_MEMORY_TYPES];

      memcpy(oldMemoryTypes, mem->memoryTypes, oldMemoryTypeCount * sizeof(VkMemoryType));

      mem->memoryTypeCount = 0;
      for (unsigned oldMemoryTypeIdx = 0; oldMemoryTypeIdx < oldMemoryTypeCount; ++oldMemoryTypeIdx) {
         D3D12_HEAP_FLAGS flags[] = {
            D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
            D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES,
            /* Note: Vulkan requires *all* images to come from the same memory type as long as
             * the tiling property (and a few other misc properties) are the same. So, this
             * non-RT/DS texture flag will only be used for TILING_LINEAR textures, which
             * can't be render targets.
             */
            D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES
         };
         for (int i = 0; i < ARRAY_SIZE(flags); ++i) {
            D3D12_HEAP_FLAGS flag = flags[i];
            pdev->heap_flags_for_mem_type[mem->memoryTypeCount] = flag;
            mem->memoryTypes[mem->memoryTypeCount] = oldMemoryTypes[oldMemoryTypeIdx];
            mem->memoryTypeCount++;
         }
      }
   }
}

static D3D12_HEAP_FLAGS
dzn_physical_device_get_heap_flags_for_mem_type(const struct dzn_physical_device *pdev,
                                                uint32_t mem_type)
{
   return pdev->heap_flags_for_mem_type[mem_type];
}

uint32_t
dzn_physical_device_get_mem_type_mask_for_resource(const struct dzn_physical_device *pdev,
                                                   const D3D12_RESOURCE_DESC *desc,
                                                   bool shared)
{
   if (pdev->options.ResourceHeapTier > D3D12_RESOURCE_HEAP_TIER_1 && !shared)
      return (1u << pdev->memory.memoryTypeCount) - 1;

   D3D12_HEAP_FLAGS deny_flag = D3D12_HEAP_FLAG_NONE;
   if (pdev->options.ResourceHeapTier <= D3D12_RESOURCE_HEAP_TIER_1) {
      if (desc->Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
         deny_flag = D3D12_HEAP_FLAG_DENY_BUFFERS;
      else if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
         deny_flag = D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES;
      else
         deny_flag = D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES;
   }

   uint32_t mask = 0;
   for (unsigned i = 0; i < pdev->memory.memoryTypeCount; ++i) {
      if (shared && (pdev->memory.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
         continue;
      if ((pdev->heap_flags_for_mem_type[i] & deny_flag) == D3D12_HEAP_FLAG_NONE)
         mask |= (1 << i);
   }
   return mask;
}

static uint32_t
dzn_physical_device_get_max_mip_level(bool is_3d)
{
   return is_3d ? 11 : 14;
}

static uint32_t
dzn_physical_device_get_max_extent(bool is_3d)
{
   uint32_t max_mip = dzn_physical_device_get_max_mip_level(is_3d);

   return 1 << max_mip;
}

static uint32_t
dzn_physical_device_get_max_array_layers()
{
   return dzn_physical_device_get_max_extent(false);
}
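/* These mirror D3D12's fixed resource limits: 14 mips -> a 1 << 14 = 16384
 * max extent for 1D/2D (D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION), and 11 mips
 * -> 2048 for 3D (D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION). */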
static void
dzn_physical_device_get_features(const struct dzn_physical_device *pdev,
                                 struct vk_features *features)
{
   struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk);

   bool support_descriptor_indexing = pdev->shader_model >= D3D_SHADER_MODEL_6_6 &&
      !(instance->debug_flags & DZN_DEBUG_NO_BINDLESS);
   bool support_8bit = driQueryOptionb(&instance->dri_options, "dzn_enable_8bit_loads_stores") &&
      pdev->options4.Native16BitShaderOpsSupported;

   *features = (struct vk_features) {
      .robustBufferAccess = true, /* This feature is mandatory */
      .fullDrawIndexUint32 = false,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdev->options2.DepthBoundsTestSupported,
      .wideLines = driQueryOptionb(&instance->dri_options, "dzn_claim_wide_lines"),
      .largePoints = false,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = false,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = pdev->options.TypedUAVLoadAdditionalFormats,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdev->options.DoublePrecisionFloatShaderOps,
      .shaderInt64 = pdev->options1.Int64ShaderOps,
      .shaderInt16 = pdev->options4.Native16BitShaderOpsSupported,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      .storageBuffer16BitAccess = pdev->options4.Native16BitShaderOpsSupported,
      .uniformAndStorageBuffer16BitAccess = pdev->options4.Native16BitShaderOpsSupported,
      .storagePushConstant16 = false,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = false,
      .variablePointersStorageBuffer = false,
      .variablePointers = false,
      .protectedMemory = false,
      .samplerYcbcrConversion = false,
      .shaderDrawParameters = true,

      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = support_8bit,
      .uniformAndStorageBuffer8BitAccess = support_8bit,
      .storagePushConstant8 = support_8bit,
      .shaderBufferInt64Atomics = false,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = pdev->options4.Native16BitShaderOpsSupported,
      .shaderInt8 = support_8bit,

      .descriptorIndexing = support_descriptor_indexing,
      .shaderInputAttachmentArrayDynamicIndexing = true,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderSampledImageArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderStorageBufferArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderStorageImageArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderInputAttachmentArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderUniformTexelBufferArrayNonUniformIndexing = support_descriptor_indexing,
      .shaderStorageTexelBufferArrayNonUniformIndexing = support_descriptor_indexing,
      .descriptorBindingUniformBufferUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingSampledImageUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingStorageImageUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingStorageBufferUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = support_descriptor_indexing,
      .descriptorBindingUpdateUnusedWhilePending = support_descriptor_indexing,
      .descriptorBindingPartiallyBound = support_descriptor_indexing,
      .descriptorBindingVariableDescriptorCount = support_descriptor_indexing,
      .runtimeDescriptorArray = support_descriptor_indexing,

      .samplerFilterMinmax = false,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = pdev->shader_model >= D3D_SHADER_MODEL_6_6,
      .bufferDeviceAddressCaptureReplay = false,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = false,
      .vulkanMemoryModelDeviceScope = false,
      .vulkanMemoryModelAvailabilityVisibilityChains = false,
      .shaderOutputViewportIndex = false,
      .shaderOutputLayer = false,
      .subgroupBroadcastDynamicId = true,

      .robustImageAccess = false,
      .inlineUniformBlock = false,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
      .pipelineCreationCacheControl = false,
      .privateData = true,
      .shaderDemoteToHelperInvocation = false,
      .shaderTerminateInvocation = false,
      .subgroupSizeControl = pdev->options1.WaveOps && pdev->shader_model >= D3D_SHADER_MODEL_6_6,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = false,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = false,

      .shaderExpectAssume = true,

      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      .shaderReplicatedComposites = true,
   };
}

static void
dzn_physical_device_get_properties(const struct dzn_physical_device *pdev,
                                   struct vk_properties *properties)
{
   /* minimum from the D3D and Vulkan specs */
   const VkSampleCountFlags supported_sample_counts =
      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;

   VkPhysicalDeviceType devtype = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
   if (pdev->desc.is_warp)
      devtype = VK_PHYSICAL_DEVICE_TYPE_CPU;
   else if (!pdev->architecture.UMA)
      devtype = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;

   *properties = (struct vk_properties) {
      .apiVersion = DZN_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = pdev->desc.vendor_id,
      .deviceID = pdev->desc.device_id,
      .deviceType = devtype,

      /* Limits */
      .maxImageDimension1D = D3D12_REQ_TEXTURE1D_U_DIMENSION,
      .maxImageDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
      .maxImageDimension3D = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION,
      .maxImageDimensionCube = D3D12_REQ_TEXTURECUBE_DIMENSION,
      .maxImageArrayLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION,

      /* from here on, we simply use the minimum values from the spec for now */
      .maxTexelBufferElements = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP,
      .maxUniformBufferRange = D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * D3D12_STANDARD_VECTOR_SIZE * sizeof(float),
      .maxStorageBufferRange = 1 << D3D12_REQ_BUFFER_RESOURCE_TEXEL_COUNT_2_TO_EXP,
      .maxPushConstantsSize = 128,
      .maxMemoryAllocationCount = 4096,
      .maxSamplerAllocationCount = 4000,
      .bufferImageGranularity = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT,
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
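      /* Per-stage limits below follow the D3D12 resource-binding tier table:
       * Tier 1 caps SRVs at 128 and samplers at 16 per stage, Tiers 1-2 cap
       * CBVs at 14 and UAVs at 64; Tier 3 exposes full heap capacity. */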
      .maxPerStageDescriptorSamplers =
         pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
         16u : MAX_DESCS_PER_SAMPLER_HEAP,
      .maxPerStageDescriptorUniformBuffers =
         pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
         14u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorStorageBuffers =
         pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
         64u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorSampledImages =
         pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
         128u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorStorageImages =
         pdev->options.ResourceBindingTier <= D3D12_RESOURCE_BINDING_TIER_2 ?
         64u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorInputAttachments =
         pdev->options.ResourceBindingTier == D3D12_RESOURCE_BINDING_TIER_1 ?
         128u : MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageResources = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetSamplers = MAX_DESCS_PER_SAMPLER_HEAP,
      .maxDescriptorSetUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxVertexInputAttributes = MIN2(D3D12_STANDARD_VERTEX_ELEMENT_COUNT, MAX_VERTEX_GENERIC_ATTRIBS),
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES - 1,
      .maxVertexInputBindingStride = D3D12_REQ_MULTI_ELEMENT_STRUCTURE_SIZE_IN_BYTES,
      .maxVertexOutputComponents = D3D12_VS_OUTPUT_REGISTER_COUNT * D3D12_VS_OUTPUT_REGISTER_COMPONENTS,
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,
      .maxGeometryShaderInvocations = D3D12_GS_MAX_INSTANCE_COUNT,
      .maxGeometryInputComponents = D3D12_GS_INPUT_REGISTER_COUNT * D3D12_GS_INPUT_REGISTER_COMPONENTS,
      .maxGeometryOutputComponents = D3D12_GS_OUTPUT_REGISTER_COUNT * D3D12_GS_OUTPUT_REGISTER_COMPONENTS,
      .maxGeometryOutputVertices = D3D12_GS_MAX_OUTPUT_VERTEX_COUNT_ACROSS_INSTANCES,
      .maxGeometryTotalOutputComponents = D3D12_REQ_GS_INVOCATION_32BIT_OUTPUT_COMPONENT_LIMIT,
      .maxFragmentInputComponents = D3D12_PS_INPUT_REGISTER_COUNT * D3D12_PS_INPUT_REGISTER_COMPONENTS,
      .maxFragmentOutputAttachments = D3D12_PS_OUTPUT_REGISTER_COUNT,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources = D3D12_PS_OUTPUT_REGISTER_COUNT,
      .maxComputeSharedMemorySize = D3D12_CS_TGSM_REGISTER_COUNT * sizeof(float),
      .maxComputeWorkGroupCount = {
         D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
         D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
         D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION
      },
      .maxComputeWorkGroupInvocations = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP,
      .maxComputeWorkGroupSize = {
         D3D12_CS_THREAD_GROUP_MAX_X,
         D3D12_CS_THREAD_GROUP_MAX_Y,
         D3D12_CS_THREAD_GROUP_MAX_Z
      },
      .subPixelPrecisionBits = D3D12_SUBPIXEL_FRACTIONAL_BIT_COUNT,
      .subTexelPrecisionBits = D3D12_SUBTEXEL_FRACTIONAL_BIT_COUNT,
      .mipmapPrecisionBits = D3D12_MIP_LOD_FRACTIONAL_BIT_COUNT,
      .maxDrawIndexedIndexValue = 0x00ffffff,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = D3D12_MIP_LOD_BIAS_MAX,
      .maxSamplerAnisotropy = D3D12_REQ_MAXANISOTROPY,
      .maxViewports = MAX_VP,
      .maxViewportDimensions = {
         D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
         D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION
      },
      .viewportBoundsRange = { D3D12_VIEWPORT_BOUNDS_MIN, D3D12_VIEWPORT_BOUNDS_MAX },
      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = 64,
      .minTexelBufferOffsetAlignment = 32,
      .minUniformBufferOffsetAlignment = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
      .minStorageBufferOffsetAlignment = D3D12_RAW_UAV_SRV_BYTE_ALIGNMENT,
      .minTexelOffset = D3D12_COMMONSHADER_TEXEL_OFFSET_MAX_NEGATIVE,
      .maxTexelOffset = D3D12_COMMONSHADER_TEXEL_OFFSET_MAX_POSITIVE,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
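      /* Gather offsets use D3D12's fixed [-32, 31] programmable-offset
       * range, which is wider than the regular texel-offset limits above. */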
      .minInterpolationOffset = -0.5f,
      .maxInterpolationOffset = 0.5f,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
      .maxFramebufferHeight = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION,
      .maxFramebufferLayers = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION,
      .framebufferColorSampleCounts = supported_sample_counts,
      .framebufferDepthSampleCounts = supported_sample_counts,
      .framebufferStencilSampleCounts = supported_sample_counts,
      .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = supported_sample_counts,
      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .sampledImageDepthSampleCounts = supported_sample_counts,
      .sampledImageStencilSampleCounts = supported_sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = pdev->timestamp_period,
      .maxClipDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
      .maxCullDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
      .maxCombinedClipAndCullDistances = D3D12_CLIP_OR_CULL_DISTANCE_COUNT,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 1.0f, 1.0f },
      .lineWidthRange = { 1.0f, 1.0f },
      .pointSizeGranularity = 0.0f,
      .lineWidthGranularity = 0.0f,
      .strictLines = 0,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT,
      .optimalBufferCopyRowPitchAlignment = D3D12_TEXTURE_DATA_PITCH_ALIGNMENT,
      .nonCoherentAtomSize = 256,

      /* Core 1.1 */
      .deviceLUIDValid = true,
      .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
      .maxMultiviewViewCount = 6,
      .maxMultiviewInstanceIndex = UINT_MAX,
      .protectedNoFault = false,
      /* Vulkan 1.1 wants this value to be at least 1024. Let's stick to this
       * minimum requirement for now, and hope the total number of samplers
       * across all descriptor sets doesn't exceed 2048, otherwise we'd exceed
       * the maximum number of samplers per heap. For any descriptor set
       * containing more than 1024 descriptors,
       * vkGetDescriptorSetLayoutSupport() can be called to determine if the
       * layout is within D3D12 descriptor heap bounds.
       */
      .maxPerSetDescriptors = 1024,
      /* According to the spec, the maximum D3D12 resource size is
       * min(max(128MB, 0.25f * (amount of dedicated VRAM)), 2GB),
       * but the limit actually depends on the max(system_ram, VRAM) not
       * just the VRAM.
       */
      .maxMemoryAllocationSize =
         CLAMP(MAX2(pdev->desc.dedicated_video_memory,
                    pdev->desc.dedicated_system_memory + pdev->desc.shared_system_memory) / 4,
               128ull * 1024 * 1024,
               2ull * 1024 * 1024 * 1024),
      .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT,
      .subgroupSupportedStages = VK_SHADER_STAGE_FRAGMENT_BIT |
                                 VK_SHADER_STAGE_COMPUTE_BIT |
                                 VK_SHADER_STAGE_GEOMETRY_BIT |
                                 VK_SHADER_STAGE_VERTEX_BIT,
      .subgroupQuadOperationsInAllStages = true,
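      /* When wave ops are unavailable we report 1-wide subgroups; otherwise
       * the subgroup size comes straight from the D3D12 wave-width caps
       * (e.g. WaveLaneCountMin = 32 reports a 32-wide subgroup). */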
      .subgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1,

      /* Core 1.2 */
      .driverID = VK_DRIVER_ID_MESA_DOZEN,
      .conformanceVersion = (VkConformanceVersion) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      },
      .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .shaderSignedZeroInfNanPreserveFloat16 = false,
      .shaderSignedZeroInfNanPreserveFloat32 = false,
      .shaderSignedZeroInfNanPreserveFloat64 = false,
      .shaderDenormPreserveFloat16 = true,
      .shaderDenormPreserveFloat32 = pdev->shader_model >= D3D_SHADER_MODEL_6_2,
      .shaderDenormPreserveFloat64 = true,
      .shaderDenormFlushToZeroFloat16 = false,
      .shaderDenormFlushToZeroFloat32 = true,
      .shaderDenormFlushToZeroFloat64 = false,
      .shaderRoundingModeRTEFloat16 = true,
      .shaderRoundingModeRTEFloat32 = true,
      .shaderRoundingModeRTEFloat64 = true,
      .shaderRoundingModeRTZFloat16 = false,
      .shaderRoundingModeRTZFloat32 = false,
      .shaderRoundingModeRTZFloat64 = false,
      .shaderUniformBufferArrayNonUniformIndexingNative = true,
      .shaderSampledImageArrayNonUniformIndexingNative = true,
      .shaderStorageBufferArrayNonUniformIndexingNative = true,
      .shaderStorageImageArrayNonUniformIndexingNative = true,
      .shaderInputAttachmentArrayNonUniformIndexingNative = true,
      .robustBufferAccessUpdateAfterBind = true,
      .quadDivergentImplicitLod = false,
      .maxUpdateAfterBindDescriptorsInAllPools = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindSamplers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxPerStageUpdateAfterBindResources = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindSamplers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindUniformBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindStorageBuffers = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetUpdateAfterBindSampledImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindStorageImages = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCS_PER_CBV_SRV_UAV_HEAP,
      .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                    VK_RESOLVE_MODE_AVERAGE_BIT |
                                    VK_RESOLVE_MODE_MIN_BIT |
                                    VK_RESOLVE_MODE_MAX_BIT,
      .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT,
      .independentResolveNone = true,
      .independentResolve = true,
      .filterMinmaxSingleComponentFormats = false,
      .filterMinmaxImageComponentMapping = false,
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,
      .framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT,

      /* Core 1.3 */
      .minSubgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1,
      .maxSubgroupSize = pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMax : 1,
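      /* e.g. with a 32-lane minimum wave this comes out to 1024 / 32 = 32
       * subgroups per workgroup
       * (D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP is 1024). */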
      .maxComputeWorkgroupSubgroups = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP /
                                      (pdev->options1.WaveOps ? pdev->options1.WaveLaneCountMin : 1),
      .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,
      .integerDotProduct4x8BitPackedSignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
      .integerDotProduct4x8BitPackedUnsignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
      .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,
      .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdev->shader_model >= D3D_SHADER_MODEL_6_4,

      /* VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT */
      .maxVertexAttribDivisor = UINT32_MAX,

      /* VkPhysicalDeviceExternalMemoryHostPropertiesEXT */
      .minImportedHostPointerAlignment = 65536,

      /* VkPhysicalDeviceLayeredDriverPropertiesMSFT */
      .underlyingAPI = VK_LAYERED_DRIVER_UNDERLYING_API_D3D12_MSFT,
   };

   snprintf(properties->deviceName, sizeof(properties->deviceName),
            "Microsoft Direct3D12 (%s)", pdev->desc.description);

   memcpy(properties->pipelineCacheUUID, pdev->pipeline_cache_uuid, VK_UUID_SIZE);
   memcpy(properties->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
   memcpy(properties->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
   memcpy(properties->deviceLUID, &pdev->desc.adapter_luid, VK_LUID_SIZE);

   STATIC_ASSERT(sizeof(pdev->desc.adapter_luid) == sizeof(properties->deviceLUID));

   snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "Dozen");
   snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
}

static VkResult
dzn_physical_device_create(struct vk_instance *instance,
                           IUnknown *adapter,
                           const struct dzn_physical_device_desc *desc)
{
   struct dzn_physical_device *pdev =
      vk_zalloc(&instance->alloc, sizeof(*pdev), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdev)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &dzn_physical_device_entrypoints,
                                                      true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints,
                                                      false);

   VkResult result =
      vk_physical_device_init(&pdev->vk, instance,
                              NULL, NULL, NULL, /* We set up extensions later */
                              &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_free(&instance->alloc, pdev);
      return result;
   }

   pdev->desc = *desc;
   pdev->adapter = adapter;
   IUnknown_AddRef(adapter);
   list_addtail(&pdev->vk.link, &instance->physical_devices.list);

   vk_warn_non_conformant_implementation("dzn");

   struct dzn_instance *dzn_instance = container_of(instance, struct dzn_instance, vk);

   uint32_t num_sync_types = 0;
   pdev->sync_types[num_sync_types++] = &dzn_sync_type;
   pdev->sync_types[num_sync_types++] = &dzn_instance->sync_binary_type.sync;
   pdev->sync_types[num_sync_types++] = &vk_sync_dummy_type;
   pdev->sync_types[num_sync_types] = NULL;
   assert(num_sync_types <= MAX_SYNC_TYPES);
   pdev->vk.supported_sync_types = pdev->sync_types;

   pdev->vk.pipeline_cache_import_ops = dzn_pipeline_cache_import_ops;

   pdev->dev = d3d12_create_device(dzn_instance->d3d12_mod,
                                   pdev->adapter,
                                   dzn_instance->factory,
                                   !dzn_instance->dxil_validator);
   if (!pdev->dev) {
      list_del(&pdev->vk.link);
      dzn_physical_device_destroy(&pdev->vk);
      return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
   }
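   /* Newer ID3D12DeviceN interfaces are optional: a failed QueryInterface()
    * just leaves the pointer NULL and the dependent features off (e.g.
    * EXT_external_memory_host keys off dev13 in the extension table). */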
   if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device10, (void **)&pdev->dev10)))
      pdev->dev10 = NULL;
   if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device11, (void **)&pdev->dev11)))
      pdev->dev11 = NULL;
   if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device12, (void **)&pdev->dev12)))
      pdev->dev12 = NULL;
   if (FAILED(ID3D12Device1_QueryInterface(pdev->dev, &IID_ID3D12Device13, (void **)&pdev->dev13)))
      pdev->dev13 = NULL;

   dzn_physical_device_cache_caps(pdev);
   dzn_physical_device_init_memory(pdev);
   dzn_physical_device_init_uuids(pdev);

   if (dzn_instance->debug_flags & DZN_DEBUG_MULTIVIEW)
      pdev->options3.ViewInstancingTier = D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;

   dzn_physical_device_get_extensions(pdev);
   if (driQueryOptionb(&dzn_instance->dri_options, "dzn_enable_8bit_loads_stores") &&
       pdev->options4.Native16BitShaderOpsSupported)
      pdev->vk.supported_extensions.KHR_8bit_storage = true;
   if (dzn_instance->debug_flags & DZN_DEBUG_NO_BINDLESS)
      pdev->vk.supported_extensions.EXT_descriptor_indexing = false;
   dzn_physical_device_get_features(pdev, &pdev->vk.supported_features);
   dzn_physical_device_get_properties(pdev, &pdev->vk.properties);

   result = dzn_wsi_init(pdev);
   if (result != VK_SUCCESS || !pdev->dev) {
      list_del(&pdev->vk.link);
      dzn_physical_device_destroy(&pdev->vk);
      return result;
   }

   return VK_SUCCESS;
}

static DXGI_FORMAT
dzn_get_most_capable_format_for_casting(VkFormat format, VkImageCreateFlags create_flags)
{
   enum pipe_format pfmt = vk_format_to_pipe_format(format);
   bool block_compressed = util_format_is_compressed(pfmt);
   if (block_compressed &&
       !(create_flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT))
      return dzn_image_get_dxgi_format(NULL, format, 0, 0);
   unsigned blksz = util_format_get_blocksize(pfmt);
   switch (blksz) {
   case 1: return DXGI_FORMAT_R8_UNORM;
   case 2: return DXGI_FORMAT_R16_UNORM;
   case 4: return DXGI_FORMAT_R32_FLOAT;
   case 8: return DXGI_FORMAT_R32G32_FLOAT;
   case 12: return DXGI_FORMAT_R32G32B32_FLOAT;
   case 16: return DXGI_FORMAT_R32G32B32A32_FLOAT;
   default: unreachable("Unsupported format bit size");
   }
}
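/* The mapping above picks one representative "plain" format per texel size
 * (e.g. any 4-byte format class queries as R32_FLOAT), so capability queries
 * made with extended usage reflect the most capable member of the cast set. */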
D3D12_FEATURE_DATA_FORMAT_SUPPORT
dzn_physical_device_get_format_support(struct dzn_physical_device *pdev,
                                       VkFormat format,
                                       VkImageCreateFlags create_flags)
{
   VkImageUsageFlags usage =
      vk_format_is_depth_or_stencil(format) ?
      VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT : 0;
   VkImageAspectFlags aspects = 0;

   if (vk_format_has_depth(format))
      aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
   if (vk_format_has_stencil(format))
      aspects = VK_IMAGE_ASPECT_STENCIL_BIT;

   D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info = {
      .Format = dzn_image_get_dxgi_format(pdev, format, usage, aspects),
   };

   /* KHR_maintenance2: If an image is created with the extended usage flag
    * (or if properties are queried with that flag), then if any compatible
    * format can support a given usage, it should be considered supported.
    * With the exception of depth, which are limited in their cast set,
    * we can do this by just picking a single most-capable format to query
    * the support for, instead of the originally requested format.
    */
   if (aspects == 0 && dfmt_info.Format != DXGI_FORMAT_UNKNOWN &&
       (create_flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)) {
      dfmt_info.Format = dzn_get_most_capable_format_for_casting(format, create_flags);
   }

   ASSERTED HRESULT hres =
      ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT,
                                        &dfmt_info, sizeof(dfmt_info));
   assert(!FAILED(hres));

   if (usage != VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)
      return dfmt_info;

   /* Depth/stencil resources have different format when they're accessed
    * as textures, query the capabilities for this format too.
    */
   dzn_foreach_aspect(aspect, aspects) {
      D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info2 = {
         .Format = dzn_image_get_dxgi_format(pdev, format, 0, aspect),
      };

      hres = ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_FORMAT_SUPPORT,
                                               &dfmt_info2, sizeof(dfmt_info2));
      assert(!FAILED(hres));

#define DS_SRV_FORMAT_SUPPORT1_MASK \
        (D3D12_FORMAT_SUPPORT1_SHADER_LOAD | \
         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | \
         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | \
         D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_MONO_TEXT | \
         D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE | \
         D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD | \
         D3D12_FORMAT_SUPPORT1_SHADER_GATHER | \
         D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW | \
         D3D12_FORMAT_SUPPORT1_SHADER_GATHER_COMPARISON)

      dfmt_info.Support1 |= dfmt_info2.Support1 & DS_SRV_FORMAT_SUPPORT1_MASK;
      dfmt_info.Support2 |= dfmt_info2.Support2;
   }

   return dfmt_info;
}

static void
dzn_physical_device_get_format_properties(struct dzn_physical_device *pdev,
                                          VkFormat format,
                                          VkFormatProperties2 *properties)
{
   D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
      dzn_physical_device_get_format_support(pdev, format, 0);
   VkFormatProperties *base_props = &properties->formatProperties;

   vk_foreach_struct(ext, properties->pNext) {
      vk_debug_ignored_stype(ext->sType);
   }

   if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN) {
      if (dzn_graphics_pipeline_patch_vi_format(format) != format)
         *base_props = (VkFormatProperties) {
            .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
                              VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
                              VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT,
         };
      else
         *base_props = (VkFormatProperties) { 0 };
      return;
   }

   *base_props = (VkFormatProperties) {
      .linearTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
      .optimalTilingFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
      .bufferFeatures = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT,
   };

   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_IA_VERTEX_BUFFER)
      base_props->bufferFeatures |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;

#define TEX_FLAGS (D3D12_FORMAT_SUPPORT1_TEXTURE1D | \
                   D3D12_FORMAT_SUPPORT1_TEXTURE2D | \
                   D3D12_FORMAT_SUPPORT1_TEXTURE3D | \
                   D3D12_FORMAT_SUPPORT1_TEXTURECUBE)
   if ((dfmt_info.Support1 & TEX_FLAGS) &&
       (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD)) {
      base_props->optimalTilingFeatures |=
         VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
   }

   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) {
      base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
   }

   if ((dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) &&
       (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW)) {
      base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BUFFER)
         base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
   }

#define ATOMIC_FLAGS (D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | \
                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | \
                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | \
                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | \
                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | \
                      D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX)
   if ((dfmt_info.Support2 & ATOMIC_FLAGS) == ATOMIC_FLAGS) {
      base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
      base_props->bufferFeatures |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
   }
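   /* Vulkan's single atomic feature bit covers every atomic op, so all of
    * the D3D12 UAV-atomic caps above must be present, not just one of them. */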
   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BUFFER)
      base_props->bufferFeatures |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;

   /* Color/depth/stencil attachment cap implies input attachment cap, and input
    * attachment loads are lowered to texture loads in dozen, hence the requirement
    * to have shader-load support.
    */
   if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) {
      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) {
         base_props->optimalTilingFeatures |=
            VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
      }

      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_BLENDABLE)
         base_props->optimalTilingFeatures |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;

      if (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) {
         base_props->optimalTilingFeatures |=
            VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
      }
   }

   /* B4G4R4A4 support is required, but d3d12 doesn't support it. The needed
    * d3d12 format would be A4R4G4B4. We map this format to d3d12's B4G4R4A4,
    * which is Vulkan's A4R4G4B4, and adjust the SRV component-mapping to fake
    * B4G4R4A4, but that forces us to limit the usage to sampling, which,
    * luckily, is exactly what we need to support the required features.
    *
    * However, since this involves swizzling the alpha channel, it can cause
    * problems for border colors. Fortunately, d3d12 added an A4B4G4R4 format,
    * which still isn't quite right (it'd be Vulkan R4G4B4A4), but can be
    * swizzled by just swapping R and B, so no border color issues arise.
    */
   if (format == VK_FORMAT_B4G4R4A4_UNORM_PACK16) {
      VkFormatFeatureFlags bgra4_req_features =
         VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
         VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
         VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
         VK_FORMAT_FEATURE_BLIT_SRC_BIT |
         VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
      base_props->optimalTilingFeatures &= bgra4_req_features;
      base_props->bufferFeatures =
         VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
   }

   /* depth/stencil format shouldn't advertise buffer features */
   if (vk_format_is_depth_or_stencil(format))
      base_props->bufferFeatures = 0;
}

static VkResult
dzn_physical_device_get_image_format_properties(struct dzn_physical_device *pdev,
                                                const VkPhysicalDeviceImageFormatInfo2 *info,
                                                VkImageFormatProperties2 *properties)
{
   const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
   VkExternalImageFormatProperties *external_props = NULL;

   properties->imageFormatProperties = (VkImageFormatProperties) { 0 };

   VkImageUsageFlags usage = info->usage;

   /* Extract input structs */
   vk_foreach_struct_const(s, info->pNext) {
      switch (s->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
         external_info = (const VkPhysicalDeviceExternalImageFormatInfo *)s;
         break;
      case VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO:
         usage |= ((const VkImageStencilUsageCreateInfo *)s)->stencilUsage;
         break;
      default:
         vk_debug_ignored_stype(s->sType);
         break;
      }
   }

   assert(info->tiling == VK_IMAGE_TILING_OPTIMAL || info->tiling == VK_IMAGE_TILING_LINEAR);

   /* Extract output structs */
   vk_foreach_struct(s, properties->pNext) {
      switch (s->sType) {
      case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
         external_props = (VkExternalImageFormatProperties *)s;
         external_props->externalMemoryProperties = (VkExternalMemoryProperties) { 0 };
         break;
      default:
         vk_debug_ignored_stype(s->sType);
         break;
      }
   }
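   /* D3D12 shared resources map onto several Vulkan external-handle types;
    * the cases below mostly differ in whether the export must be a dedicated
    * allocation (a committed resource) or may also be a whole heap. */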
   if (external_info && external_info->handleType != 0) {
      const VkExternalMemoryHandleTypeFlags d3d12_resource_handle_types =
         VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT | opaque_external_flag;
      const VkExternalMemoryHandleTypeFlags d3d11_texture_handle_types =
         VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT | d3d12_resource_handle_types;
      const VkExternalMemoryFeatureFlags import_export_feature_flags =
         VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
      const VkExternalMemoryFeatureFlags dedicated_feature_flags =
         VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | import_export_feature_flags;

      switch (external_info->handleType) {
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
         external_props->externalMemoryProperties.compatibleHandleTypes = d3d11_texture_handle_types;
         external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d11_texture_handle_types;
         external_props->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags;
         break;
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
         external_props->externalMemoryProperties.compatibleHandleTypes = d3d12_resource_handle_types;
         external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d12_resource_handle_types;
         external_props->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags;
         break;
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
         external_props->externalMemoryProperties.compatibleHandleTypes =
            external_props->externalMemoryProperties.exportFromImportedHandleTypes =
            VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
         external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
         break;
#ifdef _WIN32
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
#else
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
#endif
         external_props->externalMemoryProperties.compatibleHandleTypes = d3d11_texture_handle_types;
         external_props->externalMemoryProperties.exportFromImportedHandleTypes = d3d11_texture_handle_types;
         external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
         break;
#if defined(_WIN32)
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
         if (pdev->dev13) {
            external_props->externalMemoryProperties.compatibleHandleTypes =
               external_props->externalMemoryProperties.exportFromImportedHandleTypes =
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag;
            external_props->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags;
            break;
         }
         FALLTHROUGH;
#endif
      default:
         return VK_ERROR_FORMAT_NOT_SUPPORTED;
      }

      /* Linear textures not supported, but there's nothing else we can deduce from just a handle type */
      if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
          external_info->handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT)
         return VK_ERROR_FORMAT_NOT_SUPPORTED;
   }

   if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
       (usage & ~(VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   if (info->tiling != VK_IMAGE_TILING_OPTIMAL &&
       vk_format_is_depth_or_stencil(info->format))
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   D3D12_FEATURE_DATA_FORMAT_SUPPORT dfmt_info =
      dzn_physical_device_get_format_support(pdev, info->format, info->flags);
   if (dfmt_info.Format == DXGI_FORMAT_UNKNOWN)
      return VK_ERROR_FORMAT_NOT_SUPPORTED;

   bool is_bgra4 = info->format == VK_FORMAT_B4G4R4A4_UNORM_PACK16 &&
                   !(info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT);
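   /* See the B4G4R4A4 note in dzn_physical_device_get_format_properties():
    * without extended usage the emulated format is limited to sampling, so
    * every attachment/storage usage below is rejected for it. */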
!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURE3D)) || ((info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TEXTURECUBE))) return VK_ERROR_FORMAT_NOT_SUPPORTED; /* Due to extended capability querying, we might see 1D support for BC, but we don't actually have it */ if (vk_format_is_block_compressed(info->format) && info->type == VK_IMAGE_TYPE_1D) return VK_ERROR_FORMAT_NOT_SUPPORTED; if ((usage & VK_IMAGE_USAGE_SAMPLED_BIT) && /* Note: format support for SAMPLED is not necessarily accurate for integer formats */ !(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD)) return VK_ERROR_FORMAT_NOT_SUPPORTED; if ((usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) && (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_SHADER_LOAD) || is_bgra4)) return VK_ERROR_FORMAT_NOT_SUPPORTED; if ((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || is_bgra4)) return VK_ERROR_FORMAT_NOT_SUPPORTED; if ((usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL) || is_bgra4)) return VK_ERROR_FORMAT_NOT_SUPPORTED; if ((usage & VK_IMAGE_USAGE_STORAGE_BIT) && (!(dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW) || is_bgra4)) return VK_ERROR_FORMAT_NOT_SUPPORTED; if (info->type == VK_IMAGE_TYPE_3D && info->tiling != VK_IMAGE_TILING_OPTIMAL) return VK_ERROR_FORMAT_NOT_SUPPORTED; bool is_3d = info->type == VK_IMAGE_TYPE_3D; uint32_t max_extent = dzn_physical_device_get_max_extent(is_3d); if (info->tiling == VK_IMAGE_TILING_OPTIMAL && dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MIP) properties->imageFormatProperties.maxMipLevels = dzn_physical_device_get_max_mip_level(is_3d) + 1; else properties->imageFormatProperties.maxMipLevels = 1; if (info->tiling == VK_IMAGE_TILING_OPTIMAL && info->type != VK_IMAGE_TYPE_3D) properties->imageFormatProperties.maxArrayLayers = dzn_physical_device_get_max_array_layers(); else properties->imageFormatProperties.maxArrayLayers = 1; switch (info->type) { case VK_IMAGE_TYPE_1D: properties->imageFormatProperties.maxExtent.width = max_extent; properties->imageFormatProperties.maxExtent.height = 1; properties->imageFormatProperties.maxExtent.depth = 1; break; case VK_IMAGE_TYPE_2D: properties->imageFormatProperties.maxExtent.width = max_extent; properties->imageFormatProperties.maxExtent.height = max_extent; properties->imageFormatProperties.maxExtent.depth = 1; break; case VK_IMAGE_TYPE_3D: properties->imageFormatProperties.maxExtent.width = max_extent; properties->imageFormatProperties.maxExtent.height = max_extent; properties->imageFormatProperties.maxExtent.depth = max_extent; break; default: unreachable("bad VkImageType"); } /* From the Vulkan 1.0 spec, section 34.1.1. Supported Sample Counts: * * sampleCounts will be set to VK_SAMPLE_COUNT_1_BIT if at least one of the * following conditions is true: * * - tiling is VK_IMAGE_TILING_LINEAR * - type is not VK_IMAGE_TYPE_2D * - flags contains VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT * - neither the VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT flag nor the * VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT flag in * VkFormatProperties::optimalTilingFeatures returned by * vkGetPhysicalDeviceFormatProperties is set. 
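 *
 * In practice that means only optimal-tiling, non-cube-compatible 2D
 * images whose format can be used as an attachment may report more than
 * VK_SAMPLE_COUNT_1_BIT.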
 *
 * D3D12 has a few more constraints:
 * - no UAVs on multisample resources
 */
properties->imageFormatProperties.sampleCounts = VK_SAMPLE_COUNT_1_BIT;
if (info->tiling != VK_IMAGE_TILING_LINEAR &&
    info->type == VK_IMAGE_TYPE_2D &&
    !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
    (dfmt_info.Support1 & D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD) &&
    !is_bgra4 &&
    !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
   /* Probe every sample count D3D12 can expose; the bound is inclusive so
    * 64x is checked as well.
    */
   for (uint32_t s = VK_SAMPLE_COUNT_2_BIT; s <= VK_SAMPLE_COUNT_64_BIT; s <<= 1) {
      D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS ms_info = {
         .Format = dfmt_info.Format,
         .SampleCount = s,
      };

      HRESULT hres =
         ID3D12Device1_CheckFeatureSupport(pdev->dev, D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS,
                                           &ms_info, sizeof(ms_info));
      if (!FAILED(hres) && ms_info.NumQualityLevels > 0)
         properties->imageFormatProperties.sampleCounts |= s;
   }
}

/* TODO: set correct value here */
properties->imageFormatProperties.maxResourceSize = UINT32_MAX;
return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
                                       VkFormat format,
                                       VkFormatProperties2 *pFormatProperties)
{
   VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);

   dzn_physical_device_get_format_properties(pdev, format, pFormatProperties);
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
                                            const VkPhysicalDeviceImageFormatInfo2 *info,
                                            VkImageFormatProperties2 *props)
{
   VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);

   return dzn_physical_device_get_image_format_properties(pdev, info, props);
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice,
                                           VkFormat format,
                                           VkImageType type,
                                           VkImageTiling tiling,
                                           VkImageUsageFlags usage,
                                           VkImageCreateFlags createFlags,
                                           VkImageFormatProperties *pImageFormatProperties)
{
   const VkPhysicalDeviceImageFormatInfo2 info = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
      .format = format,
      .type = type,
      .tiling = tiling,
      .usage = usage,
      .flags = createFlags,
   };

   VkImageFormatProperties2 props = { 0 };

   VkResult result =
      dzn_GetPhysicalDeviceImageFormatProperties2(physicalDevice, &info, &props);
   *pImageFormatProperties = props.imageFormatProperties;
   return result;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice,
                                                 VkFormat format,
                                                 VkImageType type,
                                                 VkSampleCountFlagBits samples,
                                                 VkImageUsageFlags usage,
                                                 VkImageTiling tiling,
                                                 uint32_t *pPropertyCount,
                                                 VkSparseImageFormatProperties *pProperties)
{
   *pPropertyCount = 0;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice,
                                                  const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
                                                  uint32_t *pPropertyCount,
                                                  VkSparseImageFormatProperties2 *pProperties)
{
   *pPropertyCount = 0;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice,
                                              const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
                                              VkExternalBufferProperties *pExternalBufferProperties)
{
#if defined(_WIN32)
   VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice);
#endif
   const VkExternalMemoryHandleTypeFlags d3d12_resource_handle_types =
      VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT | opaque_external_flag;
   const VkExternalMemoryFeatureFlags import_export_feature_flags =
      VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
      VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
   const VkExternalMemoryFeatureFlags dedicated_feature_flags =
      VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
import_export_feature_flags; switch (pExternalBufferInfo->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT: pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = d3d12_resource_handle_types; pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = d3d12_resource_handle_types; pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = dedicated_feature_flags; break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT: pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag; pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags; break; #ifdef _WIN32 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT: #else case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: #endif pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | d3d12_resource_handle_types; pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags; break; #if defined(_WIN32) case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: if (pdev->dev13) { pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes = pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT | opaque_external_flag; pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures = import_export_feature_flags; break; } FALLTHROUGH; #endif default: pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties){ 0 }; break; } } VkResult dzn_instance_add_physical_device(struct vk_instance *instance, IUnknown *adapter, const struct dzn_physical_device_desc *desc) { struct dzn_instance *dzn_instance = container_of(instance, struct dzn_instance, vk); if ((dzn_instance->debug_flags & DZN_DEBUG_WARP) && !desc->is_warp) return VK_SUCCESS; return dzn_physical_device_create(instance, adapter, desc); } static VkResult dzn_enumerate_physical_devices(struct vk_instance *instance) { VkResult result = dzn_enumerate_physical_devices_dxcore(instance); #ifdef _WIN32 if (result != VK_SUCCESS) result = dzn_enumerate_physical_devices_dxgi(instance); #endif return result; } static const driOptionDescription dzn_dri_options[] = { DRI_CONF_SECTION_DEBUG DRI_CONF_DZN_CLAIM_WIDE_LINES(false) DRI_CONF_DZN_ENABLE_8BIT_LOADS_STORES(false) DRI_CONF_DZN_DISABLE(false) DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false) DRI_CONF_SECTION_END }; static void dzn_init_dri_config(struct dzn_instance *instance) { driParseOptionInfo(&instance->available_dri_options, dzn_dri_options, ARRAY_SIZE(dzn_dri_options)); driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "dzn", NULL, NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version, instance->vk.app_info.engine_name, instance->vk.app_info.engine_version); } static VkResult dzn_instance_create(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *out) { struct dzn_instance *instance = vk_zalloc2(vk_default_allocator(), pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!instance) return vk_error(NULL, 
VK_ERROR_OUT_OF_HOST_MEMORY); struct vk_instance_dispatch_table dispatch_table; vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &dzn_instance_entrypoints, true); vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false); VkResult result = vk_instance_init(&instance->vk, &instance_extensions, &dispatch_table, pCreateInfo, pAllocator ? pAllocator : vk_default_allocator()); if (result != VK_SUCCESS) { vk_free2(vk_default_allocator(), pAllocator, instance); return result; } instance->vk.physical_devices.enumerate = dzn_enumerate_physical_devices; instance->vk.physical_devices.destroy = dzn_physical_device_destroy; instance->debug_flags = parse_debug_string(getenv("DZN_DEBUG"), dzn_debug_options); #ifdef _WIN32 if (instance->debug_flags & DZN_DEBUG_DEBUGGER) { /* wait for debugger to attach... */ while (!IsDebuggerPresent()) { Sleep(100); } } if (instance->debug_flags & DZN_DEBUG_REDIRECTS) { char home[MAX_PATH], path[MAX_PATH]; if (SUCCEEDED(SHGetFolderPathA(NULL, CSIDL_PROFILE, NULL, 0, home))) { snprintf(path, sizeof(path), "%s\\stderr.txt", home); freopen(path, "w", stderr); snprintf(path, sizeof(path), "%s\\stdout.txt", home); freopen(path, "w", stdout); } } #endif bool missing_validator = false; #ifdef _WIN32 if ((instance->debug_flags & DZN_DEBUG_EXPERIMENTAL) == 0) { instance->dxil_validator = dxil_create_validator(NULL); missing_validator = !instance->dxil_validator; } #endif if (missing_validator) { dzn_instance_destroy(instance, pAllocator); return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); } instance->d3d12_mod = util_dl_open(UTIL_DL_PREFIX "d3d12" UTIL_DL_EXT); if (!instance->d3d12_mod) { dzn_instance_destroy(instance, pAllocator); return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); } instance->d3d12.serialize_root_sig = d3d12_get_serialize_root_sig(instance->d3d12_mod); if (!instance->d3d12.serialize_root_sig) { dzn_instance_destroy(instance, pAllocator); return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED); } instance->factory = try_create_device_factory(instance->d3d12_mod); if (instance->debug_flags & DZN_DEBUG_D3D12) d3d12_enable_debug_layer(instance->d3d12_mod, instance->factory); if (instance->debug_flags & DZN_DEBUG_GBV) d3d12_enable_gpu_validation(instance->d3d12_mod, instance->factory); instance->sync_binary_type = vk_sync_binary_get_type(&dzn_sync_type); dzn_init_dri_config(instance); if (driQueryOptionb(&instance->dri_options, "dzn_disable")) { dzn_instance_destroy(instance, pAllocator); return vk_errorf(NULL, VK_ERROR_INITIALIZATION_FAILED, "dzn_disable set, failing instance creation"); } *out = dzn_instance_to_handle(instance); return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL dzn_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *pInstance) { return dzn_instance_create(pCreateInfo, pAllocator, pInstance); } VKAPI_ATTR VkResult VKAPI_CALL dzn_EnumerateInstanceVersion(uint32_t *pApiVersion) { *pApiVersion = DZN_API_VERSION; return VK_SUCCESS; } VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL dzn_GetInstanceProcAddr(VkInstance _instance, const char *pName) { VK_FROM_HANDLE(dzn_instance, instance, _instance); return vk_instance_get_proc_addr(&instance->vk, &dzn_instance_entrypoints, pName); } /* Windows will use a dll definition file to avoid build errors. 
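 * PUBLIC is redefined to nothing below so the export comes from the .def
 * file rather than __declspec(dllexport). The Vulkan loader then resolves
 * entrypoints through this export, along these lines (illustrative sketch
 * only, not driver code):
 *
 *    PFN_vkCreateInstance create_instance = (PFN_vkCreateInstance)
 *       vk_icdGetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance");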
*/ #ifdef _WIN32 #undef PUBLIC #define PUBLIC #endif PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) { return dzn_GetInstanceProcAddr(instance, pName); } VKAPI_ATTR void VKAPI_CALL dzn_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, VkQueueFamilyProperties2 *pQueueFamilyProperties) { VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties, pQueueFamilyPropertyCount); for (uint32_t i = 0; i < pdev->queue_family_count; i++) { vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) { p->queueFamilyProperties = pdev->queue_families[i].props; vk_foreach_struct(ext, pQueueFamilyProperties->pNext) { vk_debug_ignored_stype(ext->sType); } } } } VKAPI_ATTR void VKAPI_CALL dzn_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties *pMemoryProperties) { VK_FROM_HANDLE(dzn_physical_device, pdev, physicalDevice); *pMemoryProperties = pdev->memory; } VKAPI_ATTR void VKAPI_CALL dzn_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) { dzn_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties); vk_foreach_struct(ext, pMemoryProperties->pNext) { vk_debug_ignored_stype(ext->sType); } } VKAPI_ATTR VkResult VKAPI_CALL dzn_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties) { if (pProperties == NULL) { *pPropertyCount = 0; return VK_SUCCESS; } return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); } static VkResult dzn_queue_sync_wait(struct dzn_queue *queue, const struct vk_sync_wait *wait) { if (wait->sync->type == &vk_sync_dummy_type) return VK_SUCCESS; struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk); assert(wait->sync->type == &dzn_sync_type); struct dzn_sync *sync = container_of(wait->sync, struct dzn_sync, vk); uint64_t value = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? wait->wait_value : 1; assert(sync->fence != NULL); if (value > 0 && FAILED(ID3D12CommandQueue_Wait(queue->cmdqueue, sync->fence, value))) return vk_error(device, VK_ERROR_UNKNOWN); return VK_SUCCESS; } static VkResult dzn_queue_sync_signal(struct dzn_queue *queue, const struct vk_sync_signal *signal) { if (signal->sync->type == &vk_sync_dummy_type) return VK_SUCCESS; struct dzn_device *device = container_of(queue->vk.base.device, struct dzn_device, vk); assert(signal->sync->type == &dzn_sync_type); struct dzn_sync *sync = container_of(signal->sync, struct dzn_sync, vk); uint64_t value = (sync->vk.flags & VK_SYNC_IS_TIMELINE) ? 
signal->signal_value : 1; assert(value > 0); assert(sync->fence != NULL); if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, sync->fence, value))) return vk_error(device, VK_ERROR_UNKNOWN); return VK_SUCCESS; } static VkResult dzn_queue_submit(struct vk_queue *q, struct vk_queue_submit *info) { struct dzn_queue *queue = container_of(q, struct dzn_queue, vk); struct dzn_device *device = container_of(q->base.device, struct dzn_device, vk); VkResult result = VK_SUCCESS; for (uint32_t i = 0; i < info->wait_count; i++) { result = dzn_queue_sync_wait(queue, &info->waits[i]); if (result != VK_SUCCESS) return result; } ID3D12CommandList **cmdlists = alloca(info->command_buffer_count * sizeof(ID3D12CommandList*)); for (uint32_t i = 0; i < info->command_buffer_count; i++) { struct dzn_cmd_buffer *cmd_buffer = container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk); cmdlists[i] = (ID3D12CommandList *)cmd_buffer->cmdlist; util_dynarray_foreach(&cmd_buffer->queries.reset, struct dzn_cmd_buffer_query_range, range) { mtx_lock(&range->qpool->queries_lock); for (uint32_t q = range->start; q < range->start + range->count; q++) { struct dzn_query *query = &range->qpool->queries[q]; if (query->fence) { ID3D12Fence_Release(query->fence); query->fence = NULL; } query->fence_value = 0; } mtx_unlock(&range->qpool->queries_lock); } } ID3D12CommandQueue_ExecuteCommandLists(queue->cmdqueue, info->command_buffer_count, cmdlists); for (uint32_t i = 0; i < info->command_buffer_count; i++) { struct dzn_cmd_buffer* cmd_buffer = container_of(info->command_buffers[i], struct dzn_cmd_buffer, vk); util_dynarray_foreach(&cmd_buffer->events.signal, struct dzn_cmd_event_signal, evt) { if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, evt->event->fence, evt->value ? 1 : 0))) return vk_error(device, VK_ERROR_UNKNOWN); } util_dynarray_foreach(&cmd_buffer->queries.signal, struct dzn_cmd_buffer_query_range, range) { mtx_lock(&range->qpool->queries_lock); for (uint32_t q = range->start; q < range->start + range->count; q++) { struct dzn_query *query = &range->qpool->queries[q]; query->fence_value = queue->fence_point + 1; query->fence = queue->fence; ID3D12Fence_AddRef(query->fence); } mtx_unlock(&range->qpool->queries_lock); } } for (uint32_t i = 0; i < info->signal_count; i++) { result = dzn_queue_sync_signal(queue, &info->signals[i]); if (result != VK_SUCCESS) return vk_error(device, VK_ERROR_UNKNOWN); } if (FAILED(ID3D12CommandQueue_Signal(queue->cmdqueue, queue->fence, ++queue->fence_point))) return vk_error(device, VK_ERROR_UNKNOWN); return VK_SUCCESS; } static void dzn_queue_finish(struct dzn_queue *queue) { if (queue->cmdqueue) ID3D12CommandQueue_Release(queue->cmdqueue); if (queue->fence) ID3D12Fence_Release(queue->fence); vk_queue_finish(&queue->vk); } static VkResult dzn_queue_init(struct dzn_queue *queue, struct dzn_device *device, const VkDeviceQueueCreateInfo *pCreateInfo, uint32_t index_in_family) { struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); VkResult result = vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family); if (result != VK_SUCCESS) return result; queue->vk.driver_submit = dzn_queue_submit; assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count); D3D12_COMMAND_QUEUE_DESC queue_desc = pdev->queue_families[pCreateInfo->queueFamilyIndex].desc; float priority_in = pCreateInfo->pQueuePriorities[index_in_family]; queue_desc.Priority = priority_in > 0.5f ? 
D3D12_COMMAND_QUEUE_PRIORITY_HIGH : D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; queue_desc.NodeMask = 0; if (FAILED(ID3D12Device1_CreateCommandQueue(device->dev, &queue_desc, &IID_ID3D12CommandQueue, (void **)&queue->cmdqueue))) { dzn_queue_finish(queue); return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); } if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void **)&queue->fence))) { dzn_queue_finish(queue); return vk_error(device->vk.physical->instance, VK_ERROR_INITIALIZATION_FAILED); } return VK_SUCCESS; } static VkResult dzn_device_create_sync_for_memory(struct vk_device *device, VkDeviceMemory memory, bool signal_memory, struct vk_sync **sync_out) { return vk_sync_create(device, &vk_sync_dummy_type, 0, 1, sync_out); } static VkResult dzn_device_query_init(struct dzn_device *device) { /* FIXME: create the resource in the default heap */ D3D12_HEAP_PROPERTIES hprops = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, D3D12_HEAP_TYPE_UPLOAD); D3D12_RESOURCE_DESC rdesc = { .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, .Width = DZN_QUERY_REFS_RES_SIZE, .Height = 1, .DepthOrArraySize = 1, .MipLevels = 1, .Format = DXGI_FORMAT_UNKNOWN, .SampleDesc = { .Count = 1, .Quality = 0 }, .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, .Flags = D3D12_RESOURCE_FLAG_NONE, }; if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &hprops, D3D12_HEAP_FLAG_NONE, &rdesc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&device->queries.refs))) return vk_error(device->vk.physical, VK_ERROR_OUT_OF_DEVICE_MEMORY); uint8_t *queries_ref; if (FAILED(ID3D12Resource_Map(device->queries.refs, 0, NULL, (void **)&queries_ref))) return vk_error(device->vk.physical, VK_ERROR_OUT_OF_HOST_MEMORY); memset(queries_ref + DZN_QUERY_REFS_ALL_ONES_OFFSET, 0xff, DZN_QUERY_REFS_SECTION_SIZE); memset(queries_ref + DZN_QUERY_REFS_ALL_ZEROS_OFFSET, 0x0, DZN_QUERY_REFS_SECTION_SIZE); ID3D12Resource_Unmap(device->queries.refs, 0, NULL); return VK_SUCCESS; } static void dzn_device_query_finish(struct dzn_device *device) { if (device->queries.refs) ID3D12Resource_Release(device->queries.refs); } static void dzn_device_destroy(struct dzn_device *device, const VkAllocationCallbacks *pAllocator) { if (!device) return; struct dzn_instance *instance = container_of(device->vk.physical->instance, struct dzn_instance, vk); vk_foreach_queue_safe(q, &device->vk) { struct dzn_queue *queue = container_of(q, struct dzn_queue, vk); dzn_queue_finish(queue); } dzn_device_query_finish(device); dzn_meta_finish(device); dzn_foreach_pool_type(type) { dzn_descriptor_heap_finish(&device->device_heaps[type].heap); util_dynarray_fini(&device->device_heaps[type].slot_freelist); mtx_destroy(&device->device_heaps[type].lock); } if (device->dev_config) ID3D12DeviceConfiguration_Release(device->dev_config); if (device->dev) ID3D12Device1_Release(device->dev); if (device->dev10) ID3D12Device1_Release(device->dev10); if (device->dev11) ID3D12Device1_Release(device->dev11); if (device->dev12) ID3D12Device1_Release(device->dev12); if (device->dev13) ID3D12Device1_Release(device->dev13); vk_device_finish(&device->vk); vk_free2(&instance->vk.alloc, pAllocator, device); } static VkResult dzn_device_check_status(struct vk_device *dev) { struct dzn_device *device = container_of(dev, struct dzn_device, vk); if (FAILED(ID3D12Device_GetDeviceRemovedReason(device->dev))) return vk_device_set_lost(&device->vk, "D3D12 device 
removed"); return VK_SUCCESS; } static VkResult dzn_device_create(struct dzn_physical_device *pdev, const VkDeviceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDevice *out) { struct dzn_instance *instance = container_of(pdev->vk.instance, struct dzn_instance, vk); uint32_t graphics_queue_count = 0; uint32_t queue_count = 0; for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; queue_count += qinfo->queueCount; if (pdev->queue_families[qinfo->queueFamilyIndex].props.queueFlags & VK_QUEUE_GRAPHICS_BIT) graphics_queue_count += qinfo->queueCount; } /* Add a swapchain queue if there's no or too many graphics queues */ if (graphics_queue_count != 1) queue_count++; VK_MULTIALLOC(ma); VK_MULTIALLOC_DECL(&ma, struct dzn_device, device, 1); VK_MULTIALLOC_DECL(&ma, struct dzn_queue, queues, queue_count); if (!vk_multialloc_zalloc2(&ma, &instance->vk.alloc, pAllocator, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY); struct vk_device_dispatch_table dispatch_table; /* For secondary command buffer support, overwrite any command entrypoints * in the main device-level dispatch table with * vk_cmd_enqueue_unless_primary_Cmd*. */ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints, true); vk_device_dispatch_table_from_entrypoints(&dispatch_table, &dzn_device_entrypoints, false); vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false); /* Populate our primary cmd_dispatch table. */ vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, &dzn_device_entrypoints, true); vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch, &vk_common_device_entrypoints, false); /* Override entrypoints with alternatives based on supported features. */ if (pdev->options12.EnhancedBarriersSupported) { device->cmd_dispatch.CmdPipelineBarrier2 = dzn_CmdPipelineBarrier2_enhanced; } VkResult result = vk_device_init(&device->vk, &pdev->vk, &dispatch_table, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { vk_free2(&device->vk.alloc, pAllocator, device); return result; } /* Must be done after vk_device_init() because this function memset(0) the * whole struct. 
*/ device->vk.command_dispatch_table = &device->cmd_dispatch; device->vk.create_sync_for_memory = dzn_device_create_sync_for_memory; device->vk.check_status = dzn_device_check_status; device->dev = pdev->dev; ID3D12Device1_AddRef(device->dev); if (pdev->dev10) { device->dev10 = pdev->dev10; ID3D12Device1_AddRef(device->dev10); } if (pdev->dev11) { device->dev11 = pdev->dev11; ID3D12Device1_AddRef(device->dev11); } if (pdev->dev12) { device->dev12 = pdev->dev12; ID3D12Device1_AddRef(device->dev12); } if (pdev->dev13) { device->dev13 = pdev->dev13; ID3D12Device1_AddRef(device->dev13); } ID3D12InfoQueue *info_queue; if (SUCCEEDED(ID3D12Device1_QueryInterface(device->dev, &IID_ID3D12InfoQueue, (void **)&info_queue))) { D3D12_MESSAGE_SEVERITY severities[] = { D3D12_MESSAGE_SEVERITY_INFO, D3D12_MESSAGE_SEVERITY_WARNING, }; D3D12_MESSAGE_ID msg_ids[] = { D3D12_MESSAGE_ID_CLEARRENDERTARGETVIEW_MISMATCHINGCLEARVALUE, }; D3D12_INFO_QUEUE_FILTER NewFilter = { 0 }; NewFilter.DenyList.NumSeverities = ARRAY_SIZE(severities); NewFilter.DenyList.pSeverityList = severities; NewFilter.DenyList.NumIDs = ARRAY_SIZE(msg_ids); NewFilter.DenyList.pIDList = msg_ids; ID3D12InfoQueue_PushStorageFilter(info_queue, &NewFilter); ID3D12InfoQueue_Release(info_queue); } IUnknown_QueryInterface(device->dev, &IID_ID3D12DeviceConfiguration, (void **)&device->dev_config); result = dzn_meta_init(device); if (result != VK_SUCCESS) { dzn_device_destroy(device, pAllocator); return result; } result = dzn_device_query_init(device); if (result != VK_SUCCESS) { dzn_device_destroy(device, pAllocator); return result; } uint32_t qindex = 0; for (uint32_t qf = 0; qf < pCreateInfo->queueCreateInfoCount; qf++) { const VkDeviceQueueCreateInfo *qinfo = &pCreateInfo->pQueueCreateInfos[qf]; for (uint32_t q = 0; q < qinfo->queueCount; q++) { result = dzn_queue_init(&queues[qindex++], device, qinfo, q); if (result != VK_SUCCESS) { dzn_device_destroy(device, pAllocator); return result; } if (graphics_queue_count == 1 && pdev->queue_families[qinfo->queueFamilyIndex].props.queueFlags & VK_QUEUE_GRAPHICS_BIT) device->swapchain_queue = &queues[qindex - 1]; } } if (!device->swapchain_queue) { const float swapchain_queue_priority = 0.0f; VkDeviceQueueCreateInfo swapchain_queue_info = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .flags = 0, .queueCount = 1, .pQueuePriorities = &swapchain_queue_priority, }; for (uint32_t qf = 0; qf < pdev->queue_family_count; qf++) { if (pdev->queue_families[qf].props.queueFlags & VK_QUEUE_GRAPHICS_BIT) { swapchain_queue_info.queueFamilyIndex = qf; break; } } result = dzn_queue_init(&queues[qindex], device, &swapchain_queue_info, 0); if (result != VK_SUCCESS) { dzn_device_destroy(device, pAllocator); return result; } device->swapchain_queue = &queues[qindex++]; device->need_swapchain_blits = true; } device->support_static_samplers = true; device->bindless = (instance->debug_flags & DZN_DEBUG_BINDLESS) != 0 || device->vk.enabled_features.descriptorIndexing || device->vk.enabled_extensions.EXT_descriptor_indexing || device->vk.enabled_features.bufferDeviceAddress || device->vk.enabled_extensions.EXT_buffer_device_address; if (device->bindless) { uint32_t sampler_count = MIN2(pdev->options19.MaxSamplerDescriptorHeapSize, 4000); device->support_static_samplers = pdev->options19.MaxSamplerDescriptorHeapSizeWithStaticSamplers >= sampler_count; dzn_foreach_pool_type(type) { uint32_t descriptor_count = type == D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER ? 
sampler_count : D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1; result = dzn_descriptor_heap_init(&device->device_heaps[type].heap, device, type, descriptor_count, true); if (result != VK_SUCCESS) { dzn_device_destroy(device, pAllocator); return result; } mtx_init(&device->device_heaps[type].lock, mtx_plain); util_dynarray_init(&device->device_heaps[type].slot_freelist, NULL); device->device_heaps[type].next_alloc_slot = 0; } } assert(queue_count == qindex); *out = dzn_device_to_handle(device); return VK_SUCCESS; } static ID3DBlob * serialize_root_sig(struct dzn_device *device, const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc) { struct dzn_instance *instance = container_of(device->vk.physical->instance, struct dzn_instance, vk); ID3DBlob *sig = NULL, *error = NULL; HRESULT hr = device->dev_config ? ID3D12DeviceConfiguration_SerializeVersionedRootSignature(device->dev_config, desc, &sig, &error) : instance->d3d12.serialize_root_sig(desc, &sig, &error); if (FAILED(hr)) { if (instance->debug_flags & DZN_DEBUG_SIG) { const char *error_msg = (const char *)ID3D10Blob_GetBufferPointer(error); fprintf(stderr, "== SERIALIZE ROOT SIG ERROR =============================================\n" "%s\n" "== END ==========================================================\n", error_msg); } } if (error) ID3D10Blob_Release(error); return sig; } ID3D12RootSignature * dzn_device_create_root_sig(struct dzn_device *device, const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc) { ID3DBlob *sig = serialize_root_sig(device, desc); if (!sig) return NULL; ID3D12RootSignature *root_sig = NULL; ID3D12Device1_CreateRootSignature(device->dev, 0, ID3D10Blob_GetBufferPointer(sig), ID3D10Blob_GetBufferSize(sig), &IID_ID3D12RootSignature, (void **)&root_sig); ID3D10Blob_Release(sig); return root_sig; } VKAPI_ATTR VkResult VKAPI_CALL dzn_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) { VK_FROM_HANDLE(dzn_physical_device, physical_device, physicalDevice); VkResult result; assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO); /* Check enabled features */ if (pCreateInfo->pEnabledFeatures) { result = vk_physical_device_check_device_features(&physical_device->vk, pCreateInfo); if (result != VK_SUCCESS) return vk_error(physical_device, result); } /* Check requested queues and fail if we are requested to create any * queues with flags we don't support. 
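 * (VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT is the only queue creation flag
 * defined so far, and protected memory isn't supported here, so any
 * non-zero flags value is rejected).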
*/ assert(pCreateInfo->queueCreateInfoCount > 0); for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { if (pCreateInfo->pQueueCreateInfos[i].flags != 0) return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED); } return dzn_device_create(physical_device, pCreateInfo, pAllocator, pDevice); } VKAPI_ATTR void VKAPI_CALL dzn_DestroyDevice(VkDevice dev, const VkAllocationCallbacks *pAllocator) { VK_FROM_HANDLE(dzn_device, device, dev); device->vk.dispatch_table.DeviceWaitIdle(dev); dzn_device_destroy(device, pAllocator); } static void dzn_device_memory_destroy(struct dzn_device_memory *mem, const VkAllocationCallbacks *pAllocator) { if (!mem) return; struct dzn_device *device = container_of(mem->base.device, struct dzn_device, vk); if (mem->map && mem->map_res) ID3D12Resource_Unmap(mem->map_res, 0, NULL); if (mem->map_res) ID3D12Resource_Release(mem->map_res); if (mem->heap) ID3D12Heap_Release(mem->heap); if (mem->dedicated_res) ID3D12Resource_Release(mem->dedicated_res); #ifdef _WIN32 if (mem->export_handle) CloseHandle(mem->export_handle); #else if ((intptr_t)mem->export_handle >= 0) close((int)(intptr_t)mem->export_handle); #endif vk_object_base_finish(&mem->base); vk_free2(&device->vk.alloc, pAllocator, mem); } static D3D12_HEAP_PROPERTIES deduce_heap_properties_from_memory(struct dzn_physical_device *pdevice, const VkMemoryType *mem_type) { D3D12_HEAP_PROPERTIES properties = { .Type = D3D12_HEAP_TYPE_CUSTOM }; properties.MemoryPoolPreference = ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && !pdevice->architecture.UMA) ? D3D12_MEMORY_POOL_L1 : D3D12_MEMORY_POOL_L0; if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) || ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && pdevice->architecture.CacheCoherentUMA)) { properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; } else if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; } else { properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE; } return properties; } static VkResult dzn_device_memory_create(struct dzn_device *device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *out) { struct dzn_physical_device *pdevice = container_of(device->vk.physical, struct dzn_physical_device, vk); const struct dzn_buffer *buffer = NULL; const struct dzn_image *image = NULL; VkExternalMemoryHandleTypeFlags export_flags = 0; HANDLE import_handle = NULL; bool imported_from_d3d11 = false; void *host_pointer = NULL; #ifdef _WIN32 const wchar_t *import_name = NULL; const VkExportMemoryWin32HandleInfoKHR *win32_export = NULL; #endif vk_foreach_struct_const(ext, pAllocateInfo->pNext) { switch (ext->sType) { case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO: { const VkExportMemoryAllocateInfo *exp = (const VkExportMemoryAllocateInfo *)ext; export_flags = exp->handleTypes; break; } #ifdef _WIN32 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR: { const VkImportMemoryWin32HandleInfoKHR *imp = (const VkImportMemoryWin32HandleInfoKHR *)ext; switch (imp->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT: imported_from_d3d11 = true; FALLTHROUGH; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT: break; default: return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } import_handle = imp->handle; 
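         /* A non-NULL name takes precedence over the raw handle: it is
          * resolved through ID3D12Device_OpenSharedHandleByName() further
          * down before the import is opened.
          */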
import_name = imp->name; break; } case VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR: win32_export = (const VkExportMemoryWin32HandleInfoKHR *)ext; break; case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT: { const VkImportMemoryHostPointerInfoEXT *imp = (const VkImportMemoryHostPointerInfoEXT *)ext; host_pointer = imp->pHostPointer; break; } #else case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR: { const VkImportMemoryFdInfoKHR *imp = (const VkImportMemoryFdInfoKHR *)ext; switch (imp->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT: break; default: return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } import_handle = (HANDLE)(intptr_t)imp->fd; break; } #endif case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO: { const VkMemoryDedicatedAllocateInfo *dedicated = (const VkMemoryDedicatedAllocateInfo *)ext; buffer = dzn_buffer_from_handle(dedicated->buffer); image = dzn_image_from_handle(dedicated->image); assert(!buffer || !image); break; } default: vk_debug_ignored_stype(ext->sType); break; } } const VkMemoryType *mem_type = &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex]; D3D12_HEAP_DESC heap_desc = { 0 }; heap_desc.SizeInBytes = pAllocateInfo->allocationSize; if (buffer) { heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; } else if (image) { heap_desc.Alignment = image->vk.samples > 1 ? D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; } else { heap_desc.Alignment = heap_desc.SizeInBytes >= D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT ? D3D12_DEFAULT_MSAA_RESOURCE_PLACEMENT_ALIGNMENT : D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; } if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) image = NULL; VkExternalMemoryHandleTypeFlags valid_flags = opaque_external_flag | (buffer || image ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT : VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT); if (image && imported_from_d3d11) valid_flags |= VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT; if (export_flags & ~valid_flags) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); struct dzn_device_memory *mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!mem) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY); #ifndef _WIN32 mem->export_handle = (HANDLE)(intptr_t)-1; #endif /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". 
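 * mem->size keeps the application-requested size; heap_desc.SizeInBytes is
 * rounded up to the heap alignment chosen above right after the assert
 * below.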
*/ assert(pAllocateInfo->allocationSize > 0); mem->size = pAllocateInfo->allocationSize; heap_desc.SizeInBytes = ALIGN_POT(heap_desc.SizeInBytes, heap_desc.Alignment); if (!image && !buffer) heap_desc.Flags = dzn_physical_device_get_heap_flags_for_mem_type(pdevice, pAllocateInfo->memoryTypeIndex); heap_desc.Properties = deduce_heap_properties_from_memory(pdevice, mem_type); if (export_flags) { heap_desc.Flags |= D3D12_HEAP_FLAG_SHARED; assert(host_pointer || heap_desc.Properties.CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE); } VkResult error = VK_ERROR_OUT_OF_DEVICE_MEMORY; #ifdef _WIN32 HANDLE handle_from_name = NULL; if (import_name) { if (FAILED(ID3D12Device_OpenSharedHandleByName(device->dev, import_name, GENERIC_ALL, &handle_from_name))) { error = VK_ERROR_INVALID_EXTERNAL_HANDLE; goto cleanup; } import_handle = handle_from_name; } #endif if (host_pointer) { error = VK_ERROR_INVALID_EXTERNAL_HANDLE; #if defined(_WIN32) if (!device->dev13) goto cleanup; if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, host_pointer, heap_desc.SizeInBytes, &IID_ID3D12Heap, (void**)&mem->heap))) goto cleanup; D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap); if (desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM) desc.Properties = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, desc.Properties.Type); if ((heap_desc.Flags & ~desc.Flags) || desc.Properties.CPUPageProperty != heap_desc.Properties.CPUPageProperty || desc.Properties.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference) goto cleanup; mem->map = host_pointer; mem->res_flags = D3D12_RESOURCE_FLAG_ALLOW_CROSS_ADAPTER; #else goto cleanup; #endif } else if (import_handle) { error = VK_ERROR_INVALID_EXTERNAL_HANDLE; if (image || buffer) { if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, import_handle, &IID_ID3D12Resource, (void **)&mem->dedicated_res))) goto cleanup; /* Verify compatibility */ D3D12_RESOURCE_DESC desc = dzn_ID3D12Resource_GetDesc(mem->dedicated_res); D3D12_HEAP_PROPERTIES opened_props = { 0 }; D3D12_HEAP_FLAGS opened_flags = 0; ID3D12Resource_GetHeapProperties(mem->dedicated_res, &opened_props, &opened_flags); if (opened_props.Type != D3D12_HEAP_TYPE_CUSTOM) opened_props = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, opened_props.Type); /* Don't validate format, cast lists aren't reflectable so it could be valid */ if (image) { if (desc.Dimension != image->desc.Dimension || desc.MipLevels != image->desc.MipLevels || desc.Width != image->desc.Width || desc.Height != image->desc.Height || desc.DepthOrArraySize != image->desc.DepthOrArraySize || (image->desc.Flags & ~desc.Flags) || desc.SampleDesc.Count != image->desc.SampleDesc.Count) goto cleanup; } else if (desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || desc.Width != buffer->desc.Width || buffer->desc.Flags & ~(desc.Flags)) goto cleanup; if (opened_props.CPUPageProperty != heap_desc.Properties.CPUPageProperty || opened_props.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference) goto cleanup; if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS) && desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) goto cleanup; if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) && (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) goto cleanup; else if ((heap_desc.Flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES) && !(desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) goto cleanup; } else { if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, import_handle, &IID_ID3D12Heap, (void 
**)&mem->heap))) goto cleanup; D3D12_HEAP_DESC desc = dzn_ID3D12Heap_GetDesc(mem->heap); if (desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM) desc.Properties = dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, desc.Properties.Type); if (desc.Alignment < heap_desc.Alignment || desc.SizeInBytes < heap_desc.SizeInBytes || (heap_desc.Flags & ~desc.Flags) || desc.Properties.CPUPageProperty != heap_desc.Properties.CPUPageProperty || desc.Properties.MemoryPoolPreference != heap_desc.Properties.MemoryPoolPreference) goto cleanup; } } else if (image) { if (device->dev10 && image->castable_format_count > 0) { D3D12_RESOURCE_DESC1 desc = { .Dimension = image->desc.Dimension, .Alignment = image->desc.Alignment, .Width = image->desc.Width, .Height = image->desc.Height, .DepthOrArraySize = image->desc.DepthOrArraySize, .MipLevels = image->desc.MipLevels, .Format = image->desc.Format, .SampleDesc = image->desc.SampleDesc, .Layout = image->desc.Layout, .Flags = image->desc.Flags, }; if (FAILED(ID3D12Device10_CreateCommittedResource3(device->dev10, &heap_desc.Properties, heap_desc.Flags, &desc, D3D12_BARRIER_LAYOUT_COMMON, NULL, NULL, image->castable_format_count, image->castable_formats, &IID_ID3D12Resource, (void **)&mem->dedicated_res))) goto cleanup; } else if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &heap_desc.Properties, heap_desc.Flags, &image->desc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&mem->dedicated_res))) goto cleanup; } else if (buffer) { if (FAILED(ID3D12Device1_CreateCommittedResource(device->dev, &heap_desc.Properties, heap_desc.Flags, &buffer->desc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&mem->dedicated_res))) goto cleanup; } else { if (FAILED(ID3D12Device1_CreateHeap(device->dev, &heap_desc, &IID_ID3D12Heap, (void **)&mem->heap))) goto cleanup; } if ((mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && !(heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS) && !mem->map){ assert(!image); if (buffer) { mem->map_res = mem->dedicated_res; ID3D12Resource_AddRef(mem->map_res); } else { D3D12_RESOURCE_DESC res_desc = { 0 }; res_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; res_desc.Format = DXGI_FORMAT_UNKNOWN; res_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; res_desc.Width = heap_desc.SizeInBytes; res_desc.Height = 1; res_desc.DepthOrArraySize = 1; res_desc.MipLevels = 1; res_desc.SampleDesc.Count = 1; res_desc.SampleDesc.Quality = 0; res_desc.Flags = D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; res_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; HRESULT hr = ID3D12Device1_CreatePlacedResource(device->dev, mem->heap, 0, &res_desc, D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&mem->map_res); if (FAILED(hr)) goto cleanup; } } if (export_flags) { error = VK_ERROR_INVALID_EXTERNAL_HANDLE; ID3D12DeviceChild *shareable = mem->heap ? (void *)mem->heap : (void *)mem->dedicated_res; DWORD dwAccess = GENERIC_ALL; /* Ignore any provided access, this is the only one D3D allows */ #ifdef _WIN32 const SECURITY_ATTRIBUTES *pAttributes = win32_export ? win32_export->pAttributes : NULL; const wchar_t *name = win32_export ? 
win32_export->name : NULL; #else const SECURITY_ATTRIBUTES *pAttributes = NULL; const wchar_t *name = NULL; #endif if (FAILED(ID3D12Device_CreateSharedHandle(device->dev, shareable, pAttributes, dwAccess, name, &mem->export_handle))) goto cleanup; } *out = dzn_device_memory_to_handle(mem); return VK_SUCCESS; cleanup: #ifdef _WIN32 if (handle_from_name) CloseHandle(handle_from_name); #endif dzn_device_memory_destroy(mem, pAllocator); return vk_error(device, error); } VKAPI_ATTR VkResult VKAPI_CALL dzn_AllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem) { return dzn_device_memory_create(dzn_device_from_handle(device), pAllocateInfo, pAllocator, pMem); } VKAPI_ATTR void VKAPI_CALL dzn_FreeMemory(VkDevice device, VkDeviceMemory mem, const VkAllocationCallbacks *pAllocator) { dzn_device_memory_destroy(dzn_device_memory_from_handle(mem), pAllocator); } VKAPI_ATTR VkResult VKAPI_CALL dzn_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size, VkMemoryMapFlags flags, void **ppData) { VK_FROM_HANDLE(dzn_device, device, _device); VK_FROM_HANDLE(dzn_device_memory, mem, _memory); if (mem == NULL) { *ppData = NULL; return VK_SUCCESS; } if (mem->map && !mem->map_res) { *ppData = ((uint8_t *)mem->map) + offset; return VK_SUCCESS; } if (size == VK_WHOLE_SIZE) size = mem->size - offset; /* From the Vulkan spec version 1.0.32 docs for MapMemory: * * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 * assert(size != 0); * * If size is not equal to VK_WHOLE_SIZE, size must be less than or * equal to the size of the memory minus offset */ assert(size > 0); assert(offset + size <= mem->size); assert(mem->map_res); D3D12_RANGE range = { 0 }; range.Begin = offset; range.End = offset + size; void *map = NULL; if (FAILED(ID3D12Resource_Map(mem->map_res, 0, &range, &map))) return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED); mem->map = map; mem->map_size = size; *ppData = ((uint8_t *) map) + offset; return VK_SUCCESS; } VKAPI_ATTR void VKAPI_CALL dzn_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) { VK_FROM_HANDLE(dzn_device_memory, mem, _memory); if (mem == NULL) return; if (!mem->map_res) return; ID3D12Resource_Unmap(mem->map_res, 0, NULL); mem->map = NULL; mem->map_size = 0; } VKAPI_ATTR VkResult VKAPI_CALL dzn_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL dzn_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount, const VkMappedMemoryRange *pMemoryRanges) { return VK_SUCCESS; } static void dzn_buffer_destroy(struct dzn_buffer *buf, const VkAllocationCallbacks *pAllocator) { if (!buf) return; struct dzn_device *device = container_of(buf->base.device, struct dzn_device, vk); if (buf->res) ID3D12Resource_Release(buf->res); dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->cbv_bindless_slot); dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, buf->uav_bindless_slot); if (buf->custom_views) { hash_table_foreach(buf->custom_views, entry) { free((void *)entry->key); dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, (int)(intptr_t)entry->data); } _mesa_hash_table_destroy(buf->custom_views, NULL); } vk_object_base_finish(&buf->base); vk_free2(&device->vk.alloc, pAllocator, buf); } static VkResult dzn_buffer_create(struct dzn_device 
*device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkBuffer *out) { struct dzn_buffer *buf = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*buf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!buf) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); vk_object_base_init(&device->vk, &buf->base, VK_OBJECT_TYPE_BUFFER); buf->create_flags = pCreateInfo->flags; buf->size = pCreateInfo->size; buf->usage = pCreateInfo->usage; if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) buf->size = MAX2(buf->size, ALIGN_POT(buf->size, 256)); if (buf->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT) buf->size = MAX2(buf->size, ALIGN_POT(buf->size, 4)); buf->desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; buf->desc.Format = DXGI_FORMAT_UNKNOWN; buf->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT; buf->desc.Width = buf->size; buf->desc.Height = 1; buf->desc.DepthOrArraySize = 1; buf->desc.MipLevels = 1; buf->desc.SampleDesc.Count = 1; buf->desc.SampleDesc.Quality = 0; buf->desc.Flags = D3D12_RESOURCE_FLAG_NONE; buf->desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; buf->valid_access = D3D12_BARRIER_ACCESS_VERTEX_BUFFER | D3D12_BARRIER_ACCESS_CONSTANT_BUFFER | D3D12_BARRIER_ACCESS_INDEX_BUFFER | D3D12_BARRIER_ACCESS_SHADER_RESOURCE | D3D12_BARRIER_ACCESS_STREAM_OUTPUT | D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT | D3D12_BARRIER_ACCESS_PREDICATION | D3D12_BARRIER_ACCESS_COPY_DEST | D3D12_BARRIER_ACCESS_COPY_SOURCE | D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_READ | D3D12_BARRIER_ACCESS_RAYTRACING_ACCELERATION_STRUCTURE_WRITE; if (buf->usage & (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) { buf->desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; buf->valid_access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; } buf->cbv_bindless_slot = buf->uav_bindless_slot = -1; if (device->bindless) { if (buf->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { buf->cbv_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); if (buf->cbv_bindless_slot < 0) { dzn_buffer_destroy(buf, pAllocator); return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); } } if (buf->usage & (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) { buf->uav_bindless_slot = dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); if (buf->uav_bindless_slot < 0) { dzn_buffer_destroy(buf, pAllocator); return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); } } } if (device->bindless) mtx_init(&buf->bindless_view_lock, mtx_plain); const VkExternalMemoryBufferCreateInfo *external_info = vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO); if (external_info && external_info->handleTypes != 0) buf->shared = true; *out = dzn_buffer_to_handle(buf); return VK_SUCCESS; } DXGI_FORMAT dzn_buffer_get_dxgi_format(VkFormat format) { enum pipe_format pfmt = vk_format_to_pipe_format(format); return dzn_pipe_to_dxgi_format(pfmt); } D3D12_TEXTURE_COPY_LOCATION dzn_buffer_get_copy_loc(const struct dzn_buffer *buf, VkFormat format, const VkBufferImageCopy2 *region, VkImageAspectFlagBits aspect, uint32_t layer) { struct dzn_physical_device *pdev = container_of(buf->base.device->physical, struct dzn_physical_device, vk); const uint32_t buffer_row_length = region->bufferRowLength ? 
region->bufferRowLength : region->imageExtent.width; VkFormat plane_format = dzn_image_get_plane_format(format, aspect); enum pipe_format pfmt = vk_format_to_pipe_format(plane_format); uint32_t blksz = util_format_get_blocksize(pfmt); uint32_t blkw = util_format_get_blockwidth(pfmt); uint32_t blkh = util_format_get_blockheight(pfmt); D3D12_TEXTURE_COPY_LOCATION loc = { .pResource = buf->res, .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, .PlacedFootprint = { .Footprint = { .Format = dzn_image_get_placed_footprint_format(pdev, format, aspect), .Width = region->imageExtent.width, .Height = region->imageExtent.height, .Depth = region->imageExtent.depth, .RowPitch = blksz * DIV_ROUND_UP(buffer_row_length, blkw), }, }, }; uint32_t buffer_layer_stride = loc.PlacedFootprint.Footprint.RowPitch * DIV_ROUND_UP(loc.PlacedFootprint.Footprint.Height, blkh); loc.PlacedFootprint.Offset = region->bufferOffset + (layer * buffer_layer_stride); return loc; } D3D12_TEXTURE_COPY_LOCATION dzn_buffer_get_line_copy_loc(const struct dzn_buffer *buf, VkFormat format, const VkBufferImageCopy2 *region, const D3D12_TEXTURE_COPY_LOCATION *loc, uint32_t y, uint32_t z, uint32_t *start_x) { uint32_t buffer_row_length = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width; uint32_t buffer_image_height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height; format = dzn_image_get_plane_format(format, region->imageSubresource.aspectMask); enum pipe_format pfmt = vk_format_to_pipe_format(format); uint32_t blksz = util_format_get_blocksize(pfmt); uint32_t blkw = util_format_get_blockwidth(pfmt); uint32_t blkh = util_format_get_blockheight(pfmt); uint32_t blkd = util_format_get_blockdepth(pfmt); D3D12_TEXTURE_COPY_LOCATION new_loc = *loc; uint32_t buffer_row_stride = DIV_ROUND_UP(buffer_row_length, blkw) * blksz; uint32_t buffer_layer_stride = buffer_row_stride * DIV_ROUND_UP(buffer_image_height, blkh); uint64_t tex_offset = ((y / blkh) * buffer_row_stride) + ((z / blkd) * buffer_layer_stride); uint64_t offset = loc->PlacedFootprint.Offset + tex_offset; uint32_t offset_alignment = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; while (offset_alignment % blksz) offset_alignment += D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; new_loc.PlacedFootprint.Footprint.Height = blkh; new_loc.PlacedFootprint.Footprint.Depth = 1; new_loc.PlacedFootprint.Offset = (offset / offset_alignment) * offset_alignment; *start_x = ((offset % offset_alignment) / blksz) * blkw; new_loc.PlacedFootprint.Footprint.Width = *start_x + region->imageExtent.width; new_loc.PlacedFootprint.Footprint.RowPitch = ALIGN_POT(DIV_ROUND_UP(new_loc.PlacedFootprint.Footprint.Width, blkw) * blksz, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); return new_loc; } bool dzn_buffer_supports_region_copy(struct dzn_physical_device *pdev, const D3D12_TEXTURE_COPY_LOCATION *loc) { if (pdev->options13.UnrestrictedBufferTextureCopyPitchSupported) return true; return !(loc->PlacedFootprint.Offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) && !(loc->PlacedFootprint.Footprint.RowPitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)); } VKAPI_ATTR VkResult VKAPI_CALL dzn_CreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer) { return dzn_buffer_create(dzn_device_from_handle(device), pCreateInfo, pAllocator, pBuffer); } VKAPI_ATTR void VKAPI_CALL dzn_DestroyBuffer(VkDevice device, VkBuffer buffer, const VkAllocationCallbacks *pAllocator) { 
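   /* buffer is allowed to be VK_NULL_HANDLE; dzn_buffer_destroy() is a
    * no-op when passed NULL.
    */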
dzn_buffer_destroy(dzn_buffer_from_handle(buffer), pAllocator);
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetBufferMemoryRequirements2(VkDevice dev,
                                 const VkBufferMemoryRequirementsInfo2 *pInfo,
                                 VkMemoryRequirements2 *pMemoryRequirements)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VK_FROM_HANDLE(dzn_buffer, buffer, pInfo->buffer);
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   uint32_t alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
   VkDeviceSize size = buffer->size;

   if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) {
      alignment = MAX2(alignment, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
      size = ALIGN_POT(size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT);
   }

   pMemoryRequirements->memoryRequirements.size = size;
   pMemoryRequirements->memoryRequirements.alignment = alignment;
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      dzn_physical_device_get_mem_type_mask_for_resource(pdev, &buffer->desc, buffer->shared);

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *requirements =
            (VkMemoryDedicatedRequirements *)ext;
         requirements->requiresDedicatedAllocation = false;
         requirements->prefersDedicatedAllocation = false;
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_BindBufferMemory2(VkDevice _device,
                      uint32_t bindInfoCount,
                      const VkBindBufferMemoryInfo *pBindInfos)
{
   VK_FROM_HANDLE(dzn_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; i++) {
      assert(pBindInfos[i].sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);

      VK_FROM_HANDLE(dzn_device_memory, mem, pBindInfos[i].memory);
      VK_FROM_HANDLE(dzn_buffer, buffer, pBindInfos[i].buffer);

      if (mem->dedicated_res) {
         assert(pBindInfos[i].memoryOffset == 0 &&
                buffer->size == mem->size);
         buffer->res = mem->dedicated_res;
         ID3D12Resource_AddRef(buffer->res);
      } else {
         D3D12_RESOURCE_DESC desc = buffer->desc;
         desc.Flags |= mem->res_flags;
         /* Pass the patched desc so mem->res_flags (e.g. the cross-adapter
          * flag used for host-pointer imports) actually take effect.
          */
         if (FAILED(ID3D12Device1_CreatePlacedResource(device->dev, mem->heap,
                                                       pBindInfos[i].memoryOffset,
                                                       &desc,
                                                       D3D12_RESOURCE_STATE_COMMON,
                                                       NULL,
                                                       &IID_ID3D12Resource,
                                                       (void **)&buffer->res)))
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      buffer->gpuva = ID3D12Resource_GetGPUVirtualAddress(buffer->res);

      if (device->bindless) {
         struct dzn_buffer_desc buf_desc = {
            .buffer = buffer,
            .offset = 0,
            .range = VK_WHOLE_SIZE,
         };
         if (buffer->cbv_bindless_slot >= 0) {
            buf_desc.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            dzn_descriptor_heap_write_buffer_desc(device,
                                                  &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
                                                  buffer->cbv_bindless_slot, false, &buf_desc);
         }
         if (buffer->uav_bindless_slot >= 0) {
            buf_desc.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            dzn_descriptor_heap_write_buffer_desc(device,
                                                  &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV].heap,
                                                  buffer->uav_bindless_slot, true, &buf_desc);
         }
      }
   }

   return VK_SUCCESS;
}

static void
dzn_event_destroy(struct dzn_event *event,
                  const VkAllocationCallbacks *pAllocator)
{
   if (!event)
      return;

   struct dzn_device *device =
      container_of(event->base.device, struct dzn_device, vk);

   if (event->fence)
      ID3D12Fence_Release(event->fence);

   vk_object_base_finish(&event->base);
   vk_free2(&device->vk.alloc, pAllocator, event);
}

static VkResult
dzn_event_create(struct dzn_device *device,
                 const VkEventCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkEvent *out)
{
   struct dzn_event *event =
static void
dzn_event_destroy(struct dzn_event *event,
                  const VkAllocationCallbacks *pAllocator)
{
   if (!event)
      return;

   struct dzn_device *device =
      container_of(event->base.device, struct dzn_device, vk);

   if (event->fence)
      ID3D12Fence_Release(event->fence);

   vk_object_base_finish(&event->base);
   vk_free2(&device->vk.alloc, pAllocator, event);
}

static VkResult
dzn_event_create(struct dzn_device *device,
                 const VkEventCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkEvent *out)
{
   struct dzn_event *event =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);

   if (FAILED(ID3D12Device1_CreateFence(device->dev, 0, D3D12_FENCE_FLAG_NONE,
                                        &IID_ID3D12Fence,
                                        (void **)&event->fence))) {
      dzn_event_destroy(event, pAllocator);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *out = dzn_event_to_handle(event);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateEvent(VkDevice device,
                const VkEventCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkEvent *pEvent)
{
   return dzn_event_create(dzn_device_from_handle(device),
                           pCreateInfo, pAllocator, pEvent);
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroyEvent(VkDevice device,
                 VkEvent event,
                 const VkAllocationCallbacks *pAllocator)
{
   dzn_event_destroy(dzn_event_from_handle(event), pAllocator);
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_ResetEvent(VkDevice dev,
               VkEvent evt)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VK_FROM_HANDLE(dzn_event, event, evt);

   if (FAILED(ID3D12Fence_Signal(event->fence, 0)))
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_SetEvent(VkDevice dev,
             VkEvent evt)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VK_FROM_HANDLE(dzn_event, event, evt);

   if (FAILED(ID3D12Fence_Signal(event->fence, 1)))
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetEventStatus(VkDevice device,
                   VkEvent evt)
{
   VK_FROM_HANDLE(dzn_event, event, evt);

   return ID3D12Fence_GetCompletedValue(event->fence) == 0 ?
          VK_EVENT_RESET : VK_EVENT_SET;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetDeviceMemoryCommitment(VkDevice device,
                              VkDeviceMemory memory,
                              VkDeviceSize *pCommittedMemoryInBytes)
{
   VK_FROM_HANDLE(dzn_device_memory, mem, memory);

   // TODO: find if there's a way to query/track actual heap residency
   *pCommittedMemoryInBytes = mem->size;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_QueueBindSparse(VkQueue queue,
                    uint32_t bindInfoCount,
                    const VkBindSparseInfo *pBindInfo,
                    VkFence fence)
{
   // FIXME: add proper implem
   dzn_stub();
   return VK_SUCCESS;
}

static D3D12_TEXTURE_ADDRESS_MODE
dzn_sampler_translate_addr_mode(VkSamplerAddressMode in)
{
   switch (in) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return D3D12_TEXTURE_ADDRESS_MODE_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return D3D12_TEXTURE_ADDRESS_MODE_MIRROR_ONCE;
   default: unreachable("Invalid address mode");
   }
}

static void
dzn_sampler_destroy(struct dzn_sampler *sampler,
                    const VkAllocationCallbacks *pAllocator)
{
   if (!sampler)
      return;

   struct dzn_device *device =
      container_of(sampler->base.device, struct dzn_device, vk);

   dzn_device_descriptor_heap_free_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                        sampler->bindless_slot);

   vk_object_base_finish(&sampler->base);
   vk_free2(&device->vk.alloc, pAllocator, sampler);
}
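
/* Translate the Vulkan sampler state into a D3D12 sampler descriptor.
 * Custom border colors come from VkSamplerCustomBorderColorCreateInfoEXT;
 * static_border_color records the D3D12_STATIC_BORDER_COLOR equivalent
 * (or -1 when there is none), presumably so immutable samplers can be
 * turned into D3D12 static samplers.
 */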
static VkResult
dzn_sampler_create(struct dzn_device *device,
                   const VkSamplerCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator,
                   VkSampler *out)
{
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   struct dzn_sampler *sampler =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);

   const VkSamplerCustomBorderColorCreateInfoEXT *pBorderColor =
      (const VkSamplerCustomBorderColorCreateInfoEXT *)
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

   /* TODO: have a sampler pool to allocate shader-invisible descs which we
    * can copy to the desc_set when UpdateDescriptorSets() is called.
    */
   sampler->desc.Filter = dzn_translate_sampler_filter(pdev, pCreateInfo);
   sampler->desc.AddressU = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeU);
   sampler->desc.AddressV = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeV);
   sampler->desc.AddressW = dzn_sampler_translate_addr_mode(pCreateInfo->addressModeW);
   sampler->desc.MipLODBias = pCreateInfo->mipLodBias;
   sampler->desc.MaxAnisotropy = pCreateInfo->maxAnisotropy;
   sampler->desc.MinLOD = pCreateInfo->minLod;
   sampler->desc.MaxLOD = pCreateInfo->maxLod;

   if (pCreateInfo->compareEnable)
      sampler->desc.ComparisonFunc = dzn_translate_compare_op(pCreateInfo->compareOp);

   bool reads_border_color =
      pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
      pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
      pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;

   if (reads_border_color) {
      switch (pCreateInfo->borderColor) {
      case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
      case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
         sampler->desc.FloatBorderColor[0] = 0.0f;
         sampler->desc.FloatBorderColor[1] = 0.0f;
         sampler->desc.FloatBorderColor[2] = 0.0f;
         sampler->desc.FloatBorderColor[3] =
            pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ? 0.0f : 1.0f;
         sampler->static_border_color =
            pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ?
            D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK :
            D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK;
         break;
      case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
         sampler->desc.FloatBorderColor[0] = sampler->desc.FloatBorderColor[1] = 1.0f;
         sampler->desc.FloatBorderColor[2] = sampler->desc.FloatBorderColor[3] = 1.0f;
         sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE;
         break;
      case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
         sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
         for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.FloatBorderColor); i++)
            sampler->desc.FloatBorderColor[i] = pBorderColor->customBorderColor.float32[i];
         break;
      case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
         sampler->desc.UintBorderColor[0] = 0;
         sampler->desc.UintBorderColor[1] = 0;
         sampler->desc.UintBorderColor[2] = 0;
         sampler->desc.UintBorderColor[3] =
            pCreateInfo->borderColor == VK_BORDER_COLOR_INT_TRANSPARENT_BLACK ? 0 : 1;
         sampler->static_border_color =
            pCreateInfo->borderColor == VK_BORDER_COLOR_INT_TRANSPARENT_BLACK ?
            D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK :
            D3D12_STATIC_BORDER_COLOR_OPAQUE_BLACK_UINT;
         sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
         break;
      case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
         sampler->desc.UintBorderColor[0] = sampler->desc.UintBorderColor[1] = 1;
         sampler->desc.UintBorderColor[2] = sampler->desc.UintBorderColor[3] = 1;
         sampler->static_border_color = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE_UINT;
         sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
         break;
      case VK_BORDER_COLOR_INT_CUSTOM_EXT:
         sampler->static_border_color = (D3D12_STATIC_BORDER_COLOR)-1;
         for (unsigned i = 0; i < ARRAY_SIZE(sampler->desc.UintBorderColor); i++)
            sampler->desc.UintBorderColor[i] = pBorderColor->customBorderColor.uint32[i];
         sampler->desc.Flags = D3D12_SAMPLER_FLAG_UINT_BORDER_COLOR;
         break;
      default:
         unreachable("Unsupported border color");
      }
   }

   if (pCreateInfo->unnormalizedCoordinates &&
       pdev->options17.NonNormalizedCoordinateSamplersSupported)
      sampler->desc.Flags |= D3D12_SAMPLER_FLAG_NON_NORMALIZED_COORDINATES;

   sampler->bindless_slot = -1;
   if (device->bindless) {
      sampler->bindless_slot =
         dzn_device_descriptor_heap_alloc_slot(device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
      if (sampler->bindless_slot < 0) {
         dzn_sampler_destroy(sampler, pAllocator);
         return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      }
      dzn_descriptor_heap_write_sampler_desc(device,
                                             &device->device_heaps[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER].heap,
                                             sampler->bindless_slot,
                                             sampler);
   }

   *out = dzn_sampler_to_handle(sampler);
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateSampler(VkDevice device,
                  const VkSamplerCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkSampler *pSampler)
{
   return dzn_sampler_create(dzn_device_from_handle(device),
                             pCreateInfo, pAllocator, pSampler);
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroySampler(VkDevice device,
                   VkSampler sampler,
                   const VkAllocationCallbacks *pAllocator)
{
   dzn_sampler_destroy(dzn_sampler_from_handle(sampler), pAllocator);
}
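
/* Bindless slot allocator for the device-wide descriptor heaps: slots come
 * from a freelist first, then by bumping next_alloc_slot until the heap is
 * exhausted (-1 on failure). Freed slots are pushed back onto the freelist,
 * all under the per-heap lock.
 */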
int
dzn_device_descriptor_heap_alloc_slot(struct dzn_device *device,
                                      D3D12_DESCRIPTOR_HEAP_TYPE type)
{
   struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];
   mtx_lock(&heap->lock);

   int ret = -1;
   if (heap->slot_freelist.size)
      ret = util_dynarray_pop(&heap->slot_freelist, int);
   else if (heap->next_alloc_slot < heap->heap.desc_count)
      ret = heap->next_alloc_slot++;

   mtx_unlock(&heap->lock);
   return ret;
}

void
dzn_device_descriptor_heap_free_slot(struct dzn_device *device,
                                     D3D12_DESCRIPTOR_HEAP_TYPE type,
                                     int slot)
{
   struct dzn_device_descriptor_heap *heap = &device->device_heaps[type];

   assert(slot < 0 || slot < heap->heap.desc_count);

   if (slot < 0)
      return;

   mtx_lock(&heap->lock);
   util_dynarray_append(&heap->slot_freelist, int, slot);
   mtx_unlock(&heap->lock);
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetDeviceGroupPeerMemoryFeatures(VkDevice device,
                                     uint32_t heapIndex,
                                     uint32_t localDeviceIndex,
                                     uint32_t remoteDeviceIndex,
                                     VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   *pPeerMemoryFeatures = 0;
}

VKAPI_ATTR void VKAPI_CALL
dzn_GetImageSparseMemoryRequirements2(VkDevice device,
                                      const VkImageSparseMemoryRequirementsInfo2 *pInfo,
                                      uint32_t *pSparseMemoryRequirementCount,
                                      VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateSamplerYcbcrConversion(VkDevice device,
                                 const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
                                 const VkAllocationCallbacks *pAllocator,
                                 VkSamplerYcbcrConversion *pYcbcrConversion)
{
   unreachable("Ycbcr sampler conversion is not supported");
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroySamplerYcbcrConversion(VkDevice device,
                                  VkSamplerYcbcrConversion YcbcrConversion,
                                  const VkAllocationCallbacks *pAllocator)
{
   unreachable("Ycbcr sampler conversion is not supported");
}
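
/* Buffer device addresses are not real GPU VAs here: the buffer's UAV
 * bindless slot is packed into the upper 32 bits and a 0xD3 tag is OR'ed
 * into the top byte so the returned address is never null.
 */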
supported"); return VK_SUCCESS; } VKAPI_ATTR void VKAPI_CALL dzn_DestroySamplerYcbcrConversion(VkDevice device, VkSamplerYcbcrConversion YcbcrConversion, const VkAllocationCallbacks *pAllocator) { unreachable("Ycbcr sampler conversion is not supported"); } VKAPI_ATTR VkDeviceAddress VKAPI_CALL dzn_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo* pInfo) { struct dzn_buffer *buffer = dzn_buffer_from_handle(pInfo->buffer); /* Insert a pointer tag so we never return null */ return ((uint64_t)buffer->uav_bindless_slot << 32ull) | (0xD3ull << 56); } VKAPI_ATTR uint64_t VKAPI_CALL dzn_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo) { return 0; } VKAPI_ATTR uint64_t VKAPI_CALL dzn_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo) { return 0; } #ifdef _WIN32 VKAPI_ATTR VkResult VKAPI_CALL dzn_GetMemoryWin32HandleKHR(VkDevice device, const VkMemoryGetWin32HandleInfoKHR *pGetWin32HandleInfo, HANDLE *pHandle) { VK_FROM_HANDLE(dzn_device_memory, mem, pGetWin32HandleInfo->memory); if (!mem->export_handle) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); switch (pGetWin32HandleInfo->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT: if (!DuplicateHandle(GetCurrentProcess(), mem->export_handle, GetCurrentProcess(), pHandle, 0, false, DUPLICATE_SAME_ACCESS)) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); return VK_SUCCESS; default: return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } #else VKAPI_ATTR VkResult VKAPI_CALL dzn_GetMemoryFdKHR(VkDevice device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFd) { VK_FROM_HANDLE(dzn_device_memory, mem, pGetFdInfo->memory); if (!mem->export_handle) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); switch (pGetFdInfo->handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT: case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT: *pFd = (int)(intptr_t)mem->export_handle; mem->export_handle = (HANDLE)(intptr_t)-1; return VK_SUCCESS; default: return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); } } #endif #ifdef _WIN32 VKAPI_ATTR VkResult VKAPI_CALL dzn_GetMemoryWin32HandlePropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, HANDLE handle, VkMemoryWin32HandlePropertiesKHR *pProperties) { #else VKAPI_ATTR VkResult VKAPI_CALL dzn_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd, VkMemoryFdPropertiesKHR *pProperties) { HANDLE handle = (HANDLE)(intptr_t)fd; #endif VK_FROM_HANDLE(dzn_device, device, _device); IUnknown *opened_object; if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, handle, &IID_IUnknown, (void **)&opened_object))) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); VkResult result = VK_ERROR_INVALID_EXTERNAL_HANDLE; ID3D12Resource *res = NULL; ID3D12Heap *heap = NULL; struct dzn_physical_device *pdev = container_of(device->vk.physical, struct dzn_physical_device, vk); switch (handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT: (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Resource, (void **)&res); (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Heap, (void **)&heap); break; case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT: case 
#ifdef _WIN32
VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetMemoryWin32HandlePropertiesKHR(VkDevice _device,
                                      VkExternalMemoryHandleTypeFlagBits handleType,
                                      HANDLE handle,
                                      VkMemoryWin32HandlePropertiesKHR *pProperties)
{
#else
VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pProperties)
{
   HANDLE handle = (HANDLE)(intptr_t)fd;
#endif
   VK_FROM_HANDLE(dzn_device, device, _device);

   IUnknown *opened_object;
   if (FAILED(ID3D12Device_OpenSharedHandle(device->dev, handle, &IID_IUnknown,
                                            (void **)&opened_object)))
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   VkResult result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
   ID3D12Resource *res = NULL;
   ID3D12Heap *heap = NULL;
   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:
      (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Resource, (void **)&res);
      (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Heap, (void **)&heap);
      break;
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_TEXTURE_BIT:
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE_BIT:
      (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Resource, (void **)&res);
      break;
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP_BIT:
      (void)IUnknown_QueryInterface(opened_object, &IID_ID3D12Heap, (void **)&heap);
      break;
   default:
      goto cleanup;
   }

   if (!res && !heap)
      goto cleanup;

   D3D12_HEAP_DESC heap_desc;
   if (res)
      ID3D12Resource_GetHeapProperties(res, &heap_desc.Properties, &heap_desc.Flags);
   else
      heap_desc = dzn_ID3D12Heap_GetDesc(heap);

   if (heap_desc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM)
      heap_desc.Properties =
         dzn_ID3D12Device4_GetCustomHeapProperties(device->dev, 0, heap_desc.Properties.Type);

   pProperties->memoryTypeBits = 0;
   for (uint32_t i = 0; i < pdev->memory.memoryTypeCount; ++i) {
      const VkMemoryType *mem_type = &pdev->memory.memoryTypes[i];
      D3D12_HEAP_PROPERTIES required_props =
         deduce_heap_properties_from_memory(pdev, mem_type);
      if (heap_desc.Properties.CPUPageProperty != required_props.CPUPageProperty ||
          heap_desc.Properties.MemoryPoolPreference != required_props.MemoryPoolPreference)
         continue;

      D3D12_HEAP_FLAGS required_flags =
         dzn_physical_device_get_heap_flags_for_mem_type(pdev, i);
      if ((heap_desc.Flags & required_flags) != required_flags)
         continue;

      pProperties->memoryTypeBits |= (1 << i);
   }

   result = VK_SUCCESS;

cleanup:
   IUnknown_Release(opened_object);
   if (res)
      ID3D12Resource_Release(res);
   if (heap)
      ID3D12Heap_Release(heap);

   return result;
}

#if defined(_WIN32)
VKAPI_ATTR VkResult VKAPI_CALL
dzn_GetMemoryHostPointerPropertiesEXT(VkDevice _device,
                                      VkExternalMemoryHandleTypeFlagBits handleType,
                                      const void *pHostPointer,
                                      VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
   VK_FROM_HANDLE(dzn_device, device, _device);

   if (!device->dev13)
      return VK_ERROR_FEATURE_NOT_PRESENT;

   ID3D12Heap *heap;
   if (FAILED(ID3D12Device13_OpenExistingHeapFromAddress1(device->dev13, pHostPointer, 1,
                                                          &IID_ID3D12Heap, (void **)&heap)))
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;

   struct dzn_physical_device *pdev =
      container_of(device->vk.physical, struct dzn_physical_device, vk);
   D3D12_HEAP_DESC heap_desc = dzn_ID3D12Heap_GetDesc(heap);

   pMemoryHostPointerProperties->memoryTypeBits = 0;
   for (uint32_t i = 0; i < pdev->memory.memoryTypeCount; ++i) {
      const VkMemoryType *mem_type = &pdev->memory.memoryTypes[i];
      D3D12_HEAP_PROPERTIES required_props =
         deduce_heap_properties_from_memory(pdev, mem_type);
      if (heap_desc.Properties.CPUPageProperty != required_props.CPUPageProperty ||
          heap_desc.Properties.MemoryPoolPreference != required_props.MemoryPoolPreference)
         continue;

      pMemoryHostPointerProperties->memoryTypeBits |= (1 << i);
   }

   ID3D12Heap_Release(heap);
   return VK_SUCCESS;
}
#endif