/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName

#include <executorch/backends/vulkan/runtime/vk_api/vk_api.h>

#include <executorch/backends/vulkan/runtime/vk_api/Device.h>
#include <executorch/backends/vulkan/runtime/vk_api/Pipeline.h>

#include <executorch/backends/vulkan/runtime/vk_api/memory/Allocator.h>

#include <array>

namespace vkcompute {
namespace vkapi {

//
// A Vulkan Adapter represents a logical device and all its properties. It
// manages all relevant properties of the underlying physical device, a
// handle to the logical device, and a number of compute queues available to
// the device. It is primarily responsible for managing the VkDevice handle
// which points to the logical device object on the GPU.
//
// This class is primarily used by the Runtime class, which holds one Adapter
// instance for each physical device visible to the VkInstance. Upon
// construction, this class will populate the physical device properties, but
// will not create the logical device until specifically requested via the
// init_device() function.
//
// init_device() will create the logical device and obtain the VkDevice handle
// for it. It will also create a number of compute queues up to the amount
// requested when the Adapter instance was constructed.
//
// Contexts (which represent one thread of execution) will request a compute
// queue from an Adapter. The Adapter will then select a compute queue to
// assign to the Context, attempting to balance load between all available
// queues. This will allow different Contexts (which typically execute on
// separate threads) to run concurrently.
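//
// A minimal usage sketch of the queue lifecycle declared below (assumes
// `adapter` points to an Adapter whose logical device has been initialized,
// and that `cmd_buf` is a recorded VkCommandBuffer obtained elsewhere;
// the fence argument of submit_cmd() defaults to VK_NULL_HANDLE):
//
//   Adapter::Queue queue = adapter->request_queue();
//   adapter->submit_cmd(queue, cmd_buf);
//   adapter->return_queue(queue);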
//

#define NUM_QUEUE_MUTEXES 4

class Adapter final {
 public:
  explicit Adapter(
      VkInstance instance,
      PhysicalDevice physical_device,
      const uint32_t num_queues,
      const std::string& cache_data_path);

  Adapter(const Adapter&) = delete;
  Adapter& operator=(const Adapter&) = delete;

  Adapter(Adapter&&) = delete;
  Adapter& operator=(Adapter&&) = delete;

  ~Adapter() = default;

  struct Queue {
    uint32_t family_index;
    uint32_t queue_index;
    VkQueueFlags capabilities;
    VkQueue handle;
  };

 private:
  // Use a mutex to manage queue usage info since
  // it can be accessed from multiple threads
  std::mutex queue_usage_mutex_;
  // Physical Device Info
  PhysicalDevice physical_device_;
  // Queue Management
  std::vector<Queue> queues_;
  std::vector<uint32_t> queue_usage_;
  std::array<std::mutex, NUM_QUEUE_MUTEXES> queue_mutexes_;
  // Handles
  VkInstance instance_;
  DeviceHandle device_;
  // Device-level resource caches
  ShaderLayoutCache shader_layout_cache_;
  ShaderCache shader_cache_;
  PipelineLayoutCache pipeline_layout_cache_;
  ComputePipelineCache compute_pipeline_cache_;
  // Memory Management
  SamplerCache sampler_cache_;
  Allocator vma_;
  // Miscellaneous
  bool linear_tiling_3d_enabled_;

 public:
  // Physical Device metadata

  inline VkPhysicalDevice physical_handle() const {
    return physical_device_.handle;
  }

  inline VkDevice device_handle() const {
    return device_.handle;
  }

  inline bool has_unified_memory() const {
    return physical_device_.has_unified_memory;
  }

  inline uint32_t num_compute_queues() const {
    return physical_device_.num_compute_queues;
  }

  inline bool timestamp_compute_and_graphics() const {
    return physical_device_.has_timestamps;
  }

  inline float timestamp_period() const {
    return physical_device_.timestamp_period;
  }

  // Queue Management

  Queue request_queue();
  void return_queue(Queue&);

  // Caches

  inline ShaderLayoutCache& shader_layout_cache() {
    return shader_layout_cache_;
  }

  inline ShaderCache& shader_cache() {
    return shader_cache_;
  }

  inline PipelineLayoutCache& pipeline_layout_cache() {
    return pipeline_layout_cache_;
  }

  inline ComputePipelineCache& compute_pipeline_cache() {
    return compute_pipeline_cache_;
  }

  // Memory Allocation

  inline SamplerCache& sampler_cache() {
    return sampler_cache_;
  }

  inline Allocator& vma() {
    return vma_;
  }

  inline bool linear_tiling_3d_enabled() const {
    return linear_tiling_3d_enabled_;
  }

  // Physical Device Features

  inline bool supports_16bit_storage_buffers() {
#ifdef VK_KHR_16bit_storage
    return physical_device_.shader_16bit_storage.storageBuffer16BitAccess ==
        VK_TRUE;
#else
    return false;
#endif /* VK_KHR_16bit_storage */
  }

  inline bool supports_8bit_storage_buffers() {
#ifdef VK_KHR_8bit_storage
    return physical_device_.shader_8bit_storage.storageBuffer8BitAccess ==
        VK_TRUE;
#else
    return false;
#endif /* VK_KHR_8bit_storage */
  }

  inline bool supports_float16_shader_types() {
#ifdef VK_KHR_shader_float16_int8
    return physical_device_.shader_float16_int8_types.shaderFloat16 == VK_TRUE;
#else
    return false;
#endif /* VK_KHR_shader_float16_int8 */
  }

  inline bool supports_int8_shader_types() {
#ifdef VK_KHR_shader_float16_int8
    return physical_device_.shader_float16_int8_types.shaderInt8 == VK_TRUE;
#else
    return false;
#endif /* VK_KHR_shader_float16_int8 */
  }

  inline bool supports_int16_shader_types() {
    return physical_device_.supports_int16_shader_types;
  }

  inline bool has_full_float16_buffers_support() {
    return supports_16bit_storage_buffers() && supports_float16_shader_types();
  }

  inline bool has_full_int8_buffers_support() {
    return supports_8bit_storage_buffers() && supports_int8_shader_types();
  }

  // Command Buffer Submission

  void
  submit_cmd(const Queue&, VkCommandBuffer, VkFence fence = VK_NULL_HANDLE);

  std::string stringize() const;
  friend std::ostream& operator<<(std::ostream&, const Adapter&);
};

} // namespace vkapi
} // namespace vkcompute