// xref: /aosp_15_r20/external/executorch/backends/vulkan/runtime/vk_api/Adapter.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

// @lint-ignore-every CLANGTIDY facebook-hte-BadMemberName

#include <executorch/backends/vulkan/runtime/vk_api/vk_api.h>

#include <executorch/backends/vulkan/runtime/vk_api/Device.h>
#include <executorch/backends/vulkan/runtime/vk_api/Pipeline.h>

#include <executorch/backends/vulkan/runtime/vk_api/memory/Allocator.h>

#include <array>
#include <cstdint>
#include <mutex>
#include <ostream>
#include <string>
#include <vector>

22 namespace vkcompute {
23 namespace vkapi {
24 
25 //
26 // A Vulkan Adapter represents a logical device and all its properties. It
27 // manages all relevant properties of the underlying physical device, a
28 // handle to the logical device, and a number of compute queues available to
29 // the device. It is primarily responsible for managing the VkDevice handle
30 // which points to the logical device object on the GPU.
31 //
32 // This class is primarily used by the Runtime class, which holds one Adapter
33 // instance for each physical device visible to the VkInstance. Upon
34 // construction, this class will populate the physical device properties, but
35 // will not create the logical device until specifically requested via the
36 // init_device() function.
37 //
38 // init_device() will create the logical device and obtain the VkDevice handle
39 // for it. It will also create a number of compute queues up to the amount
40 // requested when the Adapter instance was constructed.
41 //
42 // Contexts (which represent one thread of execution) will request a compute
43 // queue from an Adapter. The Adapter will then select a compute queue to
44 // assign to the Context, attempting to balance load between all available
45 // queues. This will allow different Contexts (which typically execute on
46 // separate threads) to run concurrently.
47 //
48 
49 #define NUM_QUEUE_MUTEXES 4
50 
51 class Adapter final {
52  public:
53   explicit Adapter(
54       VkInstance instance,
55       PhysicalDevice physical_device,
56       const uint32_t num_queues,
57       const std::string& cache_data_path);
58 
59   Adapter(const Adapter&) = delete;
60   Adapter& operator=(const Adapter&) = delete;
61 
62   Adapter(Adapter&&) = delete;
63   Adapter& operator=(Adapter&&) = delete;
64 
65   ~Adapter() = default;
66 
67   struct Queue {
68     uint32_t family_index;
69     uint32_t queue_index;
70     VkQueueFlags capabilities;
71     VkQueue handle;
72   };
73 
74  private:
75   // Use a mutex to manage queue usage info since
76   // it can be accessed from multiple threads
77   std::mutex queue_usage_mutex_;
78   // Physical Device Info
79   PhysicalDevice physical_device_;
80   // Queue Management
81   std::vector<Queue> queues_;
82   std::vector<uint32_t> queue_usage_;
83   std::array<std::mutex, NUM_QUEUE_MUTEXES> queue_mutexes_;
84   // Handles
85   VkInstance instance_;
86   DeviceHandle device_;
87   // Device-level resource caches
88   ShaderLayoutCache shader_layout_cache_;
89   ShaderCache shader_cache_;
90   PipelineLayoutCache pipeline_layout_cache_;
91   ComputePipelineCache compute_pipeline_cache_;
92   // Memory Management
93   SamplerCache sampler_cache_;
94   Allocator vma_;
95   // Miscellaneous
96   bool linear_tiling_3d_enabled_;
97 
98  public:
99   // Physical Device metadata
100 
physical_handle()101   inline VkPhysicalDevice physical_handle() const {
102     return physical_device_.handle;
103   }
104 
device_handle()105   inline VkDevice device_handle() const {
106     return device_.handle;
107   }
108 
has_unified_memory()109   inline bool has_unified_memory() const {
110     return physical_device_.has_unified_memory;
111   }
112 
num_compute_queues()113   inline uint32_t num_compute_queues() const {
114     return physical_device_.num_compute_queues;
115   }
116 
timestamp_compute_and_graphics()117   inline bool timestamp_compute_and_graphics() const {
118     return physical_device_.has_timestamps;
119   }
120 
timestamp_period()121   inline float timestamp_period() const {
122     return physical_device_.timestamp_period;
123   }
124 
125   // Queue Management
126 
127   Queue request_queue();
128   void return_queue(Queue&);
129 
130   // Caches
131 
shader_layout_cache()132   inline ShaderLayoutCache& shader_layout_cache() {
133     return shader_layout_cache_;
134   }
135 
shader_cache()136   inline ShaderCache& shader_cache() {
137     return shader_cache_;
138   }
139 
pipeline_layout_cache()140   inline PipelineLayoutCache& pipeline_layout_cache() {
141     return pipeline_layout_cache_;
142   }
143 
compute_pipeline_cache()144   inline ComputePipelineCache& compute_pipeline_cache() {
145     return compute_pipeline_cache_;
146   }
147 
148   // Memory Allocation
149 
sampler_cache()150   inline SamplerCache& sampler_cache() {
151     return sampler_cache_;
152   }
153 
vma()154   inline Allocator& vma() {
155     return vma_;
156   }
157 
linear_tiling_3d_enabled()158   inline bool linear_tiling_3d_enabled() const {
159     return linear_tiling_3d_enabled_;
160   }
161 
162   // Physical Device Features
163 
supports_16bit_storage_buffers()164   inline bool supports_16bit_storage_buffers() {
165 #ifdef VK_KHR_16bit_storage
166     return physical_device_.shader_16bit_storage.storageBuffer16BitAccess ==
167         VK_TRUE;
168 #else
169     return false;
170 #endif /* VK_KHR_16bit_storage */
171   }
172 
supports_8bit_storage_buffers()173   inline bool supports_8bit_storage_buffers() {
174 #ifdef VK_KHR_8bit_storage
175     return physical_device_.shader_8bit_storage.storageBuffer8BitAccess ==
176         VK_TRUE;
177 #else
178     return false;
179 #endif /* VK_KHR_8bit_storage */
180   }
181 
supports_float16_shader_types()182   inline bool supports_float16_shader_types() {
183 #ifdef VK_KHR_shader_float16_int8
184     return physical_device_.shader_float16_int8_types.shaderFloat16 == VK_TRUE;
185 #else
186     return false;
187 #endif /* VK_KHR_shader_float16_int8 */
188   }
189 
supports_int8_shader_types()190   inline bool supports_int8_shader_types() {
191 #ifdef VK_KHR_shader_float16_int8
192     return physical_device_.shader_float16_int8_types.shaderInt8 == VK_TRUE;
193 #else
194     return false;
195 #endif /* VK_KHR_shader_float16_int8 */
196   }
197 
supports_int16_shader_types()198   inline bool supports_int16_shader_types() {
199     return physical_device_.supports_int16_shader_types;
200   }
201 
has_full_float16_buffers_support()202   inline bool has_full_float16_buffers_support() {
203     return supports_16bit_storage_buffers() && supports_float16_shader_types();
204   }
205 
has_full_int8_buffers_support()206   inline bool has_full_int8_buffers_support() {
207     return supports_8bit_storage_buffers() && supports_int8_shader_types();
208   }
209 
210   // Command Buffer Submission
211 
212   void
213   submit_cmd(const Queue&, VkCommandBuffer, VkFence fence = VK_NULL_HANDLE);
214 
215   std::string stringize() const;
216   friend std::ostream& operator<<(std::ostream&, const Adapter&);
217 };
218 
219 } // namespace vkapi
220 } // namespace vkcompute
221