/*
 * Copyright © 2020 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#ifndef VK_DEVICE_H
#define VK_DEVICE_H

#include "rmv/vk_rmv_common.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"

#ifdef __cplusplus
extern "C" {
#endif

struct vk_command_buffer_ops;
struct vk_device_shader_ops;
struct vk_sync;

enum vk_queue_submit_mode {
   /** Submits happen immediately
    *
    * `vkQueueSubmit()` and `vkQueueBindSparse()` call
    * ``vk_queue::driver_submit`` directly for all submits and the last call to
    * ``vk_queue::driver_submit`` will have completed by the time
    * `vkQueueSubmit()` or `vkQueueBindSparse()` return.
    */
   VK_QUEUE_SUBMIT_MODE_IMMEDIATE,

   /** Submits may be deferred until a future `vk_queue_flush()`
    *
    * Submits are added to the queue and `vk_queue_flush()` is called.
    * However, any submits with unsatisfied dependencies will be left on the
    * queue until a future `vk_queue_flush()` call.  This is used for
    * implementing emulated timeline semaphores without threading.
    */
   VK_QUEUE_SUBMIT_MODE_DEFERRED,

   /** Submits will be added to the queue and handled later by a thread
    *
    * This places additional requirements on the vk_sync types used by the
    * driver:
    *
    *    1. All `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT` also
    *       support `VK_SYNC_FEATURE_WAIT_PENDING` so that the threads can
    *       sort out when a given submit has all its dependencies resolved.
    *
    *    2. All binary `vk_sync` types which support `VK_SYNC_FEATURE_GPU_WAIT`
    *       also support `VK_SYNC_FEATURE_CPU_RESET` so we can reset
    *       semaphores after waiting on them.
    *
    *    3. All vk_sync types used as permanent payloads of semaphores support
    *       ``vk_sync_type::move`` so that the common code can move the pending
    *       signal into a temporary vk_sync and reset the semaphore.
    *
    * This is required for shared timeline semaphores where we need to handle
    * wait-before-signal by threading in the driver if we ever see an
    * unresolved dependency.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED,

   /** Threaded but only if we need it to resolve dependencies
    *
    * This imposes all the same requirements on `vk_sync` types as
    * `VK_QUEUE_SUBMIT_MODE_THREADED`.
    */
   VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND,
};
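
/*
 * Example: a driver opting into VK_QUEUE_SUBMIT_MODE_THREADED might assert
 * the requirements above at device creation.  This is a hedged sketch; the
 * vk_sync_type feature bits come from vk_sync.h, while the function name is
 * hypothetical:
 *
 *    static void
 *    my_assert_sync_type_ok(const struct vk_sync_type *type)
 *    {
 *       if (type->features & VK_SYNC_FEATURE_GPU_WAIT) {
 *          // Rule 1: the submit thread must be able to wait-for-pending.
 *          assert(type->features & VK_SYNC_FEATURE_WAIT_PENDING);
 *          // Rule 2: binary payloads must be CPU-resettable after waits.
 *          if (type->features & VK_SYNC_FEATURE_BINARY)
 *             assert(type->features & VK_SYNC_FEATURE_CPU_RESET);
 *       }
 *    }
 */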

/** Base struct for VkDevice */
struct vk_device {
   struct vk_object_base base;

   /** Allocator used to create this device
    *
    * This is used as a fall-back for when a NULL pAllocator is passed into a
    * device-level create function such as vkCreateImage().
    */
   VkAllocationCallbacks alloc;

   /** Pointer to the physical device */
   struct vk_physical_device *physical;

   /** Table of enabled extensions */
   struct vk_device_extension_table enabled_extensions;

   /** Table of enabled features */
   struct vk_features enabled_features;

   /** Device-level dispatch table */
   struct vk_device_dispatch_table dispatch_table;

   /** Command dispatch table
    *
    * This is used for emulated secondary command buffer support.  To use
    * emulated (trace/replay) secondary command buffers:
    *
    *  1. Provide your "real" command buffer dispatch table here.  Because
    *     this table isn't populated by vk_device_init(), the driver has to
    *     add the vk_common entrypoints to it itself.
    *
    *  2. Add vk_enqueue_unless_primary_device_entrypoint_table to your
    *     device-level dispatch table.
    *
    * A sketch of this wiring follows below.
    */
   const struct vk_device_dispatch_table *command_dispatch_table;
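
   /*
    * Example: wiring up the emulated secondary command buffer path described
    * above.  This is a hedged sketch, not the canonical setup; `dev`,
    * `cmd_dispatch`, and `my_device_entrypoints` are hypothetical driver-side
    * names:
    *
    *    // 1. Fill the "real" command buffer table.  vk_device_init() does
    *    //    not populate it, so add the vk_common entrypoints by hand.
    *    vk_device_dispatch_table_from_entrypoints(
    *       &dev->cmd_dispatch, &my_device_entrypoints, true);
    *    vk_device_dispatch_table_from_entrypoints(
    *       &dev->cmd_dispatch, &vk_common_device_entrypoints, false);
    *    dev->vk.command_dispatch_table = &dev->cmd_dispatch;
    *
    *    // 2. Layer the enqueue-unless-primary entrypoints on top of the
    *    //    device-level dispatch table passed to vk_device_init().
    *    vk_device_dispatch_table_from_entrypoints(
    *       &dispatch_table, &vk_enqueue_unless_primary_device_entrypoint_table,
    *       true);
    */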

   /** Command buffer vtable when using the common command pool */
   const struct vk_command_buffer_ops *command_buffer_ops;

   /** Shader vtable for VK_EXT_shader_object and common pipelines */
   const struct vk_device_shader_ops *shader_ops;

   /** Driver-provided callback for capturing traces
    *
    * Triggers for this callback are:
    *    - Keyboard input (F12)
    *    - Creation of a trigger file
    *    - Reaching the trace frame
    */
   VkResult (*capture_trace)(VkQueue queue);

   uint32_t current_frame;
   bool trace_hotkey_trigger;
   simple_mtx_t trace_mtx;

   /* For VK_EXT_private_data */
   uint32_t private_data_next_index;

   struct list_head queues;

   struct {
      int lost;
      bool reported;
   } _lost;

   /** Checks the status of this device
    *
    * This is expected to return either VK_SUCCESS or VK_ERROR_DEVICE_LOST.
    * It is called before ``vk_queue::driver_submit`` and after every
    * non-trivial wait operation to ensure the device is still around.  This
    * gives the driver a hook to ask the kernel if its device is still valid.
    * If the kernel says the device has been lost, the driver MUST call
    * vk_device_set_lost().
    *
    * This function may be called from any thread at any time.
    */
   VkResult (*check_status)(struct vk_device *device);
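
   /*
    * Example: a minimal check_status hook.  A hedged sketch; the my_* names
    * and the kernel query are hypothetical:
    *
    *    static VkResult
    *    my_check_status(struct vk_device *vk_dev)
    *    {
    *       struct my_device *dev =
    *          container_of(vk_dev, struct my_device, vk);
    *
    *       // Ask the kernel whether our context is still alive.
    *       if (my_kernel_says_device_is_wedged(dev->drm_fd))
    *          return vk_device_set_lost(vk_dev, "kernel reported a GPU hang");
    *
    *       return VK_SUCCESS;
    *    }
    */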

   /** Creates a vk_sync that wraps a memory object
    *
    * This is always a one-shot object so it need not track any additional
    * state.  Since it's intended for synchronizing between processes using
    * implicit synchronization mechanisms, no such tracking would be valid
    * anyway.
    *
    * If `signal_memory` is set, the resulting vk_sync will be used to signal
    * the memory object from a queue via ``vk_queue_submit::signals``.  The
    * common code guarantees that, by the time vkQueueSubmit() returns, the
    * signal operation has been submitted to the kernel via the driver's
    * ``vk_queue::driver_submit`` hook.  This means that any vkQueueSubmit()
    * call which needs implicit synchronization may block.
    *
    * If `signal_memory` is not set, it can be assumed that the memory object
    * already has a signal operation pending from some other process and we
    * need only wait on it.
    */
   VkResult (*create_sync_for_memory)(struct vk_device *device,
                                      VkDeviceMemory memory,
                                      bool signal_memory,
                                      struct vk_sync **sync_out);
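
   /*
    * Example shape of an implementation.  A hedged sketch; the my_* names
    * are hypothetical and the underlying mechanism (e.g. wrapping a dma-buf's
    * implicit fences) is driver-specific:
    *
    *    static VkResult
    *    my_create_sync_for_memory(struct vk_device *device,
    *                              VkDeviceMemory memory,
    *                              bool signal_memory,
    *                              struct vk_sync **sync_out)
    *    {
    *       VK_FROM_HANDLE(my_device_memory, mem, memory);
    *       return my_sync_create_for_bo(device, mem->bo,
    *                                    signal_memory, sync_out);
    *    }
    */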

   /* Set by vk_device_set_drm_fd() */
   int drm_fd;

   /** Implicit pipeline cache, or NULL */
   struct vk_pipeline_cache *mem_cache;

   /** An enum describing how timeline semaphores work */
   enum vk_device_timeline_mode {
      /** Timeline semaphores are not supported */
      VK_DEVICE_TIMELINE_MODE_NONE,

      /** Timeline semaphores are emulated with vk_timeline
       *
       * In this mode, timeline semaphores are emulated using vk_timeline
       * which is a collection of binary semaphores, one per time point.
       * These timeline semaphores cannot be shared because the data structure
       * exists entirely in userspace.  These timelines are virtually
       * invisible to the driver; all it sees are the binary vk_syncs, one per
       * time point.
       *
       * To handle wait-before-signal, we place all vk_queue_submits in the
       * queue's submit list in vkQueueSubmit() and call vk_device_flush() at
       * key points such as the end of vkQueueSubmit() and vkSemaphoreSignal().
       * This ensures that, as soon as a given submit's dependencies are fully
       * resolvable, it gets submitted to the driver.
       */
      VK_DEVICE_TIMELINE_MODE_EMULATED,

      /** Timeline semaphores are a kernel-assisted emulation
       *
       * In this mode, timeline semaphores are still technically an emulation
       * in the sense that they don't support wait-before-signal natively.
       * Instead, all GPU-waitable objects support a CPU wait-for-pending
       * operation which lets the userspace driver wait until a given event
       * on the (possibly shared) vk_sync is pending.  The event is "pending"
       * if a job has been submitted to the kernel (possibly from a different
       * process) which will signal it.  In vkQueueSubmit(), we use this wait
       * mode to detect waits which are not yet pending and, the first time we
       * do, spawn a thread to manage the queue.  That thread waits for each
       * submit's waits to all be pending before submitting to the driver
       * queue.
       *
       * We have to be a bit more careful about a few things in this mode.
       * In particular, we can never assume that any given wait operation is
       * pending.  For instance, when we go to export a sync file from a
       * binary semaphore, we need to first wait for it to be pending.  The
       * spec guarantees that the vast majority of these waits return almost
       * immediately, but we do need to insert them for correctness.
       */
      VK_DEVICE_TIMELINE_MODE_ASSISTED,

      /** Timeline semaphores are 100% native
       *
       * In this mode, wait-before-signal is natively supported by the
       * underlying timeline implementation.  We can submit-and-forget and
       * assume that dependencies will get resolved for us by the kernel.
       * Currently, this isn't supported by any Linux primitives.
       */
      VK_DEVICE_TIMELINE_MODE_NATIVE,
   } timeline_mode;

   /** Per-device submit mode
    *
    * This represents the device-wide submit strategy which may be different
    * from the per-queue submit mode.  See vk_queue.submit.mode for more
    * details.
    */
   enum vk_queue_submit_mode submit_mode;

   struct vk_memory_trace_data memory_trace_data;

   mtx_t swapchain_private_mtx;
   struct hash_table *swapchain_private;
   mtx_t swapchain_name_mtx;
   struct hash_table *swapchain_name;

   /* For VK_KHR_pipeline_binary */
   bool disable_internal_cache;
};

VK_DEFINE_HANDLE_CASTS(vk_device, base, VkDevice,
                       VK_OBJECT_TYPE_DEVICE);
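
/*
 * Drivers typically embed vk_device as the first member of their own device
 * struct and declare matching casts for their type.  A hedged sketch with a
 * hypothetical my_device:
 *
 *    struct my_device {
 *       struct vk_device vk;
 *       int fd;
 *    };
 *
 *    VK_DEFINE_HANDLE_CASTS(my_device, vk.base, VkDevice,
 *                           VK_OBJECT_TYPE_DEVICE);
 */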

/** Initialize a vk_device
 *
 * Along with initializing the data structures in `vk_device`, this function
 * checks that every extension specified by
 * ``VkDeviceCreateInfo::ppEnabledExtensionNames`` is actually supported by
 * the physical device and returns `VK_ERROR_EXTENSION_NOT_PRESENT` if an
 * unsupported extension is requested.  It also checks all the feature structs
 * chained into `pCreateInfo->pNext` against the features returned by
 * `vkGetPhysicalDeviceFeatures2` and returns
 * `VK_ERROR_FEATURE_NOT_PRESENT` if an unsupported feature is requested.
 *
 * :param device:               |out| The device to initialize
 * :param physical_device:      |in|  The physical device
 * :param dispatch_table:       |in|  Device-level dispatch table
 * :param pCreateInfo:          |in|  VkDeviceCreateInfo pointer passed to
 *                                    `vkCreateDevice()`
 * :param alloc:                |in|  Allocation callbacks passed to
 *                                    `vkCreateDevice()`
 */
VkResult MUST_CHECK
vk_device_init(struct vk_device *device,
               struct vk_physical_device *physical_device,
               const struct vk_device_dispatch_table *dispatch_table,
               const VkDeviceCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *alloc);
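
/*
 * Example use from a driver's vkCreateDevice() implementation.  A hedged
 * sketch; the my_* names are hypothetical:
 *
 *    struct vk_device_dispatch_table dispatch_table;
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &my_device_entrypoints, true);
 *    vk_device_dispatch_table_from_entrypoints(
 *       &dispatch_table, &vk_common_device_entrypoints, false);
 *
 *    VkResult result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table,
 *                                     pCreateInfo, pAllocator);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 *    vk_device_set_drm_fd(&dev->vk, dev->fd);
 */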

static inline void
vk_device_set_drm_fd(struct vk_device *device, int drm_fd)
{
   device->drm_fd = drm_fd;
}

/** Tears down a vk_device
 *
 * :param device:       |out| The device to tear down
 */
void
vk_device_finish(struct vk_device *device);

/** Enables threaded submit on this device
 *
 * This doesn't guarantee that threaded submit will be used.  It just
 * disables the deferred submit option for emulated timeline semaphores and
 * forces them to always take the threaded path.  It also verifies that the
 * vk_sync types used by the driver work with threaded submit.
 *
 * This must be called before any queues are created.
 */
void vk_device_enable_threaded_submit(struct vk_device *device);

static inline bool
vk_device_supports_threaded_submit(const struct vk_device *device)
{
   return device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED ||
          device->submit_mode == VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;
}

VkResult vk_device_flush(struct vk_device *device);

VkResult PRINTFLIKE(4, 5)
_vk_device_set_lost(struct vk_device *device,
                    const char *file, int line,
                    const char *msg, ...);

#define vk_device_set_lost(device, ...) \
   _vk_device_set_lost(device, __FILE__, __LINE__, __VA_ARGS__)
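
/*
 * Example: reporting device loss from a driver.  The message is
 * printf-formatted and __FILE__/__LINE__ are captured by the macro; the
 * surrounding code is a hedged, hypothetical sketch:
 *
 *    if (ret == -EIO)
 *       return vk_device_set_lost(&dev->vk, "GPU hang on queue %u", idx);
 */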

void _vk_device_report_lost(struct vk_device *device);

static inline bool
vk_device_is_lost_no_report(struct vk_device *device)
{
   return p_atomic_read(&device->_lost.lost) > 0;
}

static inline bool
vk_device_is_lost(struct vk_device *device)
{
   int lost = vk_device_is_lost_no_report(device);
   if (unlikely(lost && !device->_lost.reported))
      _vk_device_report_lost(device);
   return lost;
}

static inline VkResult
vk_device_check_status(struct vk_device *device)
{
   if (vk_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (!device->check_status)
      return VK_SUCCESS;

   VkResult result = device->check_status(device);

   assert(result == VK_SUCCESS || result == VK_ERROR_DEVICE_LOST);
   if (result == VK_ERROR_DEVICE_LOST)
      assert(vk_device_is_lost_no_report(device));

   return result;
}

#ifndef _WIN32

uint64_t
vk_clock_gettime(clockid_t clock_id);

static inline uint64_t
vk_time_max_deviation(uint64_t begin, uint64_t end, uint64_t max_clock_period)
{
    /*
     * The maximum deviation is the sum of the interval over which we
     * perform the sampling and the maximum period of any sampled
     * clock.  That's because the maximum skew between any two sampled
     * clock edges is when the sampled clock with the largest period is
     * sampled at the end of that period but right at the beginning of the
     * sampling interval and some other clock is sampled right at the
     * beginning of its sampling period and right at the end of the
     * sampling interval.  Let's assume the GPU has the longest clock
     * period and that the application is sampling GPU and monotonic:
     *
     *                       s                 e
     *           w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
     * Raw       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     *                       g
     *           0         1         2         3
     * GPU       -----_____-----_____-----_____-----_____
     *
     *                                       m
     *                             x y z 0 1 2 3 4 5 6 7 8 9 a b c
     * Monotonic                   -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     * Interval              <----------------->
     * Deviation   <--------------------------->
     *
     *    s = read(raw)       2
     *    g = read(GPU)       1
     *    m = read(monotonic) 2
     *    e = read(raw)       b
     *
     * We round the sample interval up by one tick to cover sampling error
     * in the interval clock.
     */

   uint64_t sample_interval = end - begin + 1;

   return sample_interval + max_clock_period;
}
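
/*
 * Example: computing the maxDeviation output of
 * vkGetCalibratedTimestampsEXT().  A hedged sketch; `gpu_tick_ns` (the GPU
 * timestamp period in nanoseconds, rounded up) is a hypothetical driver
 * value:
 *
 *    uint64_t begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *    // ... sample the GPU and any other requested clocks ...
 *    uint64_t end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
 *
 *    *pMaxDeviation = vk_time_max_deviation(begin, end, gpu_tick_ns);
 */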

#endif /* !_WIN32 */

PFN_vkVoidFunction
vk_device_get_proc_addr(const struct vk_device *device,
                        const char *name);

#ifdef __cplusplus
}
#endif

#endif /* VK_DEVICE_H */