1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <inttypes.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #ifdef MAJOR_IN_MKDEV
29 #include <sys/mkdev.h>
30 #endif
31 #ifdef MAJOR_IN_SYSMACROS
32 #include <sys/sysmacros.h>
33 #endif
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include "drm-uapi/drm_fourcc.h"
39 #include "drm-uapi/drm.h"
40 #include <xf86drm.h>
41 
42 #include "anv_private.h"
43 #include "anv_measure.h"
44 #include "util/u_debug.h"
45 #include "util/build_id.h"
46 #include "util/disk_cache.h"
47 #include "util/mesa-sha1.h"
48 #include "util/os_file.h"
49 #include "util/os_misc.h"
50 #include "util/u_atomic.h"
51 #include "util/u_string.h"
52 #include "util/driconf.h"
53 #include "git_sha1.h"
54 #include "vk_util.h"
55 #include "vk_deferred_operation.h"
56 #include "vk_drm_syncobj.h"
57 #include "common/i915/intel_defines.h"
58 #include "common/intel_debug_identifier.h"
59 #include "common/intel_uuid.h"
60 #include "perf/intel_perf.h"
61 
62 #include "genxml/gen7_pack.h"
63 #include "genxml/genX_bits.h"
64 
65 static const driOptionDescription anv_dri_options[] = {
66    DRI_CONF_SECTION_PERFORMANCE
67       DRI_CONF_ADAPTIVE_SYNC(true)
68       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
69       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
70       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
71       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
72       DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
73       DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
74       DRI_CONF_NO_16BIT(false)
75       DRI_CONF_HASVK_OVERRIDE_API_VERSION(false)
76    DRI_CONF_SECTION_END
77 
78    DRI_CONF_SECTION_DEBUG
79       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
80       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
81       DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
82       DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
83       DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
84    DRI_CONF_SECTION_END
85 
86    DRI_CONF_SECTION_QUALITY
87       DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
88    DRI_CONF_SECTION_END
89 };
90 
91 /* This is probably far too big but it reflects the max size used for messages
92  * in OpenGL's KHR_debug.
93  */
94 #define MAX_DEBUG_MESSAGE_LENGTH    4096
95 
96 /* Render engine timestamp register */
97 #define TIMESTAMP 0x2358
98 
99 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
100 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
101 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
102 #endif
103 
104 static void
105 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
106 {
107    char str[MAX_DEBUG_MESSAGE_LENGTH];
108    struct anv_device *device = (struct anv_device *)data;
109    UNUSED struct anv_instance *instance = device->physical->instance;
110 
111    va_list args;
112    va_start(args, fmt);
113    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
114    va_end(args);
115 
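   /* The formatted message is currently dropped on the floor: the vk_logd()
    * call below is commented out.
    */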
116    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
117 }
118 
119 static void
120 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
121 {
122    va_list args;
123    va_start(args, fmt);
124 
125    if (INTEL_DEBUG(DEBUG_PERF))
126       mesa_logd_v(fmt, args);
127 
128    va_end(args);
129 }
130 
131 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
132     defined(VK_USE_PLATFORM_XCB_KHR) || \
133     defined(VK_USE_PLATFORM_XLIB_KHR) || \
134     defined(VK_USE_PLATFORM_DISPLAY_KHR)
135 #define ANV_USE_WSI_PLATFORM
136 #endif
137 
138 #ifdef ANDROID_STRICT
139 #define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
140 #else
141 #define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
142 #define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
143 #endif
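
/* Reminder: VK_MAKE_VERSION(major, minor, patch) packs its arguments as
 * (major << 22) | (minor << 12) | patch, so VK_MAKE_VERSION(1, 3, 0) is
 * 0x00403000.  The patch field here carries VK_HEADER_VERSION so the
 * advertised version tracks the Vulkan header the driver was built against.
 */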
144 
145 VkResult anv_EnumerateInstanceVersion(
146     uint32_t*                                   pApiVersion)
147 {
148 #ifdef ANDROID_STRICT
149    *pApiVersion = ANV_API_VERSION;
150 #else
151    *pApiVersion = ANV_API_VERSION_1_3;
152 #endif
153    return VK_SUCCESS;
154 }
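
/* Illustrative sketch (not part of this driver): applications typically reach
 * this entry point through the loader before creating an instance, e.g.:
 *
 *    uint32_t version = VK_API_VERSION_1_0;
 *    vkEnumerateInstanceVersion(&version);
 *    bool wants_13 = VK_API_VERSION_MINOR(version) >= 3;
 *
 * and then pick VkApplicationInfo::apiVersion accordingly.
 */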
155 
156 static const struct vk_instance_extension_table instance_extensions = {
157    .KHR_device_group_creation                = true,
158    .KHR_external_fence_capabilities          = true,
159    .KHR_external_memory_capabilities         = true,
160    .KHR_external_semaphore_capabilities      = true,
161    .KHR_get_physical_device_properties2      = true,
162    .EXT_debug_report                         = true,
163    .EXT_debug_utils                          = true,
164 
165 #ifdef ANV_USE_WSI_PLATFORM
166    .KHR_get_surface_capabilities2            = true,
167    .KHR_surface                              = true,
168    .KHR_surface_protected_capabilities       = true,
169 #endif
170 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
171    .KHR_wayland_surface                      = true,
172 #endif
173 #ifdef VK_USE_PLATFORM_XCB_KHR
174    .KHR_xcb_surface                          = true,
175 #endif
176 #ifdef VK_USE_PLATFORM_XLIB_KHR
177    .KHR_xlib_surface                         = true,
178 #endif
179 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
180    .EXT_acquire_xlib_display                 = true,
181 #endif
182 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
183    .KHR_display                              = true,
184    .KHR_get_display_properties2              = true,
185    .EXT_direct_mode_display                  = true,
186    .EXT_display_surface_counter              = true,
187    .EXT_acquire_drm_display                  = true,
188 #endif
189 #ifndef VK_USE_PLATFORM_WIN32_KHR
190    .EXT_headless_surface                     = true,
191 #endif
192 };
193 
194 static void
195 get_device_extensions(const struct anv_physical_device *device,
196                       struct vk_device_extension_table *ext)
197 {
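   /* VK_KHR_external_fence and VK_KHR_external_fence_fd are only exposed when
    * the kernel's DRM syncobjs support CPU-side waits; that is what
    * has_syncobj_wait captures below.
    */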
198    const bool has_syncobj_wait =
199       (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
200 
201    *ext = (struct vk_device_extension_table) {
202       .KHR_8bit_storage                      = device->info.ver >= 8,
203       .KHR_16bit_storage                     = device->info.ver >= 8 && !device->instance->no_16bit,
204       .KHR_bind_memory2                      = true,
205       .KHR_buffer_device_address             = device->has_a64_buffer_access,
206       .KHR_copy_commands2                    = true,
207       .KHR_create_renderpass2                = true,
208       .KHR_dedicated_allocation              = true,
209       .KHR_deferred_host_operations          = true,
210       .KHR_depth_stencil_resolve             = true,
211       .KHR_descriptor_update_template        = true,
212       .KHR_device_group                      = true,
213       .KHR_draw_indirect_count               = true,
214       .KHR_driver_properties                 = true,
215       .KHR_dynamic_rendering                 = true,
216       .KHR_external_fence                    = has_syncobj_wait,
217       .KHR_external_fence_fd                 = has_syncobj_wait,
218       .KHR_external_memory                   = true,
219       .KHR_external_memory_fd                = true,
220       .KHR_external_semaphore                = true,
221       .KHR_external_semaphore_fd             = true,
222       .KHR_format_feature_flags2             = true,
223       .KHR_get_memory_requirements2          = true,
224       .KHR_image_format_list                 = true,
225       .KHR_imageless_framebuffer             = true,
226 #ifdef ANV_USE_WSI_PLATFORM
227       .KHR_incremental_present               = true,
228 #endif
229       .KHR_maintenance1                      = true,
230       .KHR_maintenance2                      = true,
231       .KHR_maintenance3                      = true,
232       .KHR_maintenance4                      = true,
233       .KHR_multiview                         = true,
234       .KHR_performance_query =
235          !anv_use_relocations(device) && device->perf &&
236          (intel_perf_has_hold_preemption(device->perf) ||
237           INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
238          device->use_call_secondary,
239       .KHR_pipeline_executable_properties    = true,
240       /* Hide these behind dri configs for now since we cannot implement them reliably on
241        * all surfaces yet. There is no surface capability query for present wait/id,
242        * but the feature is useful enough to hide behind an opt-in mechanism for now.
243        * If the instance only enables surface extensions that unconditionally support present wait,
244        * we can also expose the extensions that way. */
245       .KHR_present_id =
246          driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
247          wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
248       .KHR_present_wait =
249          driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
250          wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
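      /* Hypothetical drirc snippet for the opt-in above (the option name
       * matches the driQueryOptionb() lookup):
       *
       *    <driconf>
       *      <device>
       *        <application name="my-app" executable="my-app">
       *          <option name="vk_khr_present_wait" value="true" />
       *        </application>
       *      </device>
       *    </driconf>
       */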
251       .KHR_push_descriptor                   = true,
252       .KHR_relaxed_block_layout              = true,
253       .KHR_sampler_mirror_clamp_to_edge      = true,
254       .KHR_sampler_ycbcr_conversion          = true,
255       .KHR_separate_depth_stencil_layouts    = true,
256       .KHR_shader_clock                      = true,
257       .KHR_shader_draw_parameters            = true,
258       .KHR_shader_expect_assume              = true,
259       .KHR_shader_float16_int8               = device->info.ver >= 8 && !device->instance->no_16bit,
260       .KHR_shader_float_controls             = true,
261       .KHR_shader_integer_dot_product        = true,
262       .KHR_shader_non_semantic_info          = true,
263       .KHR_shader_relaxed_extended_instruction = true,
264       .KHR_shader_subgroup_extended_types    = device->info.ver >= 8,
265       .KHR_shader_subgroup_uniform_control_flow = true,
266       .KHR_shader_terminate_invocation       = true,
267       .KHR_spirv_1_4                         = true,
268       .KHR_storage_buffer_storage_class      = true,
269 #ifdef ANV_USE_WSI_PLATFORM
270       .KHR_swapchain                         = true,
271       .KHR_swapchain_mutable_format          = true,
272 #endif
273       .KHR_synchronization2                  = true,
274       .KHR_timeline_semaphore                = true,
275       .KHR_uniform_buffer_standard_layout    = true,
276       .KHR_variable_pointers                 = true,
277       .KHR_vulkan_memory_model               = true,
278       .KHR_workgroup_memory_explicit_layout  = true,
279       .KHR_zero_initialize_workgroup_memory  = true,
280       .EXT_4444_formats                      = true,
281       .EXT_border_color_swizzle              = device->info.ver >= 8,
282       .EXT_buffer_device_address             = device->has_a64_buffer_access,
283       .EXT_calibrated_timestamps             = device->has_reg_timestamp,
284       .EXT_color_write_enable                = true,
285       .EXT_conditional_rendering             = device->info.verx10 >= 75,
286       .EXT_custom_border_color               = device->info.ver >= 8,
287       .EXT_depth_clamp_zero_one              = true,
288       .EXT_depth_clip_control                = true,
289       .EXT_depth_clip_enable                 = true,
290 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
291       .EXT_display_control                   = true,
292 #endif
293       .EXT_extended_dynamic_state            = true,
294       .EXT_extended_dynamic_state2           = true,
295       .EXT_external_memory_dma_buf           = true,
296       .EXT_external_memory_host              = true,
297       .EXT_global_priority                   = device->max_context_priority >=
298                                                INTEL_CONTEXT_MEDIUM_PRIORITY,
299       .EXT_global_priority_query             = device->max_context_priority >=
300                                                INTEL_CONTEXT_MEDIUM_PRIORITY,
301       .EXT_host_query_reset                  = true,
302       .EXT_image_2d_view_of_3d               = true,
303       .EXT_image_robustness                  = true,
304       .EXT_image_drm_format_modifier         = true,
305       .EXT_image_view_min_lod                = true,
306       .EXT_index_type_uint8                  = true,
307       .EXT_inline_uniform_block              = true,
308       .EXT_line_rasterization                = true,
309       /* Enable the extension only if we have support for both local &
310        * system memory.
311        */
312       .EXT_memory_budget                     = device->sys.available,
313       .EXT_non_seamless_cube_map             = true,
314       .EXT_pci_bus_info                      = true,
315       .EXT_physical_device_drm               = true,
316       .EXT_pipeline_creation_cache_control   = true,
317       .EXT_pipeline_creation_feedback        = true,
318       .EXT_primitives_generated_query        = true,
319       .EXT_primitive_topology_list_restart   = true,
320       .EXT_private_data                      = true,
321       .EXT_provoking_vertex                  = true,
322       .EXT_queue_family_foreign              = true,
323       .EXT_robustness2                       = true,
324       .EXT_sample_locations                  = true,
325       .EXT_scalar_block_layout               = true,
326       .EXT_separate_stencil_usage            = true,
327       .EXT_shader_atomic_float               = true,
328       .EXT_shader_demote_to_helper_invocation = true,
329       .EXT_shader_module_identifier          = true,
330       .EXT_shader_replicated_composites      = true,
331       .EXT_shader_subgroup_ballot            = true,
332       .EXT_shader_subgroup_vote              = true,
333       .EXT_shader_viewport_index_layer       = true,
334       .EXT_subgroup_size_control             = true,
335       .EXT_texel_buffer_alignment            = true,
336       .EXT_tooling_info                      = true,
337       .EXT_transform_feedback                = true,
338       .EXT_vertex_attribute_divisor          = true,
339       .EXT_ycbcr_image_arrays                = true,
340 #if DETECT_OS_ANDROID
341       .ANDROID_external_memory_android_hardware_buffer = true,
342       .ANDROID_native_buffer                 = true,
343 #endif
344       .GOOGLE_decorate_string                = true,
345       .GOOGLE_hlsl_functionality1            = true,
346       .GOOGLE_user_type                      = true,
347       .INTEL_performance_query               = device->perf &&
348                                                intel_perf_has_hold_preemption(device->perf),
349       .INTEL_shader_integer_functions2       = device->info.ver >= 8,
350       .EXT_multi_draw                        = true,
351       .NV_compute_shader_derivatives         = true,
352       .VALVE_mutable_descriptor_type         = true,
353    };
354 }
355 
356 static void
357 get_features(const struct anv_physical_device *pdevice,
358              struct vk_features *features)
359 {
360    /* Just pick one; they're all the same */
361    const bool has_astc_ldr =
362       isl_format_supports_sampling(&pdevice->info,
363                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
364 
365    *features = (struct vk_features) {
366       /* Vulkan 1.0 */
367       .robustBufferAccess                       = true,
368       .fullDrawIndexUint32                      = true,
369       .imageCubeArray                           = true,
370       .independentBlend                         = true,
371       .geometryShader                           = true,
372       .tessellationShader                       = true,
373       .sampleRateShading                        = true,
374       .dualSrcBlend                             = true,
375       .logicOp                                  = true,
376       .multiDrawIndirect                        = true,
377       .drawIndirectFirstInstance                = true,
378       .depthClamp                               = true,
379       .depthBiasClamp                           = true,
380       .fillModeNonSolid                         = true,
381       .depthBounds                              = pdevice->info.ver >= 12,
382       .wideLines                                = true,
383       .largePoints                              = true,
384       .alphaToOne                               = true,
385       .multiViewport                            = true,
386       .samplerAnisotropy                        = true,
387       .textureCompressionETC2                   = pdevice->info.ver >= 8 ||
388                                                   pdevice->info.platform == INTEL_PLATFORM_BYT,
389       .textureCompressionASTC_LDR               = has_astc_ldr,
390       .textureCompressionBC                     = true,
391       .occlusionQueryPrecise                    = true,
392       .pipelineStatisticsQuery                  = true,
393       .fragmentStoresAndAtomics                 = true,
394       .shaderTessellationAndGeometryPointSize   = true,
395       .shaderImageGatherExtended                = true,
396       .shaderStorageImageExtendedFormats        = true,
397       .shaderStorageImageMultisample            = false,
398       .shaderStorageImageReadWithoutFormat      = false,
399       .shaderStorageImageWriteWithoutFormat     = true,
400       .shaderUniformBufferArrayDynamicIndexing  = true,
401       .shaderSampledImageArrayDynamicIndexing   = true,
402       .shaderStorageBufferArrayDynamicIndexing  = true,
403       .shaderStorageImageArrayDynamicIndexing   = true,
404       .shaderClipDistance                       = true,
405       .shaderCullDistance                       = true,
406       .shaderFloat64                            = pdevice->info.ver >= 8 &&
407                                                   pdevice->info.has_64bit_float,
408       .shaderInt64                              = pdevice->info.ver >= 8,
409       .shaderInt16                              = pdevice->info.ver >= 8,
410       .shaderResourceMinLod                     = false,
411       .variableMultisampleRate                  = true,
412       .inheritedQueries                         = true,
413 
414       /* Vulkan 1.1 */
415       .storageBuffer16BitAccess            = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
416       .uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
417       .storagePushConstant16               = pdevice->info.ver >= 8,
418       .storageInputOutput16                = false,
419       .multiview                           = true,
420       .multiviewGeometryShader             = true,
421       .multiviewTessellationShader         = true,
422       .variablePointersStorageBuffer       = true,
423       .variablePointers                    = true,
424       .protectedMemory                     = false,
425       .samplerYcbcrConversion              = true,
426       .shaderDrawParameters                = true,
427 
428       /* Vulkan 1.2 */
429       .samplerMirrorClampToEdge            = true,
430       .drawIndirectCount                   = true,
431       .storageBuffer8BitAccess             = pdevice->info.ver >= 8,
432       .uniformAndStorageBuffer8BitAccess   = pdevice->info.ver >= 8,
433       .storagePushConstant8                = pdevice->info.ver >= 8,
434       .shaderBufferInt64Atomics            = false,
435       .shaderSharedInt64Atomics            = false,
436       .shaderFloat16                       = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
437       .shaderInt8                          = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
438 
439       .descriptorIndexing                                 = false,
440       .shaderInputAttachmentArrayDynamicIndexing          = false,
441       .shaderUniformTexelBufferArrayDynamicIndexing       = false,
442       .shaderStorageTexelBufferArrayDynamicIndexing       = false,
443       .shaderUniformBufferArrayNonUniformIndexing         = false,
444       .shaderSampledImageArrayNonUniformIndexing          = false,
445       .shaderStorageBufferArrayNonUniformIndexing         = false,
446       .shaderStorageImageArrayNonUniformIndexing          = false,
447       .shaderInputAttachmentArrayNonUniformIndexing       = false,
448       .shaderUniformTexelBufferArrayNonUniformIndexing    = false,
449       .shaderStorageTexelBufferArrayNonUniformIndexing    = false,
450       .descriptorBindingUniformBufferUpdateAfterBind      = false,
451       .descriptorBindingSampledImageUpdateAfterBind       = false,
452       .descriptorBindingStorageImageUpdateAfterBind       = false,
453       .descriptorBindingStorageBufferUpdateAfterBind      = false,
454       .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
455       .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
456       .descriptorBindingUpdateUnusedWhilePending          = false,
457       .descriptorBindingPartiallyBound                    = false,
458       .descriptorBindingVariableDescriptorCount           = false,
459       .runtimeDescriptorArray                             = false,
460 
461       .samplerFilterMinmax                 = false,
462       .scalarBlockLayout                   = true,
463       .imagelessFramebuffer                = true,
464       .uniformBufferStandardLayout         = true,
465       .shaderSubgroupExtendedTypes         = true,
466       .separateDepthStencilLayouts         = true,
467       .hostQueryReset                      = true,
468       .timelineSemaphore                   = true,
469       .bufferDeviceAddress                 = pdevice->has_a64_buffer_access,
470       .bufferDeviceAddressCaptureReplay    = pdevice->has_a64_buffer_access,
471       .bufferDeviceAddressMultiDevice      = false,
472       .vulkanMemoryModel                   = true,
473       .vulkanMemoryModelDeviceScope        = true,
474       .vulkanMemoryModelAvailabilityVisibilityChains = true,
475       .shaderOutputViewportIndex           = true,
476       .shaderOutputLayer                   = true,
477       .subgroupBroadcastDynamicId          = true,
478 
479       /* Vulkan 1.3 */
480       .robustImageAccess = true,
481       .inlineUniformBlock = true,
482       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
483       .pipelineCreationCacheControl = true,
484       .privateData = true,
485       .shaderDemoteToHelperInvocation = true,
486       .shaderTerminateInvocation = true,
487       .subgroupSizeControl = true,
488       .computeFullSubgroups = true,
489       .synchronization2 = true,
490       .textureCompressionASTC_HDR = false,
491       .shaderZeroInitializeWorkgroupMemory = true,
492       .dynamicRendering = true,
493       .shaderIntegerDotProduct = true,
494       .maintenance4 = true,
495 
496       /* VK_EXT_4444_formats */
497       .formatA4R4G4B4 = true,
498       .formatA4B4G4R4 = false,
499 
500       /* VK_EXT_border_color_swizzle */
501       .borderColorSwizzle = true,
502       .borderColorSwizzleFromImage = true,
503 
504       /* VK_EXT_color_write_enable */
505       .colorWriteEnable = true,
506 
507       /* VK_EXT_image_2d_view_of_3d */
508       .image2DViewOf3D = true,
509       .sampler2DViewOf3D = false,
510 
511       /* VK_NV_compute_shader_derivatives */
512       .computeDerivativeGroupQuads = true,
513       .computeDerivativeGroupLinear = true,
514 
515       /* VK_EXT_conditional_rendering */
516       .conditionalRendering = pdevice->info.verx10 >= 75,
517       .inheritedConditionalRendering = pdevice->info.verx10 >= 75,
518 
519       /* VK_EXT_custom_border_color */
520       .customBorderColors = pdevice->info.ver >= 8,
521       .customBorderColorWithoutFormat = pdevice->info.ver >= 8,
522 
523       /* VK_EXT_depth_clamp_zero_one */
524       .depthClampZeroOne = true,
525 
526       /* VK_EXT_depth_clip_enable */
527       .depthClipEnable = true,
528 
529       /* VK_KHR_global_priority */
530       .globalPriorityQuery = true,
531 
532       /* VK_EXT_image_view_min_lod */
533       .minLod = true,
534 
535       /* VK_EXT_index_type_uint8 */
536       .indexTypeUint8 = true,
537 
538       /* VK_EXT_line_rasterization */
539       /* Rectangular lines must use the strict algorithm, which is not
540        * supported for wide lines prior to ICL.  See rasterization_mode for
541        * details and how the HW states are programmed.
542        */
543       .rectangularLines = false,
544       .bresenhamLines = true,
545       /* Support for Smooth lines with MSAA was removed on gfx11.  From the
546        * BSpec section "Multisample ModesState" table for "AA Line Support
547        * Requirements":
548        *
549        *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
550        *
551        * Fortunately, this isn't a case most people care about.
552        */
553       .smoothLines = pdevice->info.ver < 10,
554       .stippledRectangularLines = false,
555       .stippledBresenhamLines = true,
556       .stippledSmoothLines = false,
557 
558       /* VK_EXT_mutable_descriptor_type */
559       .mutableDescriptorType = true,
560 
561       /* VK_KHR_performance_query */
562       .performanceCounterQueryPools = true,
563       /* HW only supports a single configuration at a time. */
564       .performanceCounterMultipleQueryPools = false,
565 
566       /* VK_KHR_pipeline_executable_properties */
567       .pipelineExecutableInfo = true,
568 
569       /* VK_EXT_primitives_generated_query */
570       .primitivesGeneratedQuery = true,
571       .primitivesGeneratedQueryWithRasterizerDiscard = false,
572       .primitivesGeneratedQueryWithNonZeroStreams = false,
573 
574       /* VK_EXT_provoking_vertex */
575       .provokingVertexLast = true,
576       .transformFeedbackPreservesProvokingVertex = true,
577 
578       /* VK_EXT_robustness2 */
579       .robustBufferAccess2 = true,
580       .robustImageAccess2 = true,
581       .nullDescriptor = true,
582 
583       /* VK_EXT_shader_atomic_float */
584       .shaderBufferFloat32Atomics =    true,
585       .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
586       .shaderBufferFloat64Atomics =
587          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
588       .shaderBufferFloat64AtomicAdd =  false,
589       .shaderSharedFloat32Atomics =    true,
590       .shaderSharedFloat32AtomicAdd =  false,
591       .shaderSharedFloat64Atomics =    false,
592       .shaderSharedFloat64AtomicAdd =  false,
593       .shaderImageFloat32Atomics =     true,
594       .shaderImageFloat32AtomicAdd =   false,
595       .sparseImageFloat32Atomics =     false,
596       .sparseImageFloat32AtomicAdd =   false,
597 
598       /* VK_KHR_shader_clock */
599       .shaderSubgroupClock = true,
600       .shaderDeviceClock = false,
601 
602       /* VK_INTEL_shader_integer_functions2 */
603       .shaderIntegerFunctions2 = true,
604 
605       /* VK_EXT_shader_module_identifier */
606       .shaderModuleIdentifier = true,
607 
608       /* VK_EXT_shader_replicated_composites */
609       .shaderReplicatedComposites = true,
610 
611       /* VK_KHR_shader_subgroup_uniform_control_flow */
612       .shaderSubgroupUniformControlFlow = true,
613 
614       /* VK_EXT_texel_buffer_alignment */
615       .texelBufferAlignment = true,
616 
617       /* VK_EXT_transform_feedback */
618       .transformFeedback = true,
619       .geometryStreams = true,
620 
621       /* VK_EXT_vertex_attribute_divisor */
622       .vertexAttributeInstanceRateDivisor = true,
623       .vertexAttributeInstanceRateZeroDivisor = true,
624 
625       /* VK_KHR_workgroup_memory_explicit_layout */
626       .workgroupMemoryExplicitLayout = true,
627       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
628       .workgroupMemoryExplicitLayout8BitAccess = true,
629       .workgroupMemoryExplicitLayout16BitAccess = true,
630 
631       /* VK_EXT_ycbcr_image_arrays */
632       .ycbcrImageArrays = true,
633 
634       /* VK_EXT_extended_dynamic_state */
635       .extendedDynamicState = true,
636 
637       /* VK_EXT_extended_dynamic_state2 */
638       .extendedDynamicState2 = true,
639       .extendedDynamicState2LogicOp = true,
640       .extendedDynamicState2PatchControlPoints = false,
641 
642       /* VK_EXT_multi_draw */
643       .multiDraw = true,
644 
645       /* VK_EXT_non_seamless_cube_map */
646       .nonSeamlessCubeMap = true,
647 
648       /* VK_EXT_primitive_topology_list_restart */
649       .primitiveTopologyListRestart = true,
650       .primitiveTopologyPatchListRestart = true,
651 
652       /* VK_EXT_depth_clip_control */
653       .depthClipControl = true,
654 
655       /* VK_KHR_present_id */
656       .presentId = pdevice->vk.supported_extensions.KHR_present_id,
657 
658       /* VK_KHR_present_wait */
659       .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
660 
661       /* VK_KHR_shader_expect_assume */
662       .shaderExpectAssume = true,
663 
664       /* VK_KHR_shader_relaxed_extended_instruction */
665       .shaderRelaxedExtendedInstruction = true,
666    };
667 
668    /* We can't do image stores in vec4 shaders */
669    features->vertexPipelineStoresAndAtomics =
670       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
671       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
672 
673    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
674 
675    /* The new DOOM and Wolfenstein games require depthBounds without
676     * checking for it.  They seem to run fine without it so just claim it's
677     * there and accept the consequences.
678     */
679    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
680       features->depthBounds = true;
681 }
682 
683 
684 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
685 
686 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
687 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
688 
689 #define MAX_CUSTOM_BORDER_COLORS                   4096
690 
691 static void
692 get_properties_1_1(const struct anv_physical_device *pdevice,
693                    struct vk_properties *p)
694 {
695    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
696    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
697    memset(p->deviceLUID, 0, VK_LUID_SIZE);
698    p->deviceNodeMask = 0;
699    p->deviceLUIDValid = false;
700 
701    p->subgroupSize = ELK_SUBGROUP_SIZE;
702    VkShaderStageFlags scalar_stages = 0;
703    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
704       if (pdevice->compiler->scalar_stage[stage])
705          scalar_stages |= mesa_to_vk_shader_stage(stage);
706    }
707    p->subgroupSupportedStages = scalar_stages;
708    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
709                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
710                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
711                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
712                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
713                                     VK_SUBGROUP_FEATURE_QUAD_BIT;
714    if (pdevice->info.ver >= 8) {
715       /* TODO: There's no technical reason why these can't be made to
716        * work on gfx7 but they don't at the moment so it's best to leave
717        * the feature disabled than enabled and broken.
718        */
719       p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
720                                         VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
721    }
722    p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;
723 
724    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
725    p->maxMultiviewViewCount      = 16;
726    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
727    p->protectedNoFault           = false;
728    /* This value doesn't matter for us today as our per-stage descriptors are
729     * the real limit.
730     */
731    p->maxPerSetDescriptors       = 1024;
732    p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
733 }
734 
735 static void
736 get_properties_1_2(const struct anv_physical_device *pdevice,
737                    struct vk_properties *p)
738 {
739    p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
740    memset(p->driverName, 0, sizeof(p->driverName));
741    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
742             "Intel open-source Mesa driver");
743    memset(p->driverInfo, 0, sizeof(p->driverInfo));
744    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
745             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
746 
747    /* Don't advertise conformance with a particular version if the hardware's
748     * support is incomplete/alpha.
749     */
750    if (pdevice->is_alpha) {
751       p->conformanceVersion = (VkConformanceVersion) {
752          .major = 0,
753          .minor = 0,
754          .subminor = 0,
755          .patch = 0,
756       };
757    }
758    else {
759       p->conformanceVersion = (VkConformanceVersion) {
760          .major = 1,
761          .minor = pdevice->use_softpin ? 3 : 2,
762          .subminor = 0,
763          .patch = 0,
764       };
765    }
766 
767    p->denormBehaviorIndependence =
768       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
769    p->roundingModeIndependence =
770       VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
771 
772    /* Broadwell does not support HF denorms and there are restrictions on
773     * other gens. According to Kabylake's PRM:
774     *
775     * "math - Extended Math Function
776     * [...]
777     * Restriction : Half-float denorms are always retained."
778     */
779    p->shaderDenormFlushToZeroFloat16         = false;
780    p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
781    p->shaderRoundingModeRTEFloat16           = true;
782    p->shaderRoundingModeRTZFloat16           = true;
783    p->shaderSignedZeroInfNanPreserveFloat16  = true;
784 
785    p->shaderDenormFlushToZeroFloat32         = true;
786    p->shaderDenormPreserveFloat32            = pdevice->info.ver >= 8;
787    p->shaderRoundingModeRTEFloat32           = true;
788    p->shaderRoundingModeRTZFloat32           = true;
789    p->shaderSignedZeroInfNanPreserveFloat32  = true;
790 
791    p->shaderDenormFlushToZeroFloat64         = true;
792    p->shaderDenormPreserveFloat64            = true;
793    p->shaderRoundingModeRTEFloat64           = true;
794    p->shaderRoundingModeRTZFloat64           = true;
795    p->shaderSignedZeroInfNanPreserveFloat64  = true;
796 
797    /* It's a bit hard to exactly map our implementation to the limits
798     * described by Vulkan.  The bindless surface handle in the extended
799     * message descriptors is 20 bits and it's an index into the table of
800     * RENDER_SURFACE_STATE structs that starts at bindless surface base
801     * address.  This means that we can have at most 1M surface states
802     * allocated at any given time.  Since most image views take two
803     * descriptors, this means we have a limit of about 500K image views.
804     *
805     * However, since we allocate surface states at vkCreateImageView time,
806     * this means our limit is actually something on the order of 500K image
807     * views allocated at any time.  The actual limit described by Vulkan, on
808     * the other hand, is a limit of how many you can have in a descriptor set.
809     * Assuming anyone using 1M descriptors will be using the same image view
810     * twice a bunch of times (or a bunch of null descriptors), we can safely
811     * advertise a larger limit here.
812     */
813    const unsigned max_bindless_views = 1 << 20;
814    p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
815    p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
816    p->shaderSampledImageArrayNonUniformIndexingNative    = false;
817    p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
818    p->shaderStorageImageArrayNonUniformIndexingNative    = false;
819    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
820    p->robustBufferAccessUpdateAfterBind                  = true;
821    p->quadDivergentImplicitLod                           = false;
822    p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
823    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
824    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
825    p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
826    p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
827    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
828    p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
829    p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
830    p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
831    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
832    p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
833    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
834    p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
835    p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
836    p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
837 
838    /* We support all of the depth resolve modes */
839    p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
840                                       VK_RESOLVE_MODE_AVERAGE_BIT |
841                                       VK_RESOLVE_MODE_MIN_BIT |
842                                       VK_RESOLVE_MODE_MAX_BIT;
843    /* Average doesn't make sense for stencil so we don't support that */
844    p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
845    if (pdevice->info.ver >= 8) {
846       /* The advanced stencil resolve modes currently require stencil
847        * sampling be supported by the hardware.
848        */
849       p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
850                                          VK_RESOLVE_MODE_MAX_BIT;
851    }
852    p->independentResolveNone  = true;
853    p->independentResolve      = true;
854 
855    p->filterMinmaxSingleComponentFormats  = false;
856    p->filterMinmaxImageComponentMapping   = false;
857 
858    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
859 
860    p->framebufferIntegerColorSampleCounts =
861       pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
862 }
863 
864 static void
865 get_properties_1_3(const struct anv_physical_device *pdevice,
866                    struct vk_properties *p)
867 {
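   /* Shaders are compiled to SIMD8, SIMD16, or SIMD32, so the subgroup size
    * the compiler ends up with is always in the [8, 32] range.
    */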
868    p->minSubgroupSize = 8;
869    p->maxSubgroupSize = 32;
870    p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
871    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
872 
873    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
874    p->maxPerStageDescriptorInlineUniformBlocks =
875       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
876    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
877       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
878    p->maxDescriptorSetInlineUniformBlocks =
879       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
880    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
881       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
882    p->maxInlineUniformTotalSize = UINT16_MAX;
883 
884    p->integerDotProduct8BitUnsignedAccelerated = false;
885    p->integerDotProduct8BitSignedAccelerated = false;
886    p->integerDotProduct8BitMixedSignednessAccelerated = false;
887    p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
888    p->integerDotProduct4x8BitPackedSignedAccelerated = false;
889    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
890    p->integerDotProduct16BitUnsignedAccelerated = false;
891    p->integerDotProduct16BitSignedAccelerated = false;
892    p->integerDotProduct16BitMixedSignednessAccelerated = false;
893    p->integerDotProduct32BitUnsignedAccelerated = false;
894    p->integerDotProduct32BitSignedAccelerated = false;
895    p->integerDotProduct32BitMixedSignednessAccelerated = false;
896    p->integerDotProduct64BitUnsignedAccelerated = false;
897    p->integerDotProduct64BitSignedAccelerated = false;
898    p->integerDotProduct64BitMixedSignednessAccelerated = false;
899    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
900    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
901    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
902    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
903    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
904    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
905    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
906    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
907    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
908    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
909    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
910    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
911    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
912    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
913    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
914 
915    /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
916     * Base Address:
917     *
918     *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
919     *    specifies the base address of the first element of the surface,
920     *    computed in software by adding the surface base address to the
921     *    byte offset of the element in the buffer. The base address must
922     *    be aligned to element size."
923     *
924     * The typed dataport messages require that things be texel aligned.
925     * Otherwise, we may just load/store the wrong data or, in the worst
926     * case, there may be hangs.
927     */
928    p->storageTexelBufferOffsetAlignmentBytes = 16;
929    p->storageTexelBufferOffsetSingleTexelAlignment = true;
930 
931    /* The sampler, however, is much more forgiving and it can handle
932     * arbitrary byte alignment for linear and buffer surfaces.  It's
933     * hard to find a good PRM citation for this but years of empirical
934     * experience demonstrate that this is true.
935     */
936    p->uniformTexelBufferOffsetAlignmentBytes = 1;
937    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
938 
939    p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
940 }
941 
942 static void
943 get_properties(const struct anv_physical_device *pdevice,
944                struct vk_properties *props)
945 {
946    const struct intel_device_info *devinfo = &pdevice->info;
947 
948    const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
949    const uint32_t max_textures = 128;
950    const uint32_t max_samplers =
951       pdevice->has_bindless_samplers ? UINT16_MAX :
952       (devinfo->verx10 >= 75) ? 128 : 16;
953    const uint32_t max_images = MAX_IMAGES;
954 
955    /* If we can use bindless for everything, claim a high per-stage limit,
956     * otherwise use the binding table size, minus the slots reserved for
957     * render targets and one slot for the descriptor buffer. */
958    const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
959 
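   /* Each HW thread covers at most SIMD32 invocations, so the largest
    * workgroup is 32 * max_cs_workgroup_threads, capped here at 1024.
    */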
960    const uint32_t max_workgroup_size =
961       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
962 
963    VkSampleCountFlags sample_counts =
964       isl_device_get_sample_counts(&pdevice->isl_dev);
965 
966    *props = (struct vk_properties) {
967 #if DETECT_OS_ANDROID
968       .apiVersion = ANV_API_VERSION,
969 #else
970       .apiVersion = pdevice->use_softpin ? ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
971 #endif /* DETECT_OS_ANDROID */
972       .driverVersion = vk_get_driver_version(),
973       .vendorID = 0x8086,
974       .deviceID = pdevice->info.pci_device_id,
975       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
976 
977       /* Limits: */
978       .maxImageDimension1D                      = (1 << 14),
979       /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their width
980        * is greater than 8192 pixels. */
981       .maxImageDimension2D                      = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
982       .maxImageDimension3D                      = (1 << 11),
983       .maxImageDimensionCube                    = (1 << 14),
984       .maxImageArrayLayers                      = (1 << 11),
985       .maxTexelBufferElements                   = 128 * 1024 * 1024,
986       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
987       .maxStorageBufferRange                    = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
988       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
989       .maxMemoryAllocationCount                 = UINT32_MAX,
990       .maxSamplerAllocationCount                = 64 * 1024,
991       .bufferImageGranularity                   = 1,
992       .sparseAddressSpaceSize                   = 0,
993       .maxBoundDescriptorSets                   = MAX_SETS,
994       .maxPerStageDescriptorSamplers            = max_samplers,
995       .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
996       .maxPerStageDescriptorStorageBuffers      = max_ssbos,
997       .maxPerStageDescriptorSampledImages       = max_textures,
998       .maxPerStageDescriptorStorageImages       = max_images,
999       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1000       .maxPerStageResources                     = max_per_stage,
1001       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1002       .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
1003       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1004       .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
1005       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1006       .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1007       .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
1008       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1009       .maxVertexInputAttributes                 = MAX_VES,
1010       .maxVertexInputBindings                   = MAX_VBS,
1011       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1012        *
1013        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1014        */
1015       .maxVertexInputAttributeOffset            = 2047,
1016       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1017        *
1018        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
1019        *
1020        * Skylake PRMs: Volume 2d: Command Reference: Structures:
1021        *
1022        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1023        */
1024       .maxVertexInputBindingStride              = devinfo->ver < 9 ? 2048 : 4095,
1025       .maxVertexOutputComponents                = 128,
1026       .maxTessellationGenerationLevel           = 64,
1027       .maxTessellationPatchSize                 = 32,
1028       .maxTessellationControlPerVertexInputComponents = 128,
1029       .maxTessellationControlPerVertexOutputComponents = 128,
1030       .maxTessellationControlPerPatchOutputComponents = 128,
1031       .maxTessellationControlTotalOutputComponents = 2048,
1032       .maxTessellationEvaluationInputComponents = 128,
1033       .maxTessellationEvaluationOutputComponents = 128,
1034       .maxGeometryShaderInvocations             = 32,
1035       .maxGeometryInputComponents               = devinfo->ver >= 8 ? 128 : 64,
1036       .maxGeometryOutputComponents              = 128,
1037       .maxGeometryOutputVertices                = 256,
1038       .maxGeometryTotalOutputComponents         = 1024,
1039       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1040       .maxFragmentOutputAttachments             = 8,
1041       .maxFragmentDualSrcAttachments            = 1,
1042       .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
1043       .maxComputeSharedMemorySize               = 64 * 1024,
1044       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1045       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1046       .maxComputeWorkGroupSize = {
1047          max_workgroup_size,
1048          max_workgroup_size,
1049          max_workgroup_size,
1050       },
1051       .subPixelPrecisionBits                    = 8,
1052       .subTexelPrecisionBits                    = 8,
1053       .mipmapPrecisionBits                      = 8,
1054       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1055       .maxDrawIndirectCount                     = UINT32_MAX,
1056       .maxSamplerLodBias                        = 16,
1057       .maxSamplerAnisotropy                     = 16,
1058       .maxViewports                             = MAX_VIEWPORTS,
1059       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1060       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1061       .viewportSubPixelBits                     = 13, /* We take a float? */
1062       .minMemoryMapAlignment                    = 4096, /* A page */
1063       /* The dataport requires texel alignment so we need to assume a worst
1064        * case of R32G32B32A32 which is 16 bytes.
1065        */
1066       .minTexelBufferOffsetAlignment            = 16,
1067       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1068       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1069       .minTexelOffset                           = -8,
1070       .maxTexelOffset                           = 7,
1071       .minTexelGatherOffset                     = -32,
1072       .maxTexelGatherOffset                     = 31,
1073       .minInterpolationOffset                   = -0.5,
1074       .maxInterpolationOffset                   = 0.4375,
1075       .subPixelInterpolationOffsetBits          = 4,
1076       .maxFramebufferWidth                      = (1 << 14),
1077       .maxFramebufferHeight                     = (1 << 14),
1078       .maxFramebufferLayers                     = (1 << 11),
1079       .framebufferColorSampleCounts             = sample_counts,
1080       .framebufferDepthSampleCounts             = sample_counts,
1081       .framebufferStencilSampleCounts           = sample_counts,
1082       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1083       .maxColorAttachments                      = MAX_RTS,
1084       .sampledImageColorSampleCounts            = sample_counts,
1085       /* Multisampling with SINT formats is not supported on gfx7 */
1086       .sampledImageIntegerSampleCounts          = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
1087       .sampledImageDepthSampleCounts            = sample_counts,
1088       .sampledImageStencilSampleCounts          = sample_counts,
1089       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1090       .maxSampleMaskWords                       = 1,
1091       .timestampComputeAndGraphics              = true,
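      /* Nanoseconds per timestamp tick: e.g. a 12.5 MHz timestamp clock gives
       * a period of 80 ns.
       */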
1092       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1093       .maxClipDistances                         = 8,
1094       .maxCullDistances                         = 8,
1095       .maxCombinedClipAndCullDistances          = 8,
1096       .discreteQueuePriorities                  = 2,
1097       .pointSizeRange                           = { 0.125, 255.875 },
1098       /* While SKL and up support much wider lines than we are setting here,
1099        * in practice we run into conformance issues if we go past this limit.
1100        * Since the Windows driver does the same, it's probably fair to assume
1101        * that no one needs more than this.
1102        */
1103       .lineWidthRange                           = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
1104       .pointSizeGranularity                     = (1.0 / 8.0),
1105       .lineWidthGranularity                     = (1.0 / 128.0),
1106       .strictLines                              = false,
1107       .standardSampleLocations                  = true,
1108       .optimalBufferCopyOffsetAlignment         = 128,
1109       .optimalBufferCopyRowPitchAlignment       = 128,
1110       .nonCoherentAtomSize                      = 64,
1111 
1112       /* Broadwell doesn't do sparse. */
1113       .sparseResidencyStandard2DBlockShape = false,
1114       .sparseResidencyStandard2DMultisampleBlockShape = false,
1115       .sparseResidencyStandard3DBlockShape = false,
1116       .sparseResidencyAlignedMipSize = false,
1117       .sparseResidencyNonResidentStrict = false,
1118    };
1119 
1120    snprintf(props->deviceName, sizeof(props->deviceName),
1121             "%s", pdevice->info.name);
1122    memcpy(props->pipelineCacheUUID,
1123           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1124 
1125    get_properties_1_1(pdevice, props);
1126    get_properties_1_2(pdevice, props);
1127    get_properties_1_3(pdevice, props);
1128 
1129    /* VK_KHR_performance_query */
1130    {
1131       /* We could support this by spawning a shader to do the equation normalization. */
1132       props->allowCommandBufferQueryCopies = false;
1133    }
1134 
1135    /* VK_KHR_push_descriptor */
1136    {
1137       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1138    }
1139 
1140    /* VK_KHR_vertex_attribute_divisor */
1141    {
1142       /* We have to restrict this a bit for multiview */
1143       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1144    }
1145 
1146    /* VK_EXT_custom_border_color */
1147    {
1148       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1149    }
1150 
1151    /* VK_EXT_external_memory_host */
1152    {
1153       /* Userptr needs page aligned memory. */
1154       props->minImportedHostPointerAlignment = 4096;
1155    }
1156 
1157    /* VK_EXT_line_rasterization */
1158    {
1159       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1160        * Rules - Legacy Mode", it says the following:
1161        *
1162        *    "Note that the device divides a pixel into a 16x16 array of
1163        *     subpixels, referenced by their upper left corners."
1164        *
1165        * This is the only known reference in the PRMs to the subpixel
1166        * precision of line rasterization and a "16x16 array of subpixels"
1167        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1168        * subpixel precision bits applies to all line rasterization types.
1169        */
1170       props->lineSubPixelPrecisionBits = 4;
1171    }
1172 
1173    /* VK_EXT_multi_draw */
1174    {
1175       props->maxMultiDrawCount = 2048;
1176    }
1177 
1178    /* VK_EXT_pci_bus_info */
1179    {
1180       props->pciDomain = pdevice->info.pci_domain;
1181       props->pciBus = pdevice->info.pci_bus;
1182       props->pciDevice = pdevice->info.pci_dev;
1183       props->pciFunction = pdevice->info.pci_func;
1184    }
1185 
1186    /* VK_EXT_physical_device_drm */
1187    {
1188       props->drmHasPrimary = pdevice->has_master;
1189       props->drmPrimaryMajor = pdevice->master_major;
1190       props->drmPrimaryMinor = pdevice->master_minor;
1191       props->drmHasRender = pdevice->has_local;
1192       props->drmRenderMajor = pdevice->local_major;
1193       props->drmRenderMinor = pdevice->local_minor;
1194    }
1195 
1196    /* VK_EXT_provoking_vertex */
1197    {
1198       props->provokingVertexModePerPipeline = true;
1199       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1200    }
1201 
1202    /* VK_EXT_robustness2 */
1203    {
1204       props->robustStorageBufferAccessSizeAlignment =
1205          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1206       props->robustUniformBufferAccessSizeAlignment =
1207          ANV_UBO_ALIGNMENT;
1208    }
1209 
1210    /* VK_EXT_sample_locations */
1211    {
1212       props->sampleLocationSampleCounts =
1213          isl_device_get_sample_counts(&pdevice->isl_dev);
1214 
1215       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1216       props->maxSampleLocationGridSize.width = 1;
1217       props->maxSampleLocationGridSize.height = 1;
1218 
1219       props->sampleLocationCoordinateRange[0] = 0;
1220       props->sampleLocationCoordinateRange[1] = 0.9375;
1221       props->sampleLocationSubPixelBits = 4;
1222 
1223       props->variableSampleLocations = true;
1224    }
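   /* A short sanity check on the sample-location values above: with 4
    * sub-pixel bits the hardware places samples on a 1/16 pixel grid, so the
    * largest programmable coordinate is 15/16 = 0.9375, which is why the
    * coordinate range is advertised as [0, 0.9375].
    */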
1225 
1226    /* VK_EXT_shader_module_identifier */
1227    {
1228       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1229                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1230       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1231              vk_shaderModuleIdentifierAlgorithmUUID,
1232              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1233    }
1234 
1235    /* VK_EXT_transform_feedback */
1236    {
1237       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1238       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1239       props->maxTransformFeedbackBufferSize = (1ull << 32);
1240       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1241       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1242       props->maxTransformFeedbackBufferDataStride = 2048;
1243       props->transformFeedbackQueries = true;
1244       props->transformFeedbackStreamsLinesTriangles = false;
1245       props->transformFeedbackRasterizationStreamSelect = false;
1246       /* This requires MI_MATH */
1247       props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
1248    }
1249 
1250    /* VK_ANDROID_native_buffer */
1251 #if DETECT_OS_ANDROID
1252    {
1253       props->sharedImage = VK_FALSE;
1254    }
1255 #endif /* DETECT_OS_ANDROID */
1256 
1257 }
1258 
1259 static uint64_t
1260 anv_compute_sys_heap_size(struct anv_physical_device *device,
1261                           uint64_t available_ram)
1262 {
1263    /* We want to leave some padding for things we allocate in the driver,
1264     * so don't go over 3/4 of the GTT either.
1265     */
1266    available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
1267 
1268    if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
1269       /* When running with an overridden PCI ID, we may get a GTT size from
1270        * the kernel that is greater than 2 GiB but the execbuf check for 48bit
1271        * address support can still fail.  Just clamp the address space size to
1272        * 2 GiB if we don't have 48-bit support.
1273        */
1274       mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
1275                 "no support for 48-bit addresses",
1276                 __FILE__, __LINE__);
1277       available_ram = 2ull << 30;
1278    }
1279 
1280    return available_ram;
1281 }
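/* A worked example of the heuristic above, with made-up numbers: on a system
 * reporting 16 GiB of mappable SRAM and an 8 GiB GTT, the heap size is
 * clamped to 3/4 * 8 GiB = 6 GiB.  If the device additionally lacks 48-bit
 * address support, the result is clamped further to 2 GiB.
 */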
1282 
1283 static VkResult MUST_CHECK
1284 anv_init_meminfo(struct anv_physical_device *device, int fd)
1285 {
1286    const struct intel_device_info *devinfo = &device->info;
1287 
1288    device->sys.size =
1289       anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
1290    device->sys.available = devinfo->mem.sram.mappable.free;
1291 
1292    return VK_SUCCESS;
1293 }
1294 
1295 static void
1296 anv_update_meminfo(struct anv_physical_device *device, int fd)
1297 {
1298    if (!intel_device_info_update_memory_info(&device->info, fd))
1299       return;
1300 
1301    const struct intel_device_info *devinfo = &device->info;
1302    device->sys.available = devinfo->mem.sram.mappable.free;
1303 }
1304 
1305 static VkResult
1306 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
1307 {
1308    VkResult result = anv_init_meminfo(device, fd);
1309    if (result != VK_SUCCESS)
1310       return result;
1311 
1312    assert(device->sys.size != 0);
1313 
1314    if (device->info.has_llc) {
1315       device->memory.heap_count = 1;
1316       device->memory.heaps[0] = (struct anv_memory_heap) {
1317          .size = device->sys.size,
1318          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1319       };
1320 
1321       /* Big core GPUs share LLC with the CPU and thus one memory type can be
1322        * both cached and coherent at the same time.
1323        *
1324        * However, some game engines can't handle a single memory type well:
1325        * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
1326        *
1327        * Intel's Windows driver exposes three types, so add an extra one here.
1328        */
1329       device->memory.type_count = 2;
1330       device->memory.types[0] = (struct anv_memory_type) {
1331           .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1332           .heapIndex = 0,
1333       };
1334       device->memory.types[1] = (struct anv_memory_type) {
1335           .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1336                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1337                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1338                            VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1339           .heapIndex = 0,
1340       };
1341    } else {
1342       device->memory.heap_count = 1;
1343       device->memory.heaps[0] = (struct anv_memory_heap) {
1344          .size = device->sys.size,
1345          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1346       };
1347 
1348       /* The spec requires that we expose a host-visible, coherent memory
1349        * type, but Atom GPUs don't share LLC. Thus we offer two memory types
1350        * to give the application a choice between cached but not coherent, and
1351        * coherent but uncached (write-combined).
1352        */
1353       device->memory.type_count = 2;
1354       device->memory.types[0] = (struct anv_memory_type) {
1355          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1356                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1357                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1358          .heapIndex = 0,
1359       };
1360       device->memory.types[1] = (struct anv_memory_type) {
1361          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1362                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1363                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1364          .heapIndex = 0,
1365       };
1366    }
1367 
1368    device->memory.need_flush = false;
1369    for (unsigned i = 0; i < device->memory.type_count; i++) {
1370       VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
1371       if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
1372           !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
1373          device->memory.need_flush = true;
1374    }
1375 
1376    return VK_SUCCESS;
1377 }
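/* To see how the table built above surfaces to applications, here is a
 * minimal, illustrative type-selection loop an app might run against these
 * flags (hypothetical application-side code, not used by the driver):
 *
 *    VkPhysicalDeviceMemoryProperties props;
 *    vkGetPhysicalDeviceMemoryProperties(physical_device, &props);
 *    uint32_t type_index = UINT32_MAX;
 *    for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
 *       VkMemoryPropertyFlags f = props.memoryTypes[i].propertyFlags;
 *       if ((f & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
 *           (f & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
 *          type_index = i;   // type 1 on both the LLC and non-LLC paths above
 *          break;
 *       }
 *    }
 *
 * Both paths always expose at least one HOST_VISIBLE | HOST_COHERENT type,
 * as the spec requires.
 */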
1378 
1379 static VkResult
1380 anv_physical_device_init_uuids(struct anv_physical_device *device)
1381 {
1382    const struct build_id_note *note =
1383       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
1384    if (!note) {
1385       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1386                        "Failed to find build-id");
1387    }
1388 
1389    unsigned build_id_len = build_id_length(note);
1390    if (build_id_len < 20) {
1391       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1392                        "build-id too short.  It needs to be a SHA");
1393    }
1394 
1395    memcpy(device->driver_build_sha1, build_id_data(note), 20);
1396 
1397    struct mesa_sha1 sha1_ctx;
1398    uint8_t sha1[20];
1399    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
1400 
1401    /* The pipeline cache UUID is used for determining when a pipeline cache is
1402     * invalid.  It needs both a driver build and the PCI ID of the device.
1403     */
1404    _mesa_sha1_init(&sha1_ctx);
1405    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
1406    _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
1407                      sizeof(device->info.pci_device_id));
1408    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
1409                      sizeof(device->always_use_bindless));
1410    _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
1411                      sizeof(device->has_a64_buffer_access));
1412    _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
1413                      sizeof(device->has_bindless_samplers));
1414    _mesa_sha1_final(&sha1_ctx, sha1);
1415    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
1416 
1417    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
1418    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
1419 
1420    return VK_SUCCESS;
1421 }
1422 
1423 static void
1424 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
1425 {
1426 #ifdef ENABLE_SHADER_CACHE
1427    char renderer[10];
1428    ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
1429                                device->info.pci_device_id);
1430    assert(len == sizeof(renderer) - 2);
1431 
1432    char timestamp[41];
1433    _mesa_sha1_format(timestamp, device->driver_build_sha1);
1434 
1435    const uint64_t driver_flags =
1436       elk_get_compiler_config_value(device->compiler);
1437    device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1438 #endif
1439 }
1440 
1441 static void
1442 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
1443 {
1444 #ifdef ENABLE_SHADER_CACHE
1445    if (device->vk.disk_cache) {
1446       disk_cache_destroy(device->vk.disk_cache);
1447       device->vk.disk_cache = NULL;
1448    }
1449 #else
1450    assert(device->vk.disk_cache == NULL);
1451 #endif
1452 }
1453 
1454 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
1455  * queue overrides.
1456  *
1457  * To override the number of queues:
1458  *  * "gc" is for graphics queues with compute support
1459  *  * "g" is for graphics queues with no compute support
1460  *  * "c" is for compute queues with no graphics support
1461  *
1462  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
1463  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
1464  * with compute-only support.
1465  *
1466  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
1467  * include 1 queue with compute-only support, but it will not change the
1468  * number of graphics+compute queues.
1469  *
1470  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
1471  * to include 1 queue with compute-only support, and it would override the
1472  * number of graphics+compute queues to be 0.
1473  */
1474 static void
1475 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
1476 {
1477    int gc_override = -1;
1478    int g_override = -1;
1479    int c_override = -1;
1480    const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
1481 
1482    if (env_ == NULL)
1483       return;
1484 
1485    char *env = strdup(env_);
1486    char *save = NULL;
1487    char *next = strtok_r(env, ",", &save);
1488    while (next != NULL) {
1489       if (strncmp(next, "gc=", 3) == 0) {
1490          gc_override = strtol(next + 3, NULL, 0);
1491       } else if (strncmp(next, "g=", 2) == 0) {
1492          g_override = strtol(next + 2, NULL, 0);
1493       } else if (strncmp(next, "c=", 2) == 0) {
1494          c_override = strtol(next + 2, NULL, 0);
1495       } else {
1496          mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
1497       }
1498       next = strtok_r(NULL, ",", &save);
1499    }
1500    free(env);
1501    if (gc_override >= 0)
1502       *gc_count = gc_override;
1503    if (g_override >= 0)
1504       *g_count = g_override;
1505    if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
1506       mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
1507                 "Vulkan specification");
1508    if (c_override >= 0)
1509       *c_count = c_override;
1510 }
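/* As a usage sketch (the application name is just a placeholder): running
 *
 *    ANV_QUEUE_OVERRIDE=gc=2,c=1 ./my_vulkan_app
 *
 * makes the parser above set gc_override = 2 and c_override = 1, so the
 * device advertises two graphics+compute queues and one compute-only queue,
 * exactly as described in the comment before this function.
 */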
1511 
1512 static void
1513 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
1514 {
1515    uint32_t family_count = 0;
1516 
1517    if (pdevice->engine_info) {
1518       int gc_count =
1519          intel_engines_count(pdevice->engine_info,
1520                              INTEL_ENGINE_CLASS_RENDER);
1521       int g_count = 0;
1522       int c_count = 0;
1523 
1524       anv_override_engine_counts(&gc_count, &g_count, &c_count);
1525 
1526       if (gc_count > 0) {
1527          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1528             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1529                           VK_QUEUE_COMPUTE_BIT |
1530                           VK_QUEUE_TRANSFER_BIT,
1531             .queueCount = gc_count,
1532             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1533          };
1534       }
1535       if (g_count > 0) {
1536          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1537             .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1538                           VK_QUEUE_TRANSFER_BIT,
1539             .queueCount = g_count,
1540             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1541          };
1542       }
1543       if (c_count > 0) {
1544          pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1545             .queueFlags = VK_QUEUE_COMPUTE_BIT |
1546                           VK_QUEUE_TRANSFER_BIT,
1547             .queueCount = c_count,
1548             .engine_class = INTEL_ENGINE_CLASS_RENDER,
1549          };
1550       }
1551       /* Increase the count in the assert below when other families are added,
1552        * as a reminder to also increase the ANV_MAX_QUEUE_FAMILIES value.
1553        */
1554       STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
1555    } else {
1556       /* Default to a single render queue */
1557       pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1558          .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1559                        VK_QUEUE_COMPUTE_BIT |
1560                        VK_QUEUE_TRANSFER_BIT,
1561          .queueCount = 1,
1562          .engine_class = INTEL_ENGINE_CLASS_RENDER,
1563       };
1564       family_count = 1;
1565    }
1566    assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
1567    pdevice->queue.family_count = family_count;
1568 }
1569 
1570 static VkResult
1571 anv_physical_device_try_create(struct vk_instance *vk_instance,
1572                                struct _drmDevice *drm_device,
1573                                struct vk_physical_device **out)
1574 {
1575    struct anv_instance *instance =
1576       container_of(vk_instance, struct anv_instance, vk);
1577 
1578    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1579        drm_device->bustype != DRM_BUS_PCI ||
1580        drm_device->deviceinfo.pci->vendor_id != 0x8086)
1581       return VK_ERROR_INCOMPATIBLE_DRIVER;
1582 
1583    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
1584    const char *path = drm_device->nodes[DRM_NODE_RENDER];
1585    VkResult result;
1586    int fd;
1587    int master_fd = -1;
1588 
1589    process_intel_debug_variable();
1590 
1591    fd = open(path, O_RDWR | O_CLOEXEC);
1592    if (fd < 0) {
1593       if (errno == ENOMEM) {
1594          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1595                           "Unable to open device %s: out of memory", path);
1596       }
1597       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1598                        "Unable to open device %s: %m", path);
1599    }
1600 
1601    struct intel_device_info devinfo;
1602    if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
1603       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1604       goto fail_fd;
1605    }
1606 
1607    bool is_alpha = true;
1608    bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
1609    if (devinfo.platform == INTEL_PLATFORM_HSW) {
1610       if (warn)
1611          mesa_logw("Haswell Vulkan support is incomplete");
1612    } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
1613       if (warn)
1614          mesa_logw("Ivy Bridge Vulkan support is incomplete");
1615    } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
1616       if (warn)
1617          mesa_logw("Bay Trail Vulkan support is incomplete");
1618    } else if (devinfo.ver == 8) {
1619       /* Gfx8 fully supported */
1620       is_alpha = false;
1621    } else {
1622       /* Silently fail here, anv will either pick up this device or display an
1623        * error message.
1624        */
1625       result = VK_ERROR_INCOMPATIBLE_DRIVER;
1626       goto fail_fd;
1627    }
1628 
1629    struct anv_physical_device *device =
1630       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1631                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1632    if (device == NULL) {
1633       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1634       goto fail_fd;
1635    }
1636 
1637    struct vk_physical_device_dispatch_table dispatch_table;
1638    vk_physical_device_dispatch_table_from_entrypoints(
1639       &dispatch_table, &anv_physical_device_entrypoints, true);
1640    vk_physical_device_dispatch_table_from_entrypoints(
1641       &dispatch_table, &wsi_physical_device_entrypoints, false);
1642 
1643    result = vk_physical_device_init(&device->vk, &instance->vk,
1644                                     NULL, NULL, NULL, /* We set up extensions later */
1645                                     &dispatch_table);
1646    if (result != VK_SUCCESS) {
1647       vk_error(instance, result);
1648       goto fail_alloc;
1649    }
1650    device->instance = instance;
1651 
1652    assert(strlen(path) < ARRAY_SIZE(device->path));
1653    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
1654 
1655    device->info = devinfo;
1656    device->is_alpha = is_alpha;
1657 
1658    device->cmd_parser_version = -1;
1659    if (device->info.ver == 7) {
1660       if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
1661           device->cmd_parser_version == -1) {
1662          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1663                             "failed to get command parser version");
1664          goto fail_base;
1665       }
1666    }
1667 
1668    int val;
1669    if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
1670       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1671                          "kernel missing gem wait");
1672       goto fail_base;
1673    }
1674 
1675    if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
1676       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1677                          "kernel missing execbuf2");
1678       goto fail_base;
1679    }
1680 
1681    if (!device->info.has_llc &&
1682        (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
1683        result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1684                           "kernel missing wc mmap");
1685       goto fail_base;
1686    }
1687 
1688    device->use_relocations = device->info.ver < 8 ||
1689                              device->info.platform == INTEL_PLATFORM_CHV;
1690 
1691    if (!device->use_relocations &&
1692        (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
1693       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1694                          "kernel missing softpin");
1695       goto fail_base;
1696    }
1697 
1698    if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
1699       result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1700                          "kernel missing syncobj support");
1701       goto fail_base;
1702    }
1703 
1704    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
1705       device->has_exec_async = val;
1706    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
1707       device->has_exec_capture = val;
1708 
1709    /* Start with medium; sorted low to high */
1710    const int priorities[] = {
1711       INTEL_CONTEXT_MEDIUM_PRIORITY,
1712       INTEL_CONTEXT_HIGH_PRIORITY,
1713       INTEL_CONTEXT_REALTIME_PRIORITY,
1714    };
1715    device->max_context_priority = INT_MIN;
1716    for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
1717       if (!anv_gem_has_context_priority(fd, priorities[i]))
1718          break;
1719       device->max_context_priority = priorities[i];
1720    }
1721 
1722    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
1723                                               device->info.aperture_bytes;
1724 
1725    /* We only allow 48-bit addresses with softpin because knowing the actual
1726     * address is required for the vertex cache flush workaround.
1727     */
1728    device->supports_48bit_addresses = (device->info.ver >= 8) &&
1729                                       device->gtt_size > (4ULL << 30 /* GiB */);
1730 
1731    result = anv_physical_device_init_heaps(device, fd);
1732    if (result != VK_SUCCESS)
1733       goto fail_base;
1734 
1735    assert(device->supports_48bit_addresses == !device->use_relocations);
1736    device->use_softpin = !device->use_relocations;
1737 
1738    if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
1739       device->has_exec_timeline = val;
1740    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
1741       device->has_exec_timeline = false;
1742 
1743    unsigned st_idx = 0;
1744 
1745    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
1746    if (!device->has_exec_timeline)
1747       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
1748    device->sync_types[st_idx++] = &device->sync_syncobj_type;
1749 
1750    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
1751       device->sync_types[st_idx++] = &anv_bo_sync_type;
1752 
1753    if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
1754       device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
1755       device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
1756    }
1757 
1758    device->sync_types[st_idx++] = NULL;
1759    assert(st_idx <= ARRAY_SIZE(device->sync_types));
1760    device->vk.supported_sync_types = device->sync_types;
1761 
1762    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
1763 
1764    device->always_use_bindless =
1765       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
1766 
1767    device->use_call_secondary =
1768       device->use_softpin &&
1769       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
1770 
1771    /* We first got the A64 messages on Broadwell, and we can only use them if
1772     * we can pass addresses directly into the shader, which requires softpin.
1773     */
1774    device->has_a64_buffer_access = device->info.ver >= 8 &&
1775                                    device->use_softpin;
1776 
1777    /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
1778     * because it's just a matter of setting the sampler address in the sample
1779     * message header.  However, we've not bothered to wire it up for vec4 so
1780     * we leave it disabled on gfx7.
1781     */
1782    device->has_bindless_samplers = device->info.ver >= 8;
1783 
1784    /* Check if we can read the GPU timestamp register from the CPU */
1785    uint64_t u64_ignore;
1786    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
1787                                                                device->info.kmd_type,
1788                                                                &u64_ignore);
1789 
1790    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
1791       driQueryOptionb(&instance->dri_options, "always_flush_cache");
1792 
1793    device->compiler = elk_compiler_create(NULL, &device->info);
1794    if (device->compiler == NULL) {
1795       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1796       goto fail_base;
1797    }
1798    device->compiler->shader_debug_log = compiler_debug_log;
1799    device->compiler->shader_perf_log = compiler_perf_log;
1800    device->compiler->constant_buffer_0_is_relative =
1801       device->info.ver < 8 || !device->info.has_context_isolation;
1802    device->compiler->supports_shader_constants = true;
1803 
1804    isl_device_init(&device->isl_dev, &device->info);
1805 
1806    result = anv_physical_device_init_uuids(device);
1807    if (result != VK_SUCCESS)
1808       goto fail_compiler;
1809 
1810    anv_physical_device_init_disk_cache(device);
1811 
1812    if (instance->vk.enabled_extensions.KHR_display) {
1813       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
1814       if (master_fd >= 0) {
1815          /* fail if we don't have permission to even render on this device */
1816          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
1817             close(master_fd);
1818             master_fd = -1;
1819          }
1820       }
1821    }
1822    device->master_fd = master_fd;
1823 
1824    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
1825    anv_physical_device_init_queue_families(device);
1826 
1827    device->local_fd = fd;
1828 
1829    anv_physical_device_init_perf(device, fd);
1830 
1831    /* Gather major/minor before WSI. */
1832    struct stat st;
1833 
1834    if (stat(primary_path, &st) == 0) {
1835       device->has_master = true;
1836       device->master_major = major(st.st_rdev);
1837       device->master_minor = minor(st.st_rdev);
1838    } else {
1839       device->has_master = false;
1840       device->master_major = 0;
1841       device->master_minor = 0;
1842    }
1843 
1844    if (stat(path, &st) == 0) {
1845       device->has_local = true;
1846       device->local_major = major(st.st_rdev);
1847       device->local_minor = minor(st.st_rdev);
1848    } else {
1849       device->has_local = false;
1850       device->local_major = 0;
1851       device->local_minor = 0;
1852    }
1853 
1854    get_device_extensions(device, &device->vk.supported_extensions);
1855    get_features(device, &device->vk.supported_features);
1856    get_properties(device, &device->vk.properties);
1857 
1858    result = anv_init_wsi(device);
1859    if (result != VK_SUCCESS)
1860       goto fail_perf;
1861 
1862    anv_measure_device_init(device);
1863 
1864    anv_genX(&device->info, init_physical_device_state)(device);
1865 
1866    *out = &device->vk;
1867 
1868    return VK_SUCCESS;
1869 
1870 fail_perf:
1871    intel_perf_free(device->perf);
1872    free(device->engine_info);
1873    anv_physical_device_free_disk_cache(device);
1874 fail_compiler:
1875    ralloc_free(device->compiler);
1876 fail_base:
1877    vk_physical_device_finish(&device->vk);
1878 fail_alloc:
1879    vk_free(&instance->vk.alloc, device);
1880 fail_fd:
1881    close(fd);
1882    if (master_fd != -1)
1883       close(master_fd);
1884    return result;
1885 }
1886 
1887 static void
1888 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1889 {
1890    struct anv_physical_device *device =
1891       container_of(vk_device, struct anv_physical_device, vk);
1892 
1893    anv_finish_wsi(device);
1894    anv_measure_device_destroy(device);
1895    free(device->engine_info);
1896    anv_physical_device_free_disk_cache(device);
1897    ralloc_free(device->compiler);
1898    intel_perf_free(device->perf);
1899    close(device->local_fd);
1900    if (device->master_fd >= 0)
1901       close(device->master_fd);
1902    vk_physical_device_finish(&device->vk);
1903    vk_free(&device->instance->vk.alloc, device);
1904 }
1905 
1906 VkResult anv_EnumerateInstanceExtensionProperties(
1907     const char*                                 pLayerName,
1908     uint32_t*                                   pPropertyCount,
1909     VkExtensionProperties*                      pProperties)
1910 {
1911    if (pLayerName)
1912       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1913 
1914    return vk_enumerate_instance_extension_properties(
1915       &instance_extensions, pPropertyCount, pProperties);
1916 }
1917 
1918 static void
1919 anv_init_dri_options(struct anv_instance *instance)
1920 {
1921    driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1922                       ARRAY_SIZE(anv_dri_options));
1923    driParseConfigFiles(&instance->dri_options,
1924                        &instance->available_dri_options, 0, "anv", NULL, NULL,
1925                        instance->vk.app_info.app_name,
1926                        instance->vk.app_info.app_version,
1927                        instance->vk.app_info.engine_name,
1928                        instance->vk.app_info.engine_version);
1929 
1930     instance->assume_full_subgroups =
1931             driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1932     instance->limit_trig_input_range =
1933             driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1934     instance->sample_mask_out_opengl_behaviour =
1935             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1936     instance->lower_depth_range_rate =
1937             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1938     instance->no_16bit =
1939             driQueryOptionb(&instance->dri_options, "no_16bit");
1940     instance->report_vk_1_3 =
1941             driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1942 }
1943 
1944 VkResult anv_CreateInstance(
1945     const VkInstanceCreateInfo*                 pCreateInfo,
1946     const VkAllocationCallbacks*                pAllocator,
1947     VkInstance*                                 pInstance)
1948 {
1949    struct anv_instance *instance;
1950    VkResult result;
1951 
1952    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1953 
1954    if (pAllocator == NULL)
1955       pAllocator = vk_default_allocator();
1956 
1957    instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1958                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1959    if (!instance)
1960       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1961 
1962    struct vk_instance_dispatch_table dispatch_table;
1963    vk_instance_dispatch_table_from_entrypoints(
1964       &dispatch_table, &anv_instance_entrypoints, true);
1965    vk_instance_dispatch_table_from_entrypoints(
1966       &dispatch_table, &wsi_instance_entrypoints, false);
1967 
1968    result = vk_instance_init(&instance->vk, &instance_extensions,
1969                              &dispatch_table, pCreateInfo, pAllocator);
1970    if (result != VK_SUCCESS) {
1971       vk_free(pAllocator, instance);
1972       return vk_error(NULL, result);
1973    }
1974 
1975    instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1976    instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1977 
1978    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1979 
1980    anv_init_dri_options(instance);
1981 
1982    intel_driver_ds_init();
1983 
1984    *pInstance = anv_instance_to_handle(instance);
1985 
1986    return VK_SUCCESS;
1987 }
1988 
1989 void anv_DestroyInstance(
1990     VkInstance                                  _instance,
1991     const VkAllocationCallbacks*                pAllocator)
1992 {
1993    ANV_FROM_HANDLE(anv_instance, instance, _instance);
1994 
1995    if (!instance)
1996       return;
1997 
1998    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1999 
2000    driDestroyOptionCache(&instance->dri_options);
2001    driDestroyOptionInfo(&instance->available_dri_options);
2002 
2003    vk_instance_finish(&instance->vk);
2004    vk_free(&instance->vk.alloc, instance);
2005 }
2006 
2007 static int
2008 vk_priority_to_gen(int priority)
2009 {
2010    switch (priority) {
2011    case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2012       return INTEL_CONTEXT_LOW_PRIORITY;
2013    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2014       return INTEL_CONTEXT_MEDIUM_PRIORITY;
2015    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2016       return INTEL_CONTEXT_HIGH_PRIORITY;
2017    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2018       return INTEL_CONTEXT_REALTIME_PRIORITY;
2019    default:
2020       unreachable("Invalid priority");
2021    }
2022 }
2023 
2024 static const VkQueueFamilyProperties
2025 anv_queue_family_properties_template = {
2026    .timestampValidBits = 36, /* XXX: Real value here */
2027    .minImageTransferGranularity = { 1, 1, 1 },
2028 };
2029 
2030 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2031     VkPhysicalDevice                            physicalDevice,
2032     uint32_t*                                   pQueueFamilyPropertyCount,
2033     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2034 {
2035    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2036    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2037                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
2038 
2039    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2040       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2041       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2042          p->queueFamilyProperties = anv_queue_family_properties_template;
2043          p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2044          p->queueFamilyProperties.queueCount = queue_family->queueCount;
2045 
2046          vk_foreach_struct(ext, p->pNext) {
2047             switch (ext->sType) {
2048             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2049                VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2050                   (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2051 
2052                /* Deliberately sorted low to high */
2053                VkQueueGlobalPriorityKHR all_priorities[] = {
2054                   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2055                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2056                   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2057                   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2058                };
2059 
2060                uint32_t count = 0;
2061                for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2062                   if (vk_priority_to_gen(all_priorities[i]) >
2063                       pdevice->max_context_priority)
2064                      break;
2065 
2066                   properties->priorities[count++] = all_priorities[i];
2067                }
2068                properties->priorityCount = count;
2069                break;
2070             }
2071 
2072             default:
2073                vk_debug_ignored_stype(ext->sType);
2074             }
2075          }
2076       }
2077    }
2078 }
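/* A short illustration of the priority filtering above, under the assumption
 * that the kernel only granted us up to INTEL_CONTEXT_MEDIUM_PRIORITY: the
 * loop keeps LOW and MEDIUM, breaks at HIGH, and therefore reports
 * priorityCount = 2 with priorities = { LOW, MEDIUM }.
 */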
2079 
2080 void anv_GetPhysicalDeviceMemoryProperties(
2081     VkPhysicalDevice                            physicalDevice,
2082     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2083 {
2084    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2085 
2086    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2087    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2088       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2089          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2090          .heapIndex     = physical_device->memory.types[i].heapIndex,
2091       };
2092    }
2093 
2094    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2095    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2096       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2097          .size    = physical_device->memory.heaps[i].size,
2098          .flags   = physical_device->memory.heaps[i].flags,
2099       };
2100    }
2101 }
2102 
2103 static void
2104 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2105                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2106 {
2107    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2108 
2109    if (!device->vk.supported_extensions.EXT_memory_budget)
2110       return;
2111 
2112    anv_update_meminfo(device, device->local_fd);
2113 
2114    VkDeviceSize total_sys_heaps_size = 0;
2115    for (size_t i = 0; i < device->memory.heap_count; i++)
2116       total_sys_heaps_size += device->memory.heaps[i].size;
2117 
2118    for (size_t i = 0; i < device->memory.heap_count; i++) {
2119       VkDeviceSize heap_size = device->memory.heaps[i].size;
2120       VkDeviceSize heap_used = device->memory.heaps[i].used;
2121       VkDeviceSize heap_budget, total_heaps_size;
2122       uint64_t mem_available = 0;
2123 
2124       total_heaps_size = total_sys_heaps_size;
2125       mem_available = device->sys.available;
2126 
2127       double heap_proportion = (double) heap_size / total_heaps_size;
2128       VkDeviceSize available_prop = mem_available * heap_proportion;
2129 
2130       /*
2131        * Let's not incite the app to starve the system: report at most 90% of
2132        * the available heap memory.
2133        */
2134       uint64_t heap_available = available_prop * 9 / 10;
2135       heap_budget = MIN2(heap_size, heap_used + heap_available);
2136 
2137       /*
2138        * Round down to the nearest MB
2139        */
2140       heap_budget &= ~((1ull << 20) - 1);
2141 
2142       /*
2143        * The heapBudget value must be non-zero for array elements less than
2144        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2145        * value must be less than or equal to VkMemoryHeap::size for each heap.
2146        */
2147       assert(0 < heap_budget && heap_budget <= heap_size);
2148 
2149       memoryBudget->heapUsage[i] = heap_used;
2150       memoryBudget->heapBudget[i] = heap_budget;
2151    }
2152 
2153    /* The heapBudget and heapUsage values must be zero for array elements
2154     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2155     */
2156    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2157       memoryBudget->heapBudget[i] = 0;
2158       memoryBudget->heapUsage[i] = 0;
2159    }
2160 }
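/* A worked example of the budget computation above, with made-up numbers:
 * with a single 8 GiB heap, 1 GiB already used, and 6 GiB of system memory
 * reported as available, the proportional share is 6 GiB, 90% of that is
 * 5.4 GiB, so heapBudget = MIN2(8 GiB, 1 GiB + 5.4 GiB) = 6.4 GiB, which is
 * then rounded down to a whole number of MiB.
 */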
2161 
2162 void anv_GetPhysicalDeviceMemoryProperties2(
2163     VkPhysicalDevice                            physicalDevice,
2164     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2165 {
2166    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2167                                          &pMemoryProperties->memoryProperties);
2168 
2169    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2170       switch (ext->sType) {
2171       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2172          anv_get_memory_budget(physicalDevice, (void*)ext);
2173          break;
2174       default:
2175          vk_debug_ignored_stype(ext->sType);
2176          break;
2177       }
2178    }
2179 }
2180 
2181 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2182     VkInstance                                  _instance,
2183     const char*                                 pName)
2184 {
2185    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2186    return vk_instance_get_proc_addr(&instance->vk,
2187                                     &anv_instance_entrypoints,
2188                                     pName);
2189 }
2190 
2191 /* With version 1+ of the loader interface the ICD should expose
2192  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2193  */
2194 PUBLIC
2195 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2196     VkInstance                                  instance,
2197     const char*                                 pName)
2198 {
2199    return anv_GetInstanceProcAddr(instance, pName);
2200 }
2201 static struct anv_state
2202 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2203 {
2204    struct anv_state state;
2205 
2206    state = anv_state_pool_alloc(pool, size, align);
2207    memcpy(state.map, p, size);
2208 
2209    return state;
2210 }
2211 
2212 static void
2213 anv_device_init_border_colors(struct anv_device *device)
2214 {
2215    if (device->info->platform == INTEL_PLATFORM_HSW) {
2216       static const struct hsw_border_color border_colors[] = {
2217          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2218          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2219          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2220          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2221          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2222          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2223       };
2224 
2225       device->border_colors =
2226          anv_state_pool_emit_data(&device->dynamic_state_pool,
2227                                   sizeof(border_colors), 512, border_colors);
2228    } else {
2229       static const struct gfx8_border_color border_colors[] = {
2230          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2231          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2232          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2233          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
2234          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
2235          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
2236       };
2237 
2238       device->border_colors =
2239          anv_state_pool_emit_data(&device->dynamic_state_pool,
2240                                   sizeof(border_colors), 64, border_colors);
2241    }
2242 }
2243 
2244 static VkResult
2245 anv_device_init_trivial_batch(struct anv_device *device)
2246 {
2247    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2248                                          ANV_BO_ALLOC_MAPPED,
2249                                          0 /* explicit_address */,
2250                                          &device->trivial_batch_bo);
2251    if (result != VK_SUCCESS)
2252       return result;
2253 
2254    struct anv_batch batch = {
2255       .start = device->trivial_batch_bo->map,
2256       .next = device->trivial_batch_bo->map,
2257       .end = device->trivial_batch_bo->map + 4096,
2258    };
2259 
2260    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2261    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2262 
2263 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2264    if (device->physical->memory.need_flush)
2265       intel_flush_range(batch.start, batch.next - batch.start);
2266 #endif
2267 
2268    return VK_SUCCESS;
2269 }
2270 
2271 static bool
2272 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2273                  struct anv_block_pool *pool,
2274                  uint64_t address)
2275 {
2276    anv_block_pool_foreach_bo(bo, pool) {
2277       uint64_t bo_address = intel_48b_address(bo->offset);
2278       if (address >= bo_address && address < (bo_address + bo->size)) {
2279          *ret = (struct intel_batch_decode_bo) {
2280             .addr = bo_address,
2281             .size = bo->size,
2282             .map = bo->map,
2283          };
2284          return true;
2285       }
2286    }
2287    return false;
2288 }
2289 
2290 /* Finding a buffer for batch decoding */
2291 static struct intel_batch_decode_bo
2292 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2293 {
2294    struct anv_device *device = v_batch;
2295    struct intel_batch_decode_bo ret_bo = {};
2296 
2297    assert(ppgtt);
2298 
2299    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2300       return ret_bo;
2301    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2302       return ret_bo;
2303    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2304       return ret_bo;
2305    if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2306       return ret_bo;
2307 
2308    if (!device->cmd_buffer_being_decoded)
2309       return (struct intel_batch_decode_bo) { };
2310 
2311    struct anv_batch_bo **bo;
2312 
2313    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2314       /* The decoder zeroes out the top 16 bits, so we need to as well */
2315       uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2316 
2317       if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2318          return (struct intel_batch_decode_bo) {
2319             .addr = bo_address,
2320             .size = (*bo)->bo->size,
2321             .map = (*bo)->bo->map,
2322          };
2323       }
2324    }
2325 
2326    return (struct intel_batch_decode_bo) { };
2327 }
2328 
2329 static VkResult anv_device_check_status(struct vk_device *vk_device);
2330 
2331 static VkResult
2332 anv_device_setup_context(struct anv_device *device,
2333                          const VkDeviceCreateInfo *pCreateInfo,
2334                          const uint32_t num_queues)
2335 {
2336    struct anv_physical_device *physical_device = device->physical;
2337    VkResult result = VK_SUCCESS;
2338 
2339    if (device->physical->engine_info) {
2340       /* The kernel API supports at most 64 engines */
2341       assert(num_queues <= 64);
2342       enum intel_engine_class engine_classes[64];
2343       int engine_count = 0;
2344       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2345          const VkDeviceQueueCreateInfo *queueCreateInfo =
2346             &pCreateInfo->pQueueCreateInfos[i];
2347 
2348          assert(queueCreateInfo->queueFamilyIndex <
2349                 physical_device->queue.family_count);
2350          struct anv_queue_family *queue_family =
2351             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2352 
2353          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2354             engine_classes[engine_count++] = queue_family->engine_class;
2355       }
2356       if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2357                                             physical_device->engine_info,
2358                                             engine_count, engine_classes,
2359                                             0 /* vm_id */,
2360                                             (uint32_t *)&device->context_id))
2361          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2362                             "kernel context creation failed");
2363    } else {
2364       assert(num_queues == 1);
2365       if (!intel_gem_create_context(device->fd, &device->context_id))
2366          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2367    }
2368 
2369    if (result != VK_SUCCESS)
2370       return result;
2371 
2372    /* Here we tell the kernel not to attempt to recover our context but
2373     * immediately (on the next batchbuffer submission) report that the
2374     * context is lost, and we will do the recovery ourselves.  In the case
2375     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2376     * the client clean up the pieces.
2377     */
2378    anv_gem_set_context_param(device->fd, device->context_id,
2379                              I915_CONTEXT_PARAM_RECOVERABLE, false);
2380 
2381    /* Check if client specified queue priority. */
2382    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2383       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2384                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2385 
2386    VkQueueGlobalPriorityKHR priority =
2387       queue_priority ? queue_priority->globalPriority :
2388          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2389 
2390    /* As per spec, the driver implementation may deny requests to acquire
2391     * a priority above the default priority (MEDIUM) if the caller does not
2392     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2393     * is returned.
2394     */
2395    if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2396       int err = anv_gem_set_context_param(device->fd, device->context_id,
2397                                           I915_CONTEXT_PARAM_PRIORITY,
2398                                           vk_priority_to_gen(priority));
2399       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2400          result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2401          goto fail_context;
2402       }
2403    }
2404 
2405    return result;
2406 
2407 fail_context:
2408    intel_gem_destroy_context(device->fd, device->context_id);
2409    return result;
2410 }
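/* To make the engine flattening above concrete (hypothetical request, not a
 * real application): a VkDeviceCreateInfo asking for 2 queues from family 0
 * and 1 queue from family 2 yields engine_count = 3 with engine_classes =
 * { RENDER, RENDER, RENDER }, since every hasvk queue family is backed by
 * INTEL_ENGINE_CLASS_RENDER.
 */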
2411 
2412 VkResult anv_CreateDevice(
2413     VkPhysicalDevice                            physicalDevice,
2414     const VkDeviceCreateInfo*                   pCreateInfo,
2415     const VkAllocationCallbacks*                pAllocator,
2416     VkDevice*                                   pDevice)
2417 {
2418    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2419    VkResult result;
2420    struct anv_device *device;
2421 
2422    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2423 
2424    /* Check requested queues and fail if we are requested to create any
2425     * queues with flags we don't support.
2426     */
2427    assert(pCreateInfo->queueCreateInfoCount > 0);
2428    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2429       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2430          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2431    }
2432 
2433    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2434                        sizeof(*device), 8,
2435                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2436    if (!device)
2437       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2438 
2439    struct vk_device_dispatch_table dispatch_table;
2440 
2441    bool override_initial_entrypoints = true;
2442    if (physical_device->instance->vk.app_info.app_name &&
2443        !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2444       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2445       override_initial_entrypoints = false;
2446    }
2447    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2448       anv_genX(&physical_device->info, device_entrypoints),
2449       override_initial_entrypoints);
2450    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2451       &anv_device_entrypoints, false);
2452    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2453       &wsi_device_entrypoints, false);
2454 
2455    result = vk_device_init(&device->vk, &physical_device->vk,
2456                            &dispatch_table, pCreateInfo, pAllocator);
2457    if (result != VK_SUCCESS)
2458       goto fail_alloc;
2459 
2460    if (INTEL_DEBUG(DEBUG_BATCH)) {
2461       const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2462 
2463       intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2464                                       &physical_device->compiler->isa,
2465                                       &physical_device->info,
2466                                       stderr, decode_flags, NULL,
2467                                       decode_get_bo, NULL, device);
2468 
2469       device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2470       device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2471       device->decoder_ctx.instruction_base =
2472          INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2473    }
2474 
2475    anv_device_set_physical(device, physical_device);
2476 
2477    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2478    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2479    if (device->fd == -1) {
2480       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2481       goto fail_device;
2482    }
2483 
2484    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2485    device->vk.check_status = anv_device_check_status;
2486    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2487    vk_device_set_drm_fd(&device->vk, device->fd);
2488 
2489    uint32_t num_queues = 0;
2490    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2491       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2492 
2493    result = anv_device_setup_context(device, pCreateInfo, num_queues);
2494    if (result != VK_SUCCESS)
2495       goto fail_fd;
2496 
2497    device->queues =
2498       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2499                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2500    if (device->queues == NULL) {
2501       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2502       goto fail_context_id;
2503    }
2504 
2505    device->queue_count = 0;
2506    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2507       const VkDeviceQueueCreateInfo *queueCreateInfo =
2508          &pCreateInfo->pQueueCreateInfos[i];
2509 
2510       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2511          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2512           * engine-based contexts, the bottom 6 bits of exec_flags are used
2513           * for the engine ID.
2514           */
2515          uint32_t exec_flags = device->physical->engine_info ?
2516                                device->queue_count : I915_EXEC_RENDER;
2517 
2518          result = anv_queue_init(device, &device->queues[device->queue_count],
2519                                  exec_flags, queueCreateInfo, j);
2520          if (result != VK_SUCCESS)
2521             goto fail_queues;
2522 
2523          device->queue_count++;
2524       }
2525    }
2526 
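   /* With softpin (no relocations), the driver manages the GPU address space
    * itself using three heaps: vma_lo for allocations that must stay
    * 32-bit-addressable, vma_cva for allocations that need a client-visible
    * address (buffer device address capture/replay), and vma_hi for
    * everything else.  See anv_vma_alloc() below.
    */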
2527    if (!anv_use_relocations(physical_device)) {
2528       if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2529          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2530          goto fail_queues;
2531       }
2532 
2533       /* keep the page with address zero out of the allocator */
2534       util_vma_heap_init(&device->vma_lo,
2535                          LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2536 
2537       util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2538                          CLIENT_VISIBLE_HEAP_SIZE);
2539 
2540       /* Leave the last 4GiB out of the high vma range, so that no state
2541        * base address + size can overflow 48 bits. For more information see
2542        * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2543        */
2544       util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2545                          physical_device->gtt_size - (1ull << 32) -
2546                          HIGH_HEAP_MIN_ADDRESS);
2547    }
2548 
2549    list_inithead(&device->memory_objects);
2550 
2551    /* On Broadwell and later, we can use batch chaining to more efficiently
2552     * implement growing command buffers.  Prior to Haswell, the kernel
2553     * command parser gets in the way and we have to fall back to growing
2554     * the batch.
2555     */
2556    device->can_chain_batches = device->info->ver >= 8;
2557 
2558    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2559       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2560       goto fail_vmas;
2561    }
2562 
2563    pthread_condattr_t condattr;
2564    if (pthread_condattr_init(&condattr) != 0) {
2565       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2566       goto fail_mutex;
2567    }
2568    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2569       pthread_condattr_destroy(&condattr);
2570       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2571       goto fail_mutex;
2572    }
2573    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2574       pthread_condattr_destroy(&condattr);
2575       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2576       goto fail_mutex;
2577    }
2578    pthread_condattr_destroy(&condattr);
2579 
2580    result = anv_bo_cache_init(&device->bo_cache, device);
2581    if (result != VK_SUCCESS)
2582       goto fail_queue_cond;
2583 
2584    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2585 
2586    /* Because scratch is also relative to General State Base Address, we leave
2587     * the base address 0 and start the pool memory at an offset.  This way we
2588     * get the correct offsets in the anv_states that get allocated from it.
2589     */
2590    result = anv_state_pool_init(&device->general_state_pool, device,
2591                                 "general pool",
2592                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2593    if (result != VK_SUCCESS)
2594       goto fail_batch_bo_pool;
2595 
2596    result = anv_state_pool_init(&device->dynamic_state_pool, device,
2597                                 "dynamic pool",
2598                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2599    if (result != VK_SUCCESS)
2600       goto fail_general_state_pool;
2601 
2602    if (device->info->ver >= 8) {
2603       /* The border color pointer is limited to 24 bits, so we need to make
2604        * sure that any such color used at any point in the program doesn't
2605        * exceed that limit.
2606        * We achieve that by reserving all the custom border colors we support
2607        * right off the bat, so they are close to the base address.
2608        */
2609       anv_state_reserved_pool_init(&device->custom_border_colors,
2610                                    &device->dynamic_state_pool,
2611                                    MAX_CUSTOM_BORDER_COLORS,
2612                                    sizeof(struct gfx8_border_color), 64);
2613    }
2614 
2615    result = anv_state_pool_init(&device->instruction_state_pool, device,
2616                                 "instruction pool",
2617                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2618    if (result != VK_SUCCESS)
2619       goto fail_dynamic_state_pool;
2620 
2621    result = anv_state_pool_init(&device->surface_state_pool, device,
2622                                 "surface state pool",
2623                                 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2624    if (result != VK_SUCCESS)
2625       goto fail_instruction_state_pool;
2626 
2627    if (!anv_use_relocations(physical_device)) {
2628       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2629                                (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2630       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2631       result = anv_state_pool_init(&device->binding_table_pool, device,
2632                                    "binding table pool",
2633                                    SURFACE_STATE_POOL_MIN_ADDRESS,
2634                                    bt_pool_offset,
2635                                    BINDING_TABLE_POOL_BLOCK_SIZE);
2636    }
2637    if (result != VK_SUCCESS)
2638       goto fail_surface_state_pool;
2639 
2640    result = anv_device_alloc_bo(device, "workaround", 4096,
2641                                 ANV_BO_ALLOC_CAPTURE |
2642                                 ANV_BO_ALLOC_MAPPED,
2643                                 0 /* explicit_address */,
2644                                 &device->workaround_bo);
2645    if (result != VK_SUCCESS)
2646       goto fail_binding_table_pool;
2647 
2648    device->workaround_address = (struct anv_address) {
2649       .bo = device->workaround_bo,
2650       .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2651                                                     device->workaround_bo->size,
2652                                                     "hasvk"), 32),
2653    };
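   /* intel_debug_write_identifiers() fills the start of the workaround BO
    * with debug identifier blocks; workaround_address points just past them
    * (32-byte aligned) so that workaround writes don't clobber the
    * identifiers, and the frame identifier block is looked up again below
    * for debug_frame_desc.
    */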
2654 
2655    device->workarounds.doom64_images = NULL;
2656 
2657    device->debug_frame_desc =
2658       intel_debug_get_identifier_block(device->workaround_bo->map,
2659                                        device->workaround_bo->size,
2660                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
2661 
2662    result = anv_device_init_trivial_batch(device);
2663    if (result != VK_SUCCESS)
2664       goto fail_workaround_bo;
2665 
2666    /* Allocate a null surface state at surface state offset 0.  This makes
2667     * NULL descriptor handling trivial because we can just memset structures
2668     * to zero and they have a valid descriptor.
2669     */
2670    device->null_surface_state =
2671       anv_state_pool_alloc(&device->surface_state_pool,
2672                            device->isl_dev.ss.size,
2673                            device->isl_dev.ss.align);
2674    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2675                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2676    assert(device->null_surface_state.offset == 0);
2677 
2678    anv_scratch_pool_init(device, &device->scratch_pool);
2679 
2680    result = anv_genX(device->info, init_device_state)(device);
2681    if (result != VK_SUCCESS)
2682       goto fail_trivial_batch_bo_and_scratch_pool;
2683 
2684    struct vk_pipeline_cache_create_info pcc_info = { };
2685    device->default_pipeline_cache =
2686       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2687    if (!device->default_pipeline_cache) {
2688       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2689       goto fail_trivial_batch_bo_and_scratch_pool;
2690    }
2691 
2692    /* Internal shaders need their own pipeline cache because, unlike the rest
2693     * of ANV, BLORP won't work at all without a cache: it relies on the cache
2694     * to keep its shaders resident while it runs. Therefore, we need a special
2695     * cache just for BLORP/RT that's forced to always be enabled.
2696     */
2697    pcc_info.force_enable = true;
2698    device->internal_cache =
2699       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2700    if (device->internal_cache == NULL) {
2701       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2702       goto fail_default_pipeline_cache;
2703    }
2704 
2705    device->robust_buffer_access =
2706       device->vk.enabled_features.robustBufferAccess ||
2707       device->vk.enabled_features.nullDescriptor;
2708 
2709    anv_device_init_blorp(device);
2710 
2711    anv_device_init_border_colors(device);
2712 
2713    anv_device_perf_init(device);
2714 
2715    anv_device_utrace_init(device);
2716 
2717    *pDevice = anv_device_to_handle(device);
2718 
2719    return VK_SUCCESS;
2720 
2721  fail_default_pipeline_cache:
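   /* Each fail_* label below undoes exactly the initialization performed
    * before the corresponding failure point, in reverse order, so a failure
    * anywhere above releases only what has already been created.
    */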
2722    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2723  fail_trivial_batch_bo_and_scratch_pool:
2724    anv_scratch_pool_finish(device, &device->scratch_pool);
2725    anv_device_release_bo(device, device->trivial_batch_bo);
2726  fail_workaround_bo:
2727    anv_device_release_bo(device, device->workaround_bo);
2728  fail_binding_table_pool:
2729    if (!anv_use_relocations(physical_device))
2730       anv_state_pool_finish(&device->binding_table_pool);
2731  fail_surface_state_pool:
2732    anv_state_pool_finish(&device->surface_state_pool);
2733  fail_instruction_state_pool:
2734    anv_state_pool_finish(&device->instruction_state_pool);
2735  fail_dynamic_state_pool:
2736    if (device->info->ver >= 8)
2737       anv_state_reserved_pool_finish(&device->custom_border_colors);
2738    anv_state_pool_finish(&device->dynamic_state_pool);
2739  fail_general_state_pool:
2740    anv_state_pool_finish(&device->general_state_pool);
2741  fail_batch_bo_pool:
2742    anv_bo_pool_finish(&device->batch_bo_pool);
2743    anv_bo_cache_finish(&device->bo_cache);
2744  fail_queue_cond:
2745    pthread_cond_destroy(&device->queue_submit);
2746  fail_mutex:
2747    pthread_mutex_destroy(&device->mutex);
2748  fail_vmas:
2749    if (!anv_use_relocations(physical_device)) {
2750       util_vma_heap_finish(&device->vma_hi);
2751       util_vma_heap_finish(&device->vma_cva);
2752       util_vma_heap_finish(&device->vma_lo);
2753    }
2754  fail_queues:
2755    for (uint32_t i = 0; i < device->queue_count; i++)
2756       anv_queue_finish(&device->queues[i]);
2757    vk_free(&device->vk.alloc, device->queues);
2758  fail_context_id:
2759    intel_gem_destroy_context(device->fd, device->context_id);
2760  fail_fd:
2761    close(device->fd);
2762  fail_device:
2763    vk_device_finish(&device->vk);
2764  fail_alloc:
2765    vk_free(&device->vk.alloc, device);
2766 
2767    return result;
2768 }
2769 
2770 void anv_DestroyDevice(
2771     VkDevice                                    _device,
2772     const VkAllocationCallbacks*                pAllocator)
2773 {
2774    ANV_FROM_HANDLE(anv_device, device, _device);
2775 
2776    if (!device)
2777       return;
2778 
2779    anv_device_utrace_finish(device);
2780 
2781    anv_device_finish_blorp(device);
2782 
2783    vk_pipeline_cache_destroy(device->internal_cache, NULL);
2784    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2785 
2786 #ifdef HAVE_VALGRIND
2787    /* We only need to free these to prevent valgrind errors.  The backing
2788     * BO will go away in a couple of lines so we don't actually leak.
2789     */
2790    if (device->info->ver >= 8)
2791       anv_state_reserved_pool_finish(&device->custom_border_colors);
2792    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2793    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2794 #endif
2795 
2796    anv_scratch_pool_finish(device, &device->scratch_pool);
2797 
2798    anv_device_release_bo(device, device->workaround_bo);
2799    anv_device_release_bo(device, device->trivial_batch_bo);
2800 
2801    if (!anv_use_relocations(device->physical))
2802       anv_state_pool_finish(&device->binding_table_pool);
2803    anv_state_pool_finish(&device->surface_state_pool);
2804    anv_state_pool_finish(&device->instruction_state_pool);
2805    anv_state_pool_finish(&device->dynamic_state_pool);
2806    anv_state_pool_finish(&device->general_state_pool);
2807 
2808    anv_bo_pool_finish(&device->batch_bo_pool);
2809 
2810    anv_bo_cache_finish(&device->bo_cache);
2811 
2812    if (!anv_use_relocations(device->physical)) {
2813       util_vma_heap_finish(&device->vma_hi);
2814       util_vma_heap_finish(&device->vma_cva);
2815       util_vma_heap_finish(&device->vma_lo);
2816    }
2817 
2818    pthread_cond_destroy(&device->queue_submit);
2819    pthread_mutex_destroy(&device->mutex);
2820 
2821    for (uint32_t i = 0; i < device->queue_count; i++)
2822       anv_queue_finish(&device->queues[i]);
2823    vk_free(&device->vk.alloc, device->queues);
2824 
2825    intel_gem_destroy_context(device->fd, device->context_id);
2826 
2827    if (INTEL_DEBUG(DEBUG_BATCH))
2828       intel_batch_decode_ctx_finish(&device->decoder_ctx);
2829 
2830    close(device->fd);
2831 
2832    vk_device_finish(&device->vk);
2833    vk_free(&device->vk.alloc, device);
2834 }
2835 
2836 VkResult anv_EnumerateInstanceLayerProperties(
2837     uint32_t*                                   pPropertyCount,
2838     VkLayerProperties*                          pProperties)
2839 {
2840    if (pProperties == NULL) {
2841       *pPropertyCount = 0;
2842       return VK_SUCCESS;
2843    }
2844 
2845    /* None supported at this time */
2846    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2847 }
2848 
2849 static VkResult
2850 anv_device_check_status(struct vk_device *vk_device)
2851 {
2852    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2853 
2854    uint32_t active, pending;
2855    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2856                                              &active, &pending);
2857    if (ret == -1) {
2858       /* We don't know the real error. */
2859       return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2860    }
2861 
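   /* Roughly: 'active' counts hangs where one of this context's batches was
    * executing, while 'pending' counts batches that were queued behind a
    * hang, hence the two different device-loss messages below.
    */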
2862    if (active) {
2863       return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2864    } else if (pending) {
2865       return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2866    }
2867 
2868    return VK_SUCCESS;
2869 }
2870 
2871 VkResult
2872 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2873                 int64_t timeout)
2874 {
2875    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2876    if (ret == -1 && errno == ETIME) {
2877       return VK_TIMEOUT;
2878    } else if (ret == -1) {
2879       /* We don't know the real error. */
2880       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2881    } else {
2882       return VK_SUCCESS;
2883    }
2884 }
2885 
2886 uint64_t
2887 anv_vma_alloc(struct anv_device *device,
2888               uint64_t size, uint64_t align,
2889               enum anv_bo_alloc_flags alloc_flags,
2890               uint64_t client_address)
2891 {
2892    pthread_mutex_lock(&device->vma_mutex);
2893 
2894    uint64_t addr = 0;
2895 
2896    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2897       if (client_address) {
2898          if (util_vma_heap_alloc_addr(&device->vma_cva,
2899                                       client_address, size)) {
2900             addr = client_address;
2901          }
2902       } else {
2903          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2904       }
2905       /* We don't want to fall back to other heaps */
2906       goto done;
2907    }
2908 
2909    assert(client_address == 0);
2910 
2911    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2912       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2913 
2914    if (addr == 0)
2915       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2916 
2917 done:
2918    pthread_mutex_unlock(&device->vma_mutex);
2919 
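   /* The heaps deal in plain 48-bit addresses; hand back the canonical
    * (sign-extended) form the hardware expects.  anv_vma_free() strips the
    * extension again with intel_48b_address() before returning the range.
    */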
2920    assert(addr == intel_48b_address(addr));
2921    return intel_canonical_address(addr);
2922 }
2923 
2924 void
2925 anv_vma_free(struct anv_device *device,
2926              uint64_t address, uint64_t size)
2927 {
2928    const uint64_t addr_48b = intel_48b_address(address);
2929 
2930    pthread_mutex_lock(&device->vma_mutex);
2931 
2932    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2933        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2934       util_vma_heap_free(&device->vma_lo, addr_48b, size);
2935    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2936               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2937       util_vma_heap_free(&device->vma_cva, addr_48b, size);
2938    } else {
2939       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2940       util_vma_heap_free(&device->vma_hi, addr_48b, size);
2941    }
2942 
2943    pthread_mutex_unlock(&device->vma_mutex);
2944 }
2945 
2946 VkResult anv_AllocateMemory(
2947     VkDevice                                    _device,
2948     const VkMemoryAllocateInfo*                 pAllocateInfo,
2949     const VkAllocationCallbacks*                pAllocator,
2950     VkDeviceMemory*                             pMem)
2951 {
2952    ANV_FROM_HANDLE(anv_device, device, _device);
2953    struct anv_physical_device *pdevice = device->physical;
2954    struct anv_device_memory *mem;
2955    VkResult result = VK_SUCCESS;
2956 
2957    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2958 
2959    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
2960    assert(pAllocateInfo->allocationSize > 0);
2961 
2962    VkDeviceSize aligned_alloc_size =
2963       align64(pAllocateInfo->allocationSize, 4096);
2964 
2965    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
2966       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2967 
2968    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
2969    struct anv_memory_type *mem_type =
2970       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
2971    assert(mem_type->heapIndex < pdevice->memory.heap_count);
2972    struct anv_memory_heap *mem_heap =
2973       &pdevice->memory.heaps[mem_type->heapIndex];
2974 
2975    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2976    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
2977       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2978 
2979    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2980                          VK_OBJECT_TYPE_DEVICE_MEMORY);
2981    if (mem == NULL)
2982       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2983 
2984    mem->type = mem_type;
2985    mem->map = NULL;
2986    mem->map_size = 0;
2987    mem->map_delta = 0;
2988    mem->ahw = NULL;
2989    mem->host_ptr = NULL;
2990 
2991    enum anv_bo_alloc_flags alloc_flags = 0;
2992 
2993    const VkExportMemoryAllocateInfo *export_info = NULL;
2994    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
2995    const VkImportMemoryFdInfoKHR *fd_info = NULL;
2996    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
2997    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
2998    VkMemoryAllocateFlags vk_flags = 0;
2999    uint64_t client_address = 0;
3000 
3001    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3002       switch (ext->sType) {
3003       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3004          export_info = (void *)ext;
3005          break;
3006 
3007       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3008          ahw_import_info = (void *)ext;
3009          break;
3010 
3011       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3012          fd_info = (void *)ext;
3013          break;
3014 
3015       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3016          host_ptr_info = (void *)ext;
3017          break;
3018 
3019       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3020          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3021          vk_flags = flags_info->flags;
3022          break;
3023       }
3024 
3025       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3026          dedicated_info = (void *)ext;
3027          break;
3028 
3029       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3030          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3031             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3032          client_address = addr_info->opaqueCaptureAddress;
3033          break;
3034       }
3035 
3036       default:
3037          if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3038             /* This isn't a real enum value,
3039              * so use a conditional to avoid a compiler warning.
3040              */
3041             vk_debug_ignored_stype(ext->sType);
3042          break;
3043       }
3044    }
3045 
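   /* The import/export paths below are tried in order: Android hardware
    * buffers, dma-buf/opaque fd imports, then host-pointer imports.  Only if
    * none of them apply do we fall through to allocating a fresh BO.
    */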
3046    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
3047       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3048 
3049    if ((export_info && export_info->handleTypes) ||
3050        (fd_info && fd_info->handleType) ||
3051        (host_ptr_info && host_ptr_info->handleType)) {
3052       /* Anything imported or exported is EXTERNAL */
3053       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3054    }
3055 
3056    /* Check if we need to support Android HW buffer export. If so,
3057     * create AHardwareBuffer and import memory from it.
3058     */
3059    bool android_export = false;
3060    if (export_info && export_info->handleTypes &
3061        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3062       android_export = true;
3063 
3064    if (ahw_import_info) {
3065       result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3066       if (result != VK_SUCCESS)
3067          goto fail;
3068 
3069       goto success;
3070    } else if (android_export) {
3071       result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3072       if (result != VK_SUCCESS)
3073          goto fail;
3074 
3075       goto success;
3076    }
3077 
3078    /* The Vulkan spec permits handleType to be 0, in which case the struct is
3079     * ignored.
3080     */
3081    if (fd_info && fd_info->handleType) {
3082       /* At the moment, we support only the below handle types. */
3083       assert(fd_info->handleType ==
3084                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3085              fd_info->handleType ==
3086                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3087 
3088       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3089                                     client_address, &mem->bo);
3090       if (result != VK_SUCCESS)
3091          goto fail;
3092 
3093       /* For security purposes, we reject importing the bo if it's smaller
3094        * than the requested allocation size.  This prevents a malicious client
3095        * from passing a buffer to a trusted client, lying about the size, and
3096        * telling the trusted client to try and texture from an image that goes
3097        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
3098        * in the trusted client.  The trusted client can protect itself against
3099        * this sort of attack but only if it can trust the buffer size.
3100        */
3101       if (mem->bo->size < aligned_alloc_size) {
3102          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
3103                             "aligned allocationSize too large for "
3104                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3105                             "%"PRIu64"B > %"PRIu64"B",
3106                             aligned_alloc_size, mem->bo->size);
3107          anv_device_release_bo(device, mem->bo);
3108          goto fail;
3109       }
3110 
3111       /* From the Vulkan spec:
3112        *
3113        *    "Importing memory from a file descriptor transfers ownership of
3114        *    the file descriptor from the application to the Vulkan
3115        *    implementation. The application must not perform any operations on
3116        *    the file descriptor after a successful import."
3117        *
3118        * If the import fails, we leave the file descriptor open.
3119        */
3120       close(fd_info->fd);
3121       goto success;
3122    }
3123 
3124    if (host_ptr_info && host_ptr_info->handleType) {
3125       if (host_ptr_info->handleType ==
3126           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
3127          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3128          goto fail;
3129       }
3130 
3131       assert(host_ptr_info->handleType ==
3132              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
3133 
3134       result = anv_device_import_bo_from_host_ptr(device,
3135                                                   host_ptr_info->pHostPointer,
3136                                                   pAllocateInfo->allocationSize,
3137                                                   alloc_flags,
3138                                                   client_address,
3139                                                   &mem->bo);
3140       if (result != VK_SUCCESS)
3141          goto fail;
3142 
3143       mem->host_ptr = host_ptr_info->pHostPointer;
3144       goto success;
3145    }
3146 
3147    /* Regular allocate (not importing memory). */
3148 
3149    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
3150                                 alloc_flags, client_address, &mem->bo);
3151    if (result != VK_SUCCESS)
3152       goto fail;
3153 
3154    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
3155       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
3156 
3157       /* Some legacy (non-modifiers) consumers need the tiling to be set on
3158        * the BO.  In this case, we have a dedicated allocation.
3159        */
3160       if (image->vk.wsi_legacy_scanout) {
3161          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
3162          result = anv_device_set_bo_tiling(device, mem->bo,
3163                                            surf->row_pitch_B,
3164                                            surf->tiling);
3165          if (result != VK_SUCCESS) {
3166             anv_device_release_bo(device, mem->bo);
3167             goto fail;
3168          }
3169       }
3170    }
3171 
3172  success:
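   /* Heap usage is charged optimistically: add the BO size first and, if
    * that pushes the heap past its advertised size, roll the charge back and
    * fail the allocation.
    */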
3173    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3174    if (mem_heap_used > mem_heap->size) {
3175       p_atomic_add(&mem_heap->used, -mem->bo->size);
3176       anv_device_release_bo(device, mem->bo);
3177       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3178                          "Out of heap memory");
3179       goto fail;
3180    }
3181 
3182    pthread_mutex_lock(&device->mutex);
3183    list_addtail(&mem->link, &device->memory_objects);
3184    pthread_mutex_unlock(&device->mutex);
3185 
3186    *pMem = anv_device_memory_to_handle(mem);
3187 
3188    return VK_SUCCESS;
3189 
3190  fail:
3191    vk_object_free(&device->vk, pAllocator, mem);
3192 
3193    return result;
3194 }
3195 
3196 VkResult anv_GetMemoryFdKHR(
3197     VkDevice                                    device_h,
3198     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
3199     int*                                        pFd)
3200 {
3201    ANV_FROM_HANDLE(anv_device, dev, device_h);
3202    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
3203 
3204    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3205 
3206    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3207           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3208 
3209    return anv_device_export_bo(dev, mem->bo, pFd);
3210 }
3211 
3212 VkResult anv_GetMemoryFdPropertiesKHR(
3213     VkDevice                                    _device,
3214     VkExternalMemoryHandleTypeFlagBits          handleType,
3215     int                                         fd,
3216     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
3217 {
3218    ANV_FROM_HANDLE(anv_device, device, _device);
3219 
3220    switch (handleType) {
3221    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
3222       /* dma-buf can be imported as any memory type */
3223       pMemoryFdProperties->memoryTypeBits =
3224          (1 << device->physical->memory.type_count) - 1;
3225       return VK_SUCCESS;
3226 
3227    default:
3228       /* The valid usage section for this function says:
3229        *
3230        *    "handleType must not be one of the handle types defined as
3231        *    opaque."
3232        *
3233        * So opaque handle types fall into the default "unsupported" case.
3234        */
3235       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
3236    }
3237 }
3238 
3239 VkResult anv_GetMemoryHostPointerPropertiesEXT(
3240    VkDevice                                    _device,
3241    VkExternalMemoryHandleTypeFlagBits          handleType,
3242    const void*                                 pHostPointer,
3243    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
3244 {
3245    ANV_FROM_HANDLE(anv_device, device, _device);
3246 
3247    assert(pMemoryHostPointerProperties->sType ==
3248           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
3249 
3250    switch (handleType) {
3251    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
3252       /* Host memory can be imported as any memory type. */
3253       pMemoryHostPointerProperties->memoryTypeBits =
3254          (1ull << device->physical->memory.type_count) - 1;
3255 
3256       return VK_SUCCESS;
3257 
3258    default:
3259       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
3260    }
3261 }
3262 
3263 void anv_FreeMemory(
3264     VkDevice                                    _device,
3265     VkDeviceMemory                              _mem,
3266     const VkAllocationCallbacks*                pAllocator)
3267 {
3268    ANV_FROM_HANDLE(anv_device, device, _device);
3269    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
3270 
3271    if (mem == NULL)
3272       return;
3273 
3274    pthread_mutex_lock(&device->mutex);
3275    list_del(&mem->link);
3276    pthread_mutex_unlock(&device->mutex);
3277 
3278    if (mem->map)
3279       anv_UnmapMemory(_device, _mem);
3280 
3281    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
3282                 -mem->bo->size);
3283 
3284    anv_device_release_bo(device, mem->bo);
3285 
3286 #if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
3287    if (mem->ahw)
3288       AHardwareBuffer_release(mem->ahw);
3289 #endif
3290 
3291    vk_object_free(&device->vk, pAllocator, mem);
3292 }
3293 
3294 VkResult anv_MapMemory(
3295     VkDevice                                    _device,
3296     VkDeviceMemory                              _memory,
3297     VkDeviceSize                                offset,
3298     VkDeviceSize                                size,
3299     VkMemoryMapFlags                            flags,
3300     void**                                      ppData)
3301 {
3302    ANV_FROM_HANDLE(anv_device, device, _device);
3303    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3304 
3305    if (mem == NULL) {
3306       *ppData = NULL;
3307       return VK_SUCCESS;
3308    }
3309 
3310    if (mem->host_ptr) {
3311       *ppData = mem->host_ptr + offset;
3312       return VK_SUCCESS;
3313    }
3314 
3315    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3316     *
3317     *  * memory must have been created with a memory type that reports
3318     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
3319     */
3320    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
3321       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3322                        "Memory object not mappable.");
3323    }
3324 
3325    if (size == VK_WHOLE_SIZE)
3326       size = mem->bo->size - offset;
3327 
3328    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
3329     *
3330     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater
3331     *    than 0
3332     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
3333     *    equal to the size of the memory minus offset
3334     */
3335    assert(size > 0);
3336    assert(offset + size <= mem->bo->size);
3337 
3338    if (size != (size_t)size) {
3339       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3340                        "requested size 0x%"PRIx64" does not fit in %u bits",
3341                        size, (unsigned)(sizeof(size_t) * 8));
3342    }
3343 
3344    /* From the Vulkan 1.2.194 spec:
3345     *
3346     *    "memory must not be currently host mapped"
3347     */
3348    if (mem->map != NULL) {
3349       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
3350                        "Memory object already mapped.");
3351    }
3352 
3353    uint32_t gem_flags = 0;
3354 
3355    if (!device->info->has_llc &&
3356        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
3357       gem_flags |= I915_MMAP_WC;
3358 
3359    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
3360    uint64_t map_offset;
3361    if (!device->physical->info.has_mmap_offset)
3362       map_offset = offset & ~4095ull;
3363    else
3364       map_offset = 0;
3365    assert(offset >= map_offset);
3366    uint64_t map_size = (offset + size) - map_offset;
3367 
3368    /* Let's map whole pages */
3369    map_size = align64(map_size, 4096);
3370 
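   /* The BO is mapped from map_offset for map_size bytes; map_delta records
    * how far past map_offset the caller's requested offset lands, so the
    * pointer handed back is mem->map + mem->map_delta.
    */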
3371    void *map;
3372    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
3373                                        map_size, gem_flags, &map);
3374    if (result != VK_SUCCESS)
3375       return result;
3376 
3377    mem->map = map;
3378    mem->map_size = map_size;
3379    mem->map_delta = (offset - map_offset);
3380    *ppData = mem->map + mem->map_delta;
3381 
3382    return VK_SUCCESS;
3383 }
3384 
3385 void anv_UnmapMemory(
3386     VkDevice                                    _device,
3387     VkDeviceMemory                              _memory)
3388 {
3389    ANV_FROM_HANDLE(anv_device, device, _device);
3390    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
3391 
3392    if (mem == NULL || mem->host_ptr)
3393       return;
3394 
3395    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
3396 
3397    mem->map = NULL;
3398    mem->map_size = 0;
3399    mem->map_delta = 0;
3400 }
3401 
3402 VkResult anv_FlushMappedMemoryRanges(
3403     VkDevice                                    _device,
3404     uint32_t                                    memoryRangeCount,
3405     const VkMappedMemoryRange*                  pMemoryRanges)
3406 {
3407    ANV_FROM_HANDLE(anv_device, device, _device);
3408 
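   /* Presumably need_flush is only set on platforms without coherent CPU/GPU
    * caches (non-LLC); when the caches are coherent there is nothing to
    * flush here.
    */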
3409    if (!device->physical->memory.need_flush)
3410       return VK_SUCCESS;
3411 
3412 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3413    /* Make sure the writes we're flushing have landed. */
3414    __builtin_ia32_mfence();
3415 #endif
3416 
3417    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3418       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3419       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3420          continue;
3421 
3422       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3423       if (map_offset >= mem->map_size)
3424          continue;
3425 
3426 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3427       intel_flush_range(mem->map + map_offset,
3428                         MIN2(pMemoryRanges[i].size,
3429                              mem->map_size - map_offset));
3430 #endif
3431    }
3432 
3433    return VK_SUCCESS;
3434 }
3435 
3436 VkResult anv_InvalidateMappedMemoryRanges(
3437     VkDevice                                    _device,
3438     uint32_t                                    memoryRangeCount,
3439     const VkMappedMemoryRange*                  pMemoryRanges)
3440 {
3441    ANV_FROM_HANDLE(anv_device, device, _device);
3442 
3443    if (!device->physical->memory.need_flush)
3444       return VK_SUCCESS;
3445 
3446    for (uint32_t i = 0; i < memoryRangeCount; i++) {
3447       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
3448       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3449          continue;
3450 
3451       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
3452       if (map_offset >= mem->map_size)
3453          continue;
3454 
3455 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3456       intel_invalidate_range(mem->map + map_offset,
3457                              MIN2(pMemoryRanges[i].size,
3458                                   mem->map_size - map_offset));
3459 #endif
3460    }
3461 
3462 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3463    /* Make sure no reads get moved up above the invalidate. */
3464    __builtin_ia32_mfence();
3465 #endif
3466 
3467    return VK_SUCCESS;
3468 }
3469 
3470 void anv_GetDeviceMemoryCommitment(
3471     VkDevice                                    device,
3472     VkDeviceMemory                              memory,
3473     VkDeviceSize*                               pCommittedMemoryInBytes)
3474 {
3475    *pCommittedMemoryInBytes = 0;
3476 }
3477 
3478 static void
3479 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
3480 {
3481    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
3482    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
3483 
3484    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
3485 
3486    if (mem) {
3487       assert(pBindInfo->memoryOffset < mem->bo->size);
3488       assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
3489       buffer->address = (struct anv_address) {
3490          .bo = mem->bo,
3491          .offset = pBindInfo->memoryOffset,
3492       };
3493    } else {
3494       buffer->address = ANV_NULL_ADDRESS;
3495    }
3496 }
3497 
3498 VkResult anv_BindBufferMemory2(
3499     VkDevice                                    device,
3500     uint32_t                                    bindInfoCount,
3501     const VkBindBufferMemoryInfo*               pBindInfos)
3502 {
3503    for (uint32_t i = 0; i < bindInfoCount; i++)
3504       anv_bind_buffer_memory(&pBindInfos[i]);
3505 
3506    return VK_SUCCESS;
3507 }
3508 
3509 VkResult anv_QueueBindSparse(
3510     VkQueue                                     _queue,
3511     uint32_t                                    bindInfoCount,
3512     const VkBindSparseInfo*                     pBindInfo,
3513     VkFence                                     fence)
3514 {
3515    ANV_FROM_HANDLE(anv_queue, queue, _queue);
3516    if (vk_device_is_lost(&queue->device->vk))
3517       return VK_ERROR_DEVICE_LOST;
3518 
3519    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
3520 }
3521 
3522 // Event functions
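/* Events are backed by a single uint64_t allocated from the dynamic state
 * pool, so both the host (vkSetEvent/vkResetEvent) and the GPU command
 * stream can write VK_EVENT_SET or VK_EVENT_RESET to it, and
 * vkGetEventStatus simply reads the value back.
 */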
3523 
3524 VkResult anv_CreateEvent(
3525     VkDevice                                    _device,
3526     const VkEventCreateInfo*                    pCreateInfo,
3527     const VkAllocationCallbacks*                pAllocator,
3528     VkEvent*                                    pEvent)
3529 {
3530    ANV_FROM_HANDLE(anv_device, device, _device);
3531    struct anv_event *event;
3532 
3533    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
3534 
3535    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
3536                            VK_OBJECT_TYPE_EVENT);
3537    if (event == NULL)
3538       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3539 
3540    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
3541                                        sizeof(uint64_t), 8);
3542    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3543 
3544    *pEvent = anv_event_to_handle(event);
3545 
3546    return VK_SUCCESS;
3547 }
3548 
3549 void anv_DestroyEvent(
3550     VkDevice                                    _device,
3551     VkEvent                                     _event,
3552     const VkAllocationCallbacks*                pAllocator)
3553 {
3554    ANV_FROM_HANDLE(anv_device, device, _device);
3555    ANV_FROM_HANDLE(anv_event, event, _event);
3556 
3557    if (!event)
3558       return;
3559 
3560    anv_state_pool_free(&device->dynamic_state_pool, event->state);
3561 
3562    vk_object_free(&device->vk, pAllocator, event);
3563 }
3564 
3565 VkResult anv_GetEventStatus(
3566     VkDevice                                    _device,
3567     VkEvent                                     _event)
3568 {
3569    ANV_FROM_HANDLE(anv_device, device, _device);
3570    ANV_FROM_HANDLE(anv_event, event, _event);
3571 
3572    if (vk_device_is_lost(&device->vk))
3573       return VK_ERROR_DEVICE_LOST;
3574 
3575    return *(uint64_t *)event->state.map;
3576 }
3577 
3578 VkResult anv_SetEvent(
3579     VkDevice                                    _device,
3580     VkEvent                                     _event)
3581 {
3582    ANV_FROM_HANDLE(anv_event, event, _event);
3583 
3584    *(uint64_t *)event->state.map = VK_EVENT_SET;
3585 
3586    return VK_SUCCESS;
3587 }
3588 
3589 VkResult anv_ResetEvent(
3590     VkDevice                                    _device,
3591     VkEvent                                     _event)
3592 {
3593    ANV_FROM_HANDLE(anv_event, event, _event);
3594 
3595    *(uint64_t *)event->state.map = VK_EVENT_RESET;
3596 
3597    return VK_SUCCESS;
3598 }
3599 
3600 // Buffer functions
3601 
3602 static void
3603 anv_get_buffer_memory_requirements(struct anv_device *device,
3604                                    VkDeviceSize size,
3605                                    VkBufferUsageFlags usage,
3606                                    VkMemoryRequirements2* pMemoryRequirements)
3607 {
3608    /* The Vulkan spec (git aaed022) says:
3609     *
3610     *    memoryTypeBits is a bitfield and contains one bit set for every
3611     *    supported memory type for the resource. The bit `1<<i` is set if and
3612     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
3613     *    structure for the physical device is supported.
3614     */
3615    uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
3616 
3617    /* Base alignment requirement of a cache line */
3618    uint32_t alignment = 16;
3619 
3620    if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
3621       alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
3622 
3623    pMemoryRequirements->memoryRequirements.size = size;
3624    pMemoryRequirements->memoryRequirements.alignment = alignment;
3625 
3626    /* Storage and uniform buffers should have their size aligned to
3627     * 32 bits to avoid boundary checks when the last DWord is not
3628     * complete.  This ensures that no internal padding is needed for
3629     * 16-bit types.
3630     */
3631    if (device->vk.enabled_features.robustBufferAccess &&
3632        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
3633         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
3634       pMemoryRequirements->memoryRequirements.size = align64(size, 4);
3635 
3636    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3637 
3638    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
3639       switch (ext->sType) {
3640       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
3641          VkMemoryDedicatedRequirements *requirements = (void *)ext;
3642          requirements->prefersDedicatedAllocation = false;
3643          requirements->requiresDedicatedAllocation = false;
3644          break;
3645       }
3646 
3647       default:
3648          vk_debug_ignored_stype(ext->sType);
3649          break;
3650       }
3651    }
3652 }
3653 
3654 void anv_GetBufferMemoryRequirements2(
3655     VkDevice                                    _device,
3656     const VkBufferMemoryRequirementsInfo2*      pInfo,
3657     VkMemoryRequirements2*                      pMemoryRequirements)
3658 {
3659    ANV_FROM_HANDLE(anv_device, device, _device);
3660    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3661 
3662    anv_get_buffer_memory_requirements(device,
3663                                       buffer->vk.size,
3664                                       buffer->vk.usage,
3665                                       pMemoryRequirements);
3666 }
3667 
3668 void anv_GetDeviceBufferMemoryRequirements(
3669     VkDevice                                    _device,
3670     const VkDeviceBufferMemoryRequirements*     pInfo,
3671     VkMemoryRequirements2*                      pMemoryRequirements)
3672 {
3673    ANV_FROM_HANDLE(anv_device, device, _device);
3674 
3675    anv_get_buffer_memory_requirements(device,
3676                                       pInfo->pCreateInfo->size,
3677                                       pInfo->pCreateInfo->usage,
3678                                       pMemoryRequirements);
3679 }
3680 
3681 VkResult anv_CreateBuffer(
3682     VkDevice                                    _device,
3683     const VkBufferCreateInfo*                   pCreateInfo,
3684     const VkAllocationCallbacks*                pAllocator,
3685     VkBuffer*                                   pBuffer)
3686 {
3687    ANV_FROM_HANDLE(anv_device, device, _device);
3688    struct anv_buffer *buffer;
3689 
3690    /* Don't allow creating buffers bigger than our address space.  The real
3691     * issue here is that we may align up the buffer size and we don't want
3692     * that to cause roll-over.  However, no one has any business
3693     * allocating a buffer larger than our GTT size.
3694     */
3695    if (pCreateInfo->size > device->physical->gtt_size)
3696       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3697 
3698    buffer = vk_buffer_create(&device->vk, pCreateInfo,
3699                              pAllocator, sizeof(*buffer));
3700    if (buffer == NULL)
3701       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3702 
3703    buffer->address = ANV_NULL_ADDRESS;
3704 
3705    *pBuffer = anv_buffer_to_handle(buffer);
3706 
3707    return VK_SUCCESS;
3708 }
3709 
3710 void anv_DestroyBuffer(
3711     VkDevice                                    _device,
3712     VkBuffer                                    _buffer,
3713     const VkAllocationCallbacks*                pAllocator)
3714 {
3715    ANV_FROM_HANDLE(anv_device, device, _device);
3716    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
3717 
3718    if (!buffer)
3719       return;
3720 
3721    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
3722 }
3723 
3724 VkDeviceAddress anv_GetBufferDeviceAddress(
3725     VkDevice                                    device,
3726     const VkBufferDeviceAddressInfo*            pInfo)
3727 {
3728    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
3729 
3730    assert(!anv_address_is_null(buffer->address));
3731    assert(anv_bo_is_pinned(buffer->address.bo));
3732 
3733    return anv_address_physical(buffer->address);
3734 }
3735 
3736 uint64_t anv_GetBufferOpaqueCaptureAddress(
3737     VkDevice                                    device,
3738     const VkBufferDeviceAddressInfo*            pInfo)
3739 {
3740    return 0;
3741 }
3742 
3743 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
3744     VkDevice                                    device,
3745     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3746 {
3747    ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
3748 
3749    assert(anv_bo_is_pinned(memory->bo));
3750    assert(memory->bo->has_client_visible_address);
3751 
3752    return intel_48b_address(memory->bo->offset);
3753 }
3754 
3755 void
3756 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
3757                               enum isl_format format,
3758                               struct isl_swizzle swizzle,
3759                               isl_surf_usage_flags_t usage,
3760                               struct anv_address address,
3761                               uint32_t range, uint32_t stride)
3762 {
3763    isl_buffer_fill_state(&device->isl_dev, state.map,
3764                          .address = anv_address_physical(address),
3765                          .mocs = isl_mocs(&device->isl_dev, usage,
3766                                           address.bo && address.bo->is_external),
3767                          .size_B = range,
3768                          .format = format,
3769                          .swizzle = swizzle,
3770                          .stride_B = stride);
3771 }
3772 
3773 void anv_DestroySampler(
3774     VkDevice                                    _device,
3775     VkSampler                                   _sampler,
3776     const VkAllocationCallbacks*                pAllocator)
3777 {
3778    ANV_FROM_HANDLE(anv_device, device, _device);
3779    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
3780 
3781    if (!sampler)
3782       return;
3783 
3784    if (sampler->bindless_state.map) {
3785       anv_state_pool_free(&device->dynamic_state_pool,
3786                           sampler->bindless_state);
3787    }
3788 
3789    if (sampler->custom_border_color.map) {
3790       anv_state_reserved_pool_free(&device->custom_border_colors,
3791                                    sampler->custom_border_color);
3792    }
3793 
3794    vk_object_free(&device->vk, pAllocator, sampler);
3795 }
3796 
static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
   VkPhysicalDevice                             physicalDevice,
   uint32_t                                     *pTimeDomainCount,
   VkTimeDomainEXT                              *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

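/* Sample the given clock and return the time in nanoseconds, falling back to
 * CLOCK_MONOTONIC when CLOCK_MONOTONIC_RAW is not supported by the running
 * kernel. Returns 0 if the clock cannot be read at all.
 */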
static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

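/* Sample every requested time domain inside a single begin/end window taken
 * on the host clock, so the deviation reported to the application can bound
 * how far apart the individual samples may be.
 */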
VkResult anv_GetCalibratedTimestampsEXT(
   VkDevice                                     _device,
   uint32_t                                     timestampCount,
   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
   uint64_t                                     *pTimestamps,
   uint64_t                                     *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info->timestamp_frequency;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         if (!intel_gem_read_render_timestamp(device->fd,
                                              device->info->kmd_type,
                                              &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk, "Failed to read the "
                                      "TIMESTAMP register: %m");
         }
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

    /*
     * The maximum deviation is the sum of the interval over which we
     * perform the sampling and the maximum period of any sampled
     * clock. That's because the maximum skew between any two sampled
     * clock edges is when the sampled clock with the largest period is
     * sampled at the end of that period but right at the beginning of the
     * sampling interval and some other clock is sampled right at the
     * beginning of its sampling period and right at the end of the
     * sampling interval. Let's assume the GPU has the longest clock
     * period and that the application is sampling GPU and monotonic:
     *
     *                               s                 e
     *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
     *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     *                               g
     *		  0         1         2         3
     *	GPU       -----_____-----_____-----_____-----_____
     *
     *                                                m
     *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
     *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
     *
     *	Interval                     <----------------->
     *	Deviation           <-------------------------->
     *
     *		s  = read(raw)       2
     *		g  = read(GPU)       1
     *		m  = read(monotonic) 2
     *		e  = read(raw)       b
     *
     * We round the sample interval up by one tick to cover sampling error
     * in the interval clock.
     */
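   /* Purely illustrative example (the frequency below is not taken from any
    * particular platform): with a 12.5MHz GPU timestamp clock,
    * device_period = DIV_ROUND_UP(1000000000, 12500000) = 80ns. If the two
    * host samples were taken 1000ns apart, sample_interval = 1001ns and the
    * reported *pMaxDeviation is 1001 + 80 = 1081ns.
    */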

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}

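/* Only a 1x1 sample location grid is reported for sample counts the hardware
 * supports, and a 0x0 grid for everything else; no larger grids are exposed.
 */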
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}