/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/u_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/i915/intel_defines.h"
#include "common/intel_debug_identifier.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen7_pack.h"
#include "genxml/genX_bits.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(0)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_NO_16BIT(false)
      DRI_CONF_HASVK_OVERRIDE_API_VERSION(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};

/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

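/* Callback for shader compiler debug messages. The message is formatted
 * into a local buffer; forwarding it through vk_logd() is currently
 * disabled below.
 */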
static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

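/* Callback for shader compiler performance warnings; only emitted when
 * performance debugging is enabled through INTEL_DEBUG.
 */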
static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID_STRICT
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION_1_3 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#define ANV_API_VERSION_1_2 VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult anv_EnumerateInstanceVersion(
    uint32_t* pApiVersion)
{
#ifdef ANDROID_STRICT
   *pApiVersion = ANV_API_VERSION;
#else
   *pApiVersion = ANV_API_VERSION_1_3;
#endif
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
};

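/* Fill out the table of device extensions we advertise, gating each entry
 * on the hardware generation and on capabilities probed from the kernel.
 */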
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = device->info.ver >= 8,
      .KHR_16bit_storage = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = device->has_a64_buffer_access,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = has_syncobj_wait,
      .KHR_external_fence_fd = has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         !anv_use_relocations(device) && device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      /* Hide these behind dri configs for now since we cannot implement it
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = device->info.ver >= 8 && !device->instance->no_16bit,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = device->info.ver >= 8,
      .EXT_buffer_device_address = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = device->info.verx10 >= 75,
      .EXT_custom_border_color = device->info.ver >= 8,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_global_priority = device->max_context_priority >=
                             INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_global_priority_query = device->max_context_priority >=
                                   INTEL_CONTEXT_MEDIUM_PRIORITY,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_robustness = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget = device->sys.available,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_primitives_generated_query = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_ycbcr_image_arrays = true,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2 = device->info.ver >= 8,
      .EXT_multi_draw = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

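/* Fill out the set of supported physical-device features (core 1.0 through
 * 1.3 plus extension features), keyed off the hardware generation.
 */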
static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   /* Just pick one; they're all the same */
   const bool has_astc_ldr =
      isl_format_supports_sampling(&pdevice->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = pdevice->info.ver >= 8 ||
                                pdevice->info.platform == INTEL_PLATFORM_BYT,
      .textureCompressionASTC_LDR = has_astc_ldr,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = false,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.ver >= 8 &&
                       pdevice->info.has_64bit_float,
      .shaderInt64 = pdevice->info.ver >= 8,
      .shaderInt16 = pdevice->info.ver >= 8,
      .shaderResourceMinLod = false,
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .storagePushConstant16 = pdevice->info.ver >= 8,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = false,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = pdevice->info.ver >= 8,
      .uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8,
      .storagePushConstant8 = pdevice->info.ver >= 8,
      .shaderBufferInt64Atomics = false,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,
      .shaderInt8 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit,

      .descriptorIndexing = false,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = false,
      .shaderStorageTexelBufferArrayDynamicIndexing = false,
      .shaderUniformBufferArrayNonUniformIndexing = false,
      .shaderSampledImageArrayNonUniformIndexing = false,
      .shaderStorageBufferArrayNonUniformIndexing = false,
      .shaderStorageImageArrayNonUniformIndexing = false,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = false,
      .shaderStorageTexelBufferArrayNonUniformIndexing = false,
      .descriptorBindingUniformBufferUpdateAfterBind = false,
      .descriptorBindingSampledImageUpdateAfterBind = false,
      .descriptorBindingStorageImageUpdateAfterBind = false,
      .descriptorBindingStorageBufferUpdateAfterBind = false,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
      .descriptorBindingUpdateUnusedWhilePending = false,
      .descriptorBindingPartiallyBound = false,
      .descriptorBindingVariableDescriptorCount = false,
      .runtimeDescriptorArray = false,

      .samplerFilterMinmax = false,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressCaptureReplay = pdevice->has_a64_buffer_access,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = false,

      /* VK_NV_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = pdevice->info.verx10 >= 75,
      .inheritedConditionalRendering = pdevice->info.verx10 >= 75,

      /* VK_EXT_custom_border_color */
      .customBorderColors = pdevice->info.ver >= 8,
      .customBorderColorWithoutFormat = pdevice->info.ver >= 8,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_KHR_global_priority */
      .globalPriorityQuery = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL. See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = false,
      .bresenhamLines = true,
      /* Support for Smooth lines with MSAA was removed on gfx11. From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:######## NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd = false,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = false,
      .shaderSharedFloat64Atomics = false,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = false,
      .sparseImageFloat32Atomics = false,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };

   /* We can't do image stores in vec4 shaders */
   features->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}


#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256

#define MAX_CUSTOM_BORDER_COLORS 4096

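/* Vulkan 1.1 core properties (device UUIDs, subgroup capabilities and
 * multiview limits).
 */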
static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = ELK_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT;
   if (pdevice->info.ver >= 8) {
      /* TODO: There's no technical reason why these can't be made to work
       * on gfx7, but they don't at the moment, so it's better to leave the
       * feature disabled than enabled and broken.
       */
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                        VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   }
   p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount = 16;
   p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
   p->protectedNoFault = false;
   /* This value doesn't matter for us today as our per-stage descriptors
    * are the real limit.
    */
   p->maxPerSetDescriptors = 1024;
   p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
}

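/* Vulkan 1.2 core properties (driver identification, float controls,
 * descriptor indexing and resolve limits).
 */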
static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* Don't advertise conformance with a particular version if the hardware's
    * support is incomplete/alpha.
    */
   if (pdevice->is_alpha) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 0,
         .minor = 0,
         .subminor = 0,
         .patch = 0,
      };
   }
   else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = pdevice->use_softpin ? 3 : 2,
         .subminor = 0,
         .patch = 0,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16 = false;
   p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = true;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = pdevice->info.ver >= 8;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = true;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = true;
   p->shaderDenormPreserveFloat64 = true;
   p->shaderRoundingModeRTEFloat64 = true;
   p->shaderRoundingModeRTZFloat64 = true;
   p->shaderSignedZeroInfNanPreserveFloat64 = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan. The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address. This means that we can have at most 1M surface states
    * allocated at any given time. Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time. The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views = 1 << 20;
   p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT |
                                   VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   if (pdevice->info.ver >= 8) {
      /* The advanced stencil resolve modes currently require stencil
       * sampling be supported by the hardware.
       */
      p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT |
                                         VK_RESOLVE_MODE_MAX_BIT;
   }
   p->independentResolveNone = true;
   p->independentResolve = true;

   p->filterMinmaxSingleComponentFormats = false;
   p->filterMinmaxImageComponentMapping = false;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      pdevice->info.ver == 7 ? VK_SAMPLE_COUNT_1_BIT : isl_device_get_sample_counts(&pdevice->isl_dev);
}

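/* Vulkan 1.3 core properties (subgroup size control, inline uniform blocks,
 * integer dot product and texel buffer alignment).
 */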
static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces. It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

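/* Top-level property query: fills in the core 1.0 limits and then chains to
 * the 1.1/1.2/1.3 and extension property helpers above.
 */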
static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
   const uint32_t max_textures = 128;
   const uint32_t max_samplers =
      pdevice->has_bindless_samplers ? UINT16_MAX :
      (devinfo->verx10 >= 75) ? 128 : 16;
   const uint32_t max_images = MAX_IMAGES;

   /* If we can use bindless for everything, claim a high per-stage limit,
    * otherwise use the binding table size, minus the slots reserved for
    * render targets and one slot for the descriptor buffer.
    */
   const uint32_t max_per_stage = MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   *props = (struct vk_properties) {
#if DETECT_OS_ANDROID
      .apiVersion = ANV_API_VERSION,
#else
      .apiVersion = pdevice->use_softpin ? ANV_API_VERSION_1_3 : ANV_API_VERSION_1_2,
#endif /* DETECT_OS_ANDROID */
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D = (1 << 14),
      /* Gfx7 doesn't support 8xMSAA with depth/stencil images when their
       * width is greater than 8192 pixels.
       */
      .maxImageDimension2D = devinfo->ver == 7 ? (1 << 13) : (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange = MIN2(pdevice->isl_dev.max_buffer_size, UINT32_MAX),
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_samplers,
      .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers = max_ssbos,
      .maxPerStageDescriptorSampledImages = max_textures,
      .maxPerStageDescriptorStorageImages = max_images,
      .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources = max_per_stage,
      .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes = MAX_VES,
      .maxVertexInputBindings = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset = 2047,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,2048]
       *
       * Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride = devinfo->ver < 9 ? 2048 : 4095,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize = 64 * 1024,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment = 16,
      .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      /* Multisampling with SINT formats is not supported on gfx7 */
      .sampledImageIntegerSampleCounts = devinfo->ver == 7 ? VK_SAMPLE_COUNT_1_BIT : sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange = { 0.0, devinfo->ver >= 9 ? 8.0 : 7.9921875 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 128.0),
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,

      /* Broadwell doesn't do sparse. */
      .sparseResidencyStandard2DBlockShape = false,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = false,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = false,
   };

   snprintf(props->deviceName, sizeof(props->deviceName),
            "%s", pdevice->info.name);
   memcpy(props->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);

   get_properties_1_1(pdevice, props);
   get_properties_1_2(pdevice, props);
   get_properties_1_3(pdevice, props);

   /* VK_KHR_performance_query */
   {
      /* We could support this by spawning a shader to do the equation
       * normalization.
       */
      props->allowCommandBufferQueryCopies = false;
   }

   /* VK_KHR_push_descriptor */
   {
      props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
   }

   /* VK_KHR_vertex_attribute_divisor */
   {
      /* We have to restrict this a bit for multiview */
      props->maxVertexAttribDivisor = UINT32_MAX / 16;
   }

   /* VK_EXT_custom_border_color */
   {
      props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
   }

   /* VK_EXT_external_memory_host */
   {
      /* Userptr needs page aligned memory. */
      props->minImportedHostPointerAlignment = 4096;
   }

   /* VK_EXT_line_rasterization */
   {
      /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
       * Sampling Rules - Legacy Mode", it says the following:
       *
       *    "Note that the device divides a pixel into a 16x16 array of
       *    subpixels, referenced by their upper left corners."
       *
       * This is the only known reference in the PRMs to the subpixel
       * precision of line rasterization and a "16x16 array of subpixels"
       * implies 4 subpixel precision bits. Empirical testing has shown that 4
       * subpixel precision bits applies to all line rasterization types.
       */
      props->lineSubPixelPrecisionBits = 4;
   }

   /* VK_EXT_multi_draw */
   {
      props->maxMultiDrawCount = 2048;
   }

   /* VK_EXT_pci_bus_info */
   {
      props->pciDomain = pdevice->info.pci_domain;
      props->pciBus = pdevice->info.pci_bus;
      props->pciDevice = pdevice->info.pci_dev;
      props->pciFunction = pdevice->info.pci_func;
   }

   /* VK_EXT_physical_device_drm */
   {
      props->drmHasPrimary = pdevice->has_master;
      props->drmPrimaryMajor = pdevice->master_major;
      props->drmPrimaryMinor = pdevice->master_minor;
      props->drmHasRender = pdevice->has_local;
      props->drmRenderMajor = pdevice->local_major;
      props->drmRenderMinor = pdevice->local_minor;
   }

   /* VK_EXT_provoking_vertex */
   {
      props->provokingVertexModePerPipeline = true;
      props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
   }

   /* VK_EXT_robustness2 */
   {
      props->robustStorageBufferAccessSizeAlignment =
         ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
      props->robustUniformBufferAccessSizeAlignment =
         ANV_UBO_ALIGNMENT;
   }

   /* VK_EXT_sample_locations */
   {
      props->sampleLocationSampleCounts =
         isl_device_get_sample_counts(&pdevice->isl_dev);

      /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
      props->maxSampleLocationGridSize.width = 1;
      props->maxSampleLocationGridSize.height = 1;

      props->sampleLocationCoordinateRange[0] = 0;
      props->sampleLocationCoordinateRange[1] = 0.9375;
      props->sampleLocationSubPixelBits = 4;

      props->variableSampleLocations = true;
   }

   /* VK_EXT_shader_module_identifier */
   {
      STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                    sizeof(props->shaderModuleIdentifierAlgorithmUUID));
      memcpy(props->shaderModuleIdentifierAlgorithmUUID,
             vk_shaderModuleIdentifierAlgorithmUUID,
             sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   }

   /* VK_EXT_transform_feedback */
   {
      props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
      props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
      props->maxTransformFeedbackBufferSize = (1ull << 32);
      props->maxTransformFeedbackStreamDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataStride = 2048;
      props->transformFeedbackQueries = true;
      props->transformFeedbackStreamsLinesTriangles = false;
      props->transformFeedbackRasterizationStreamSelect = false;
      /* This requires MI_MATH */
      props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
   }

   /* VK_ANDROID_native_buffer */
#if DETECT_OS_ANDROID
   {
      props->sharedImage = VK_FALSE;
   }
#endif /* DETECT_OS_ANDROID */

}

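/* Compute the size of the system memory heap we advertise, clamping the
 * available RAM against the aperture (GTT) size and 48-bit addressing
 * support.
 */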
static uint64_t
anv_compute_sys_heap_size(struct anv_physical_device *device,
                          uint64_t available_ram)
{
   /* We want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);

   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail. Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                "no support for 48-bit addresses",
                __FILE__, __LINE__);
      available_ram = 2ull << 30;
   }

   return available_ram;
}

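/* Initialize the system memory heap size and availability from the device
 * info queried at probe time.
 */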
static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.size =
      anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
   device->sys.available = devinfo->mem.sram.mappable.free;

   return VK_SUCCESS;
}

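/* Refresh the amount of free (mappable) system memory from the kernel. */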
static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
}

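/* Set up the memory heaps and memory types we expose. We advertise a single
 * heap; the memory types differ depending on whether the GPU shares an LLC
 * with the CPU.
 */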
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       *
       * But some game engines can't handle a single type well:
       * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438
       *
       * And Intel on Windows uses 3 types, so it's better to add an extra
       * one here.
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC. Thus we offer two memory types
       * to give the application a choice between cached (but not coherent)
       * and coherent (but uncached; write-combined).
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   }

   device->memory.need_flush = false;
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
         device->memory.need_flush = true;
   }

   return VK_SUCCESS;
}

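/* Derive the pipeline-cache UUID from the driver build-id and device
 * configuration, and fill in the driver/device UUIDs.
 */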
static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short. It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
                     sizeof(device->info.pci_device_id));
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
                     sizeof(device->has_a64_buffer_access));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
                     sizeof(device->has_bindless_samplers));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);

   return VK_SUCCESS;
}

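/* Create the on-disk shader cache, keyed on the PCI device ID and the
 * driver build sha1.
 */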
1423 static void
anv_physical_device_init_disk_cache(struct anv_physical_device * device)1424 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
1425 {
1426 #ifdef ENABLE_SHADER_CACHE
1427 char renderer[10];
1428 ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
1429 device->info.pci_device_id);
1430 assert(len == sizeof(renderer) - 2);
1431
1432 char timestamp[41];
1433 _mesa_sha1_format(timestamp, device->driver_build_sha1);
1434
1435 const uint64_t driver_flags =
1436 elk_get_compiler_config_value(device->compiler);
1437 device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1438 #endif
1439 }
1440
1441 static void
1442 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
1443 {
1444 #ifdef ENABLE_SHADER_CACHE
1445 if (device->vk.disk_cache) {
1446 disk_cache_destroy(device->vk.disk_cache);
1447 device->vk.disk_cache = NULL;
1448 }
1449 #else
1450 assert(device->vk.disk_cache == NULL);
1451 #endif
1452 }
1453
1454 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
1455 * queue overrides.
1456 *
1457  * To override the number of queues:
1458 * * "gc" is for graphics queues with compute support
1459 * * "g" is for graphics queues with no compute support
1460 * * "c" is for compute queues with no graphics support
1461 *
1462 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
1463 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
1464 * with compute-only support.
1465 *
1466 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
1467 * include 1 queue with compute-only support, but it will not change the
1468 * number of graphics+compute queues.
1469 *
1470 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
1471 * to include 1 queue with compute-only support, and it would override the
1472 * number of graphics+compute queues to be 0.
1473 */
1474 static void
1475 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
1476 {
1477 int gc_override = -1;
1478 int g_override = -1;
1479 int c_override = -1;
1480 const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");
1481
1482 if (env_ == NULL)
1483 return;
1484
1485 char *env = strdup(env_);
1486 char *save = NULL;
1487 char *next = strtok_r(env, ",", &save);
1488 while (next != NULL) {
1489 if (strncmp(next, "gc=", 3) == 0) {
1490 gc_override = strtol(next + 3, NULL, 0);
1491 } else if (strncmp(next, "g=", 2) == 0) {
1492 g_override = strtol(next + 2, NULL, 0);
1493 } else if (strncmp(next, "c=", 2) == 0) {
1494 c_override = strtol(next + 2, NULL, 0);
1495 } else {
1496 mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
1497 }
1498 next = strtok_r(NULL, ",", &save);
1499 }
1500 free(env);
1501 if (gc_override >= 0)
1502 *gc_count = gc_override;
1503 if (g_override >= 0)
1504 *g_count = g_override;
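   /* The Vulkan spec requires an implementation that exposes graphics queues
    * to expose at least one queue family supporting both graphics and
    * compute, hence the warning below when the override leaves only
    * graphics-only queues.
    */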
1505 if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
1506 mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
1507 "Vulkan specification");
1508 if (c_override >= 0)
1509 *c_count = c_override;
1510 }
1511
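/* Advertise up to three queue families (graphics+compute, graphics-only and
 * compute-only), all backed by the render engine on these GPUs, honoring any
 * ANV_QUEUE_OVERRIDE counts.
 */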
1512 static void
1513 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
1514 {
1515 uint32_t family_count = 0;
1516
1517 if (pdevice->engine_info) {
1518 int gc_count =
1519 intel_engines_count(pdevice->engine_info,
1520 INTEL_ENGINE_CLASS_RENDER);
1521 int g_count = 0;
1522 int c_count = 0;
1523
1524 anv_override_engine_counts(&gc_count, &g_count, &c_count);
1525
1526 if (gc_count > 0) {
1527 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1528 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1529 VK_QUEUE_COMPUTE_BIT |
1530 VK_QUEUE_TRANSFER_BIT,
1531 .queueCount = gc_count,
1532 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1533 };
1534 }
1535 if (g_count > 0) {
1536 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1537 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1538 VK_QUEUE_TRANSFER_BIT,
1539 .queueCount = g_count,
1540 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1541 };
1542 }
1543 if (c_count > 0) {
1544 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1545 .queueFlags = VK_QUEUE_COMPUTE_BIT |
1546 VK_QUEUE_TRANSFER_BIT,
1547 .queueCount = c_count,
1548 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1549 };
1550 }
1551       /* Increase the count in the STATIC_ASSERT below when other families are
1552        * added, as a reminder to also increase the ANV_MAX_QUEUE_FAMILIES value.
1553 */
1554 STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
1555 } else {
1556 /* Default to a single render queue */
1557 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
1558 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1559 VK_QUEUE_COMPUTE_BIT |
1560 VK_QUEUE_TRANSFER_BIT,
1561 .queueCount = 1,
1562 .engine_class = INTEL_ENGINE_CLASS_RENDER,
1563 };
1564 family_count = 1;
1565 }
1566 assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
1567 pdevice->queue.family_count = family_count;
1568 }
1569
1570 static VkResult
1571 anv_physical_device_try_create(struct vk_instance *vk_instance,
1572 struct _drmDevice *drm_device,
1573 struct vk_physical_device **out)
1574 {
1575 struct anv_instance *instance =
1576 container_of(vk_instance, struct anv_instance, vk);
1577
1578 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1579 drm_device->bustype != DRM_BUS_PCI ||
1580 drm_device->deviceinfo.pci->vendor_id != 0x8086)
1581 return VK_ERROR_INCOMPATIBLE_DRIVER;
1582
1583 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
1584 const char *path = drm_device->nodes[DRM_NODE_RENDER];
1585 VkResult result;
1586 int fd;
1587 int master_fd = -1;
1588
1589 process_intel_debug_variable();
1590
1591 fd = open(path, O_RDWR | O_CLOEXEC);
1592 if (fd < 0) {
1593 if (errno == ENOMEM) {
1594 return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1595 "Unable to open device %s: out of memory", path);
1596 }
1597 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1598 "Unable to open device %s: %m", path);
1599 }
1600
1601 struct intel_device_info devinfo;
1602 if (!intel_get_device_info_from_fd(fd, &devinfo, 7, 8)) {
1603 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1604 goto fail_fd;
1605 }
1606
1607 bool is_alpha = true;
1608 bool warn = !debug_get_bool_option("MESA_VK_IGNORE_CONFORMANCE_WARNING", false);
1609 if (devinfo.platform == INTEL_PLATFORM_HSW) {
1610 if (warn)
1611 mesa_logw("Haswell Vulkan support is incomplete");
1612 } else if (devinfo.platform == INTEL_PLATFORM_IVB) {
1613 if (warn)
1614 mesa_logw("Ivy Bridge Vulkan support is incomplete");
1615 } else if (devinfo.platform == INTEL_PLATFORM_BYT) {
1616 if (warn)
1617 mesa_logw("Bay Trail Vulkan support is incomplete");
1618 } else if (devinfo.ver == 8) {
1619 /* Gfx8 fully supported */
1620 is_alpha = false;
1621 } else {
1622       /* Silently fail here; anv will either pick up this device or display an
1623 * error message.
1624 */
1625 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1626 goto fail_fd;
1627 }
1628
1629 struct anv_physical_device *device =
1630 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1631 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1632 if (device == NULL) {
1633 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1634 goto fail_fd;
1635 }
1636
1637 struct vk_physical_device_dispatch_table dispatch_table;
1638 vk_physical_device_dispatch_table_from_entrypoints(
1639 &dispatch_table, &anv_physical_device_entrypoints, true);
1640 vk_physical_device_dispatch_table_from_entrypoints(
1641 &dispatch_table, &wsi_physical_device_entrypoints, false);
1642
1643 result = vk_physical_device_init(&device->vk, &instance->vk,
1644 NULL, NULL, NULL, /* We set up extensions later */
1645 &dispatch_table);
1646 if (result != VK_SUCCESS) {
1647 vk_error(instance, result);
1648 goto fail_alloc;
1649 }
1650 device->instance = instance;
1651
1652 assert(strlen(path) < ARRAY_SIZE(device->path));
1653 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
1654
1655 device->info = devinfo;
1656 device->is_alpha = is_alpha;
1657
1658 device->cmd_parser_version = -1;
1659 if (device->info.ver == 7) {
1660 if (!intel_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION, &device->cmd_parser_version) ||
1661 device->cmd_parser_version == -1) {
1662 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1663 "failed to get command parser version");
1664 goto fail_base;
1665 }
1666 }
1667
1668 int val;
1669 if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
1670 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1671 "kernel missing gem wait");
1672 goto fail_base;
1673 }
1674
1675 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
1676 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1677 "kernel missing execbuf2");
1678 goto fail_base;
1679 }
1680
1681 if (!device->info.has_llc &&
1682 (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
1683 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1684 "kernel missing wc mmap");
1685 goto fail_base;
1686 }
1687
1688 device->use_relocations = device->info.ver < 8 ||
1689 device->info.platform == INTEL_PLATFORM_CHV;
1690
1691 if (!device->use_relocations &&
1692 (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val)) {
1693 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1694 "kernel missing softpin");
1695       goto fail_base;
1696 }
1697
1698 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
1699 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1700 "kernel missing syncobj support");
1701 goto fail_base;
1702 }
1703
1704 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
1705 device->has_exec_async = val;
1706 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
1707 device->has_exec_capture = val;
1708
1709 /* Start with medium; sorted low to high */
1710 const int priorities[] = {
1711 INTEL_CONTEXT_MEDIUM_PRIORITY,
1712 INTEL_CONTEXT_HIGH_PRIORITY,
1713 INTEL_CONTEXT_REALTIME_PRIORITY,
1714 };
1715 device->max_context_priority = INT_MIN;
1716 for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
1717 if (!anv_gem_has_context_priority(fd, priorities[i]))
1718 break;
1719 device->max_context_priority = priorities[i];
1720 }
1721
1722 device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
1723 device->info.aperture_bytes;
1724
1725 /* We only allow 48-bit addresses with softpin because knowing the actual
1726 * address is required for the vertex cache flush workaround.
1727 */
1728 device->supports_48bit_addresses = (device->info.ver >= 8) &&
1729 device->gtt_size > (4ULL << 30 /* GiB */);
1730
1731 result = anv_physical_device_init_heaps(device, fd);
1732 if (result != VK_SUCCESS)
1733 goto fail_base;
1734
1735 assert(device->supports_48bit_addresses == !device->use_relocations);
1736 device->use_softpin = !device->use_relocations;
1737
1738 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
1739 device->has_exec_timeline = val;
1740 if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
1741 device->has_exec_timeline = false;
1742
1743 unsigned st_idx = 0;
1744
1745 device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
1746 if (!device->has_exec_timeline)
1747 device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
1748 device->sync_types[st_idx++] = &device->sync_syncobj_type;
1749
1750 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
1751 device->sync_types[st_idx++] = &anv_bo_sync_type;
1752
1753 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
1754 device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
1755 device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
1756 }
1757
1758 device->sync_types[st_idx++] = NULL;
1759 assert(st_idx <= ARRAY_SIZE(device->sync_types));
1760 device->vk.supported_sync_types = device->sync_types;
1761
1762 device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
1763
1764 device->always_use_bindless =
1765 debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
1766
1767 device->use_call_secondary =
1768 device->use_softpin &&
1769 !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
1770
1771    /* We first got the A64 messages on Broadwell, and we can only use them if
1772     * we can pass addresses directly into the shader, which requires softpin.
1773 */
1774 device->has_a64_buffer_access = device->info.ver >= 8 &&
1775 device->use_softpin;
1776
1777 /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
1778 * because it's just a matter of setting the sampler address in the sample
1779 * message header. However, we've not bothered to wire it up for vec4 so
1780 * we leave it disabled on gfx7.
1781 */
1782 device->has_bindless_samplers = device->info.ver >= 8;
1783
1784 /* Check if we can read the GPU timestamp register from the CPU */
1785 uint64_t u64_ignore;
1786 device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
1787 device->info.kmd_type,
1788 &u64_ignore);
1789
1790 device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
1791 driQueryOptionb(&instance->dri_options, "always_flush_cache");
1792
1793 device->compiler = elk_compiler_create(NULL, &device->info);
1794 if (device->compiler == NULL) {
1795 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1796 goto fail_base;
1797 }
1798 device->compiler->shader_debug_log = compiler_debug_log;
1799 device->compiler->shader_perf_log = compiler_perf_log;
1800 device->compiler->constant_buffer_0_is_relative =
1801 device->info.ver < 8 || !device->info.has_context_isolation;
1802 device->compiler->supports_shader_constants = true;
1803
1804 isl_device_init(&device->isl_dev, &device->info);
1805
1806 result = anv_physical_device_init_uuids(device);
1807 if (result != VK_SUCCESS)
1808 goto fail_compiler;
1809
1810 anv_physical_device_init_disk_cache(device);
1811
1812 if (instance->vk.enabled_extensions.KHR_display) {
1813 master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
1814 if (master_fd >= 0) {
1815 /* fail if we don't have permission to even render on this device */
1816 if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
1817 close(master_fd);
1818 master_fd = -1;
1819 }
1820 }
1821 }
1822 device->master_fd = master_fd;
1823
1824 device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
1825 anv_physical_device_init_queue_families(device);
1826
1827 device->local_fd = fd;
1828
1829 anv_physical_device_init_perf(device, fd);
1830
1831 /* Gather major/minor before WSI. */
1832 struct stat st;
1833
1834 if (stat(primary_path, &st) == 0) {
1835 device->has_master = true;
1836 device->master_major = major(st.st_rdev);
1837 device->master_minor = minor(st.st_rdev);
1838 } else {
1839 device->has_master = false;
1840 device->master_major = 0;
1841 device->master_minor = 0;
1842 }
1843
1844 if (stat(path, &st) == 0) {
1845 device->has_local = true;
1846 device->local_major = major(st.st_rdev);
1847 device->local_minor = minor(st.st_rdev);
1848 } else {
1849 device->has_local = false;
1850 device->local_major = 0;
1851 device->local_minor = 0;
1852 }
1853
1854 get_device_extensions(device, &device->vk.supported_extensions);
1855 get_features(device, &device->vk.supported_features);
1856 get_properties(device, &device->vk.properties);
1857
1858 result = anv_init_wsi(device);
1859 if (result != VK_SUCCESS)
1860 goto fail_perf;
1861
1862 anv_measure_device_init(device);
1863
1864 anv_genX(&device->info, init_physical_device_state)(device);
1865
1866 *out = &device->vk;
1867
1868 return VK_SUCCESS;
1869
1870 fail_perf:
1871 intel_perf_free(device->perf);
1872 free(device->engine_info);
1873 anv_physical_device_free_disk_cache(device);
1874 fail_compiler:
1875 ralloc_free(device->compiler);
1876 fail_base:
1877 vk_physical_device_finish(&device->vk);
1878 fail_alloc:
1879 vk_free(&instance->vk.alloc, device);
1880 fail_fd:
1881 close(fd);
1882 if (master_fd != -1)
1883 close(master_fd);
1884 return result;
1885 }
1886
1887 static void
1888 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1889 {
1890 struct anv_physical_device *device =
1891 container_of(vk_device, struct anv_physical_device, vk);
1892
1893 anv_finish_wsi(device);
1894 anv_measure_device_destroy(device);
1895 free(device->engine_info);
1896 anv_physical_device_free_disk_cache(device);
1897 ralloc_free(device->compiler);
1898 intel_perf_free(device->perf);
1899 close(device->local_fd);
1900 if (device->master_fd >= 0)
1901 close(device->master_fd);
1902 vk_physical_device_finish(&device->vk);
1903 vk_free(&device->instance->vk.alloc, device);
1904 }
1905
1906 VkResult anv_EnumerateInstanceExtensionProperties(
1907 const char* pLayerName,
1908 uint32_t* pPropertyCount,
1909 VkExtensionProperties* pProperties)
1910 {
1911 if (pLayerName)
1912 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1913
1914 return vk_enumerate_instance_extension_properties(
1915 &instance_extensions, pPropertyCount, pProperties);
1916 }
1917
1918 static void
1919 anv_init_dri_options(struct anv_instance *instance)
1920 {
1921 driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1922 ARRAY_SIZE(anv_dri_options));
1923 driParseConfigFiles(&instance->dri_options,
1924 &instance->available_dri_options, 0, "anv", NULL, NULL,
1925 instance->vk.app_info.app_name,
1926 instance->vk.app_info.app_version,
1927 instance->vk.app_info.engine_name,
1928 instance->vk.app_info.engine_version);
1929
1930 instance->assume_full_subgroups =
1931 driQueryOptioni(&instance->dri_options, "anv_assume_full_subgroups");
1932 instance->limit_trig_input_range =
1933 driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1934 instance->sample_mask_out_opengl_behaviour =
1935 driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1936 instance->lower_depth_range_rate =
1937 driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1938 instance->no_16bit =
1939 driQueryOptionb(&instance->dri_options, "no_16bit");
1940 instance->report_vk_1_3 =
1941 driQueryOptionb(&instance->dri_options, "hasvk_report_vk_1_3_version");
1942 }
1943
1944 VkResult anv_CreateInstance(
1945 const VkInstanceCreateInfo* pCreateInfo,
1946 const VkAllocationCallbacks* pAllocator,
1947 VkInstance* pInstance)
1948 {
1949 struct anv_instance *instance;
1950 VkResult result;
1951
1952 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1953
1954 if (pAllocator == NULL)
1955 pAllocator = vk_default_allocator();
1956
1957 instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1958 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1959 if (!instance)
1960 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1961
1962 struct vk_instance_dispatch_table dispatch_table;
1963 vk_instance_dispatch_table_from_entrypoints(
1964 &dispatch_table, &anv_instance_entrypoints, true);
1965 vk_instance_dispatch_table_from_entrypoints(
1966 &dispatch_table, &wsi_instance_entrypoints, false);
1967
1968 result = vk_instance_init(&instance->vk, &instance_extensions,
1969 &dispatch_table, pCreateInfo, pAllocator);
1970 if (result != VK_SUCCESS) {
1971 vk_free(pAllocator, instance);
1972 return vk_error(NULL, result);
1973 }
1974
1975 instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1976 instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1977
1978 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1979
1980 anv_init_dri_options(instance);
1981
1982 intel_driver_ds_init();
1983
1984 *pInstance = anv_instance_to_handle(instance);
1985
1986 return VK_SUCCESS;
1987 }
1988
1989 void anv_DestroyInstance(
1990 VkInstance _instance,
1991 const VkAllocationCallbacks* pAllocator)
1992 {
1993 ANV_FROM_HANDLE(anv_instance, instance, _instance);
1994
1995 if (!instance)
1996 return;
1997
1998 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1999
2000 driDestroyOptionCache(&instance->dri_options);
2001 driDestroyOptionInfo(&instance->available_dri_options);
2002
2003 vk_instance_finish(&instance->vk);
2004 vk_free(&instance->vk.alloc, instance);
2005 }
2006
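/* Map a VkQueueGlobalPriorityKHR value onto the corresponding
 * INTEL_CONTEXT_* context priority.
 */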
2007 static int
2008 vk_priority_to_gen(int priority)
2009 {
2010 switch (priority) {
2011 case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
2012 return INTEL_CONTEXT_LOW_PRIORITY;
2013 case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
2014 return INTEL_CONTEXT_MEDIUM_PRIORITY;
2015 case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
2016 return INTEL_CONTEXT_HIGH_PRIORITY;
2017 case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
2018 return INTEL_CONTEXT_REALTIME_PRIORITY;
2019 default:
2020 unreachable("Invalid priority");
2021 }
2022 }
2023
2024 static const VkQueueFamilyProperties
2025 anv_queue_family_properties_template = {
2026 .timestampValidBits = 36, /* XXX: Real value here */
2027 .minImageTransferGranularity = { 1, 1, 1 },
2028 };
2029
2030 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2031 VkPhysicalDevice physicalDevice,
2032 uint32_t* pQueueFamilyPropertyCount,
2033 VkQueueFamilyProperties2* pQueueFamilyProperties)
2034 {
2035 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2036 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2037 pQueueFamilyProperties, pQueueFamilyPropertyCount);
2038
2039 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2040 struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2041 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2042 p->queueFamilyProperties = anv_queue_family_properties_template;
2043 p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2044 p->queueFamilyProperties.queueCount = queue_family->queueCount;
2045
2046 vk_foreach_struct(ext, p->pNext) {
2047 switch (ext->sType) {
2048 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2049 VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2050 (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2051
2052 /* Deliberately sorted low to high */
2053 VkQueueGlobalPriorityKHR all_priorities[] = {
2054 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2055 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2056 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2057 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2058 };
2059
2060 uint32_t count = 0;
2061 for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2062 if (vk_priority_to_gen(all_priorities[i]) >
2063 pdevice->max_context_priority)
2064 break;
2065
2066 properties->priorities[count++] = all_priorities[i];
2067 }
2068 properties->priorityCount = count;
2069 break;
2070 }
2071
2072 default:
2073 vk_debug_ignored_stype(ext->sType);
2074 }
2075 }
2076 }
2077 }
2078 }
2079
2080 void anv_GetPhysicalDeviceMemoryProperties(
2081 VkPhysicalDevice physicalDevice,
2082 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2083 {
2084 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2085
2086 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2087 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2088 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2089 .propertyFlags = physical_device->memory.types[i].propertyFlags,
2090 .heapIndex = physical_device->memory.types[i].heapIndex,
2091 };
2092 }
2093
2094 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2095 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2096 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2097 .size = physical_device->memory.heaps[i].size,
2098 .flags = physical_device->memory.heaps[i].flags,
2099 };
2100 }
2101 }
2102
2103 static void
2104 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2105 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2106 {
2107 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2108
2109 if (!device->vk.supported_extensions.EXT_memory_budget)
2110 return;
2111
2112 anv_update_meminfo(device, device->local_fd);
2113
2114 VkDeviceSize total_sys_heaps_size = 0;
2115 for (size_t i = 0; i < device->memory.heap_count; i++)
2116 total_sys_heaps_size += device->memory.heaps[i].size;
2117
2118 for (size_t i = 0; i < device->memory.heap_count; i++) {
2119 VkDeviceSize heap_size = device->memory.heaps[i].size;
2120 VkDeviceSize heap_used = device->memory.heaps[i].used;
2121 VkDeviceSize heap_budget, total_heaps_size;
2122 uint64_t mem_available = 0;
2123
2124 total_heaps_size = total_sys_heaps_size;
2125 mem_available = device->sys.available;
2126
2127 double heap_proportion = (double) heap_size / total_heaps_size;
2128 VkDeviceSize available_prop = mem_available * heap_proportion;
2129
2130 /*
2131 * Let's not incite the app to starve the system: report at most 90% of
2132 * the available heap memory.
2133 */
2134 uint64_t heap_available = available_prop * 9 / 10;
2135 heap_budget = MIN2(heap_size, heap_used + heap_available);
2136
2137 /*
2138 * Round down to the nearest MB
2139 */
2140 heap_budget &= ~((1ull << 20) - 1);
2141
2142 /*
2143 * The heapBudget value must be non-zero for array elements less than
2144 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2145 * value must be less than or equal to VkMemoryHeap::size for each heap.
2146 */
2147 assert(0 < heap_budget && heap_budget <= heap_size);
2148
2149 memoryBudget->heapUsage[i] = heap_used;
2150 memoryBudget->heapBudget[i] = heap_budget;
2151 }
2152
2153 /* The heapBudget and heapUsage values must be zero for array elements
2154 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2155 */
2156 for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2157 memoryBudget->heapBudget[i] = 0;
2158 memoryBudget->heapUsage[i] = 0;
2159 }
2160 }
2161
2162 void anv_GetPhysicalDeviceMemoryProperties2(
2163 VkPhysicalDevice physicalDevice,
2164 VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2165 {
2166 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2167 &pMemoryProperties->memoryProperties);
2168
2169 vk_foreach_struct(ext, pMemoryProperties->pNext) {
2170 switch (ext->sType) {
2171 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2172 anv_get_memory_budget(physicalDevice, (void*)ext);
2173 break;
2174 default:
2175 vk_debug_ignored_stype(ext->sType);
2176 break;
2177 }
2178 }
2179 }
2180
2181 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2182 VkInstance _instance,
2183 const char* pName)
2184 {
2185 ANV_FROM_HANDLE(anv_instance, instance, _instance);
2186 return vk_instance_get_proc_addr(&instance->vk,
2187 &anv_instance_entrypoints,
2188 pName);
2189 }
2190
2191 /* With version 1+ of the loader interface the ICD should expose
2192 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2193 */
2194 PUBLIC
2195 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2196 VkInstance instance,
2197 const char* pName)
2198 {
2199 return anv_GetInstanceProcAddr(instance, pName);
2200 }
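/* Small helper: allocate a state from the pool and copy the caller's data
 * into its CPU mapping.
 */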
2201 static struct anv_state
2202 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2203 {
2204 struct anv_state state;
2205
2206 state = anv_state_pool_alloc(pool, size, align);
2207 memcpy(state.map, p, size);
2208
2209 return state;
2210 }
2211
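/* Upload the table of standard VkBorderColor values referenced by samplers.
 * Haswell uses a different border-color layout (and 512-byte alignment) than
 * gfx8+.
 */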
2212 static void
2213 anv_device_init_border_colors(struct anv_device *device)
2214 {
2215 if (device->info->platform == INTEL_PLATFORM_HSW) {
2216 static const struct hsw_border_color border_colors[] = {
2217 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2218 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2219 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2220 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2221 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2222 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2223 };
2224
2225 device->border_colors =
2226 anv_state_pool_emit_data(&device->dynamic_state_pool,
2227 sizeof(border_colors), 512, border_colors);
2228 } else {
2229 static const struct gfx8_border_color border_colors[] = {
2230 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2231 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2232 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2233 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2234 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2235 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2236 };
2237
2238 device->border_colors =
2239 anv_state_pool_emit_data(&device->dynamic_state_pool,
2240 sizeof(border_colors), 64, border_colors);
2241 }
2242 }
2243
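/* Allocate a small mapped BO and write a trivial batch containing nothing
 * but MI_BATCH_BUFFER_END followed by a NOOP.
 */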
2244 static VkResult
2245 anv_device_init_trivial_batch(struct anv_device *device)
2246 {
2247 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2248 ANV_BO_ALLOC_MAPPED,
2249 0 /* explicit_address */,
2250 &device->trivial_batch_bo);
2251 if (result != VK_SUCCESS)
2252 return result;
2253
2254 struct anv_batch batch = {
2255 .start = device->trivial_batch_bo->map,
2256 .next = device->trivial_batch_bo->map,
2257 .end = device->trivial_batch_bo->map + 4096,
2258 };
2259
2260 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2261 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2262
2263 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
2264 if (device->physical->memory.need_flush)
2265 intel_flush_range(batch.start, batch.next - batch.start);
2266 #endif
2267
2268 return VK_SUCCESS;
2269 }
2270
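/* Scan a block pool for a BO containing the given 48-bit address and, on a
 * hit, fill out a decoder BO description for it.
 */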
2271 static bool
2272 get_bo_from_pool(struct intel_batch_decode_bo *ret,
2273 struct anv_block_pool *pool,
2274 uint64_t address)
2275 {
2276 anv_block_pool_foreach_bo(bo, pool) {
2277 uint64_t bo_address = intel_48b_address(bo->offset);
2278 if (address >= bo_address && address < (bo_address + bo->size)) {
2279 *ret = (struct intel_batch_decode_bo) {
2280 .addr = bo_address,
2281 .size = bo->size,
2282 .map = bo->map,
2283 };
2284 return true;
2285 }
2286 }
2287 return false;
2288 }
2289
2290 /* Finding a buffer for batch decoding */
2291 static struct intel_batch_decode_bo
2292 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2293 {
2294 struct anv_device *device = v_batch;
2295 struct intel_batch_decode_bo ret_bo = {};
2296
2297 assert(ppgtt);
2298
2299 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2300 return ret_bo;
2301 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2302 return ret_bo;
2303 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2304 return ret_bo;
2305 if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2306 return ret_bo;
2307
2308 if (!device->cmd_buffer_being_decoded)
2309 return (struct intel_batch_decode_bo) { };
2310
2311 struct anv_batch_bo **bo;
2312
2313 u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2314 /* The decoder zeroes out the top 16 bits, so we need to as well */
2315 uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2316
2317 if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2318 return (struct intel_batch_decode_bo) {
2319 .addr = bo_address,
2320 .size = (*bo)->bo->size,
2321 .map = (*bo)->bo->map,
2322 };
2323 }
2324 }
2325
2326 return (struct intel_batch_decode_bo) { };
2327 }
2328
2329 static VkResult anv_device_check_status(struct vk_device *vk_device);
2330
2331 static VkResult
2332 anv_device_setup_context(struct anv_device *device,
2333 const VkDeviceCreateInfo *pCreateInfo,
2334 const uint32_t num_queues)
2335 {
2336 struct anv_physical_device *physical_device = device->physical;
2337 VkResult result = VK_SUCCESS;
2338
2339 if (device->physical->engine_info) {
2340 /* The kernel API supports at most 64 engines */
2341 assert(num_queues <= 64);
2342 enum intel_engine_class engine_classes[64];
2343 int engine_count = 0;
2344 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2345 const VkDeviceQueueCreateInfo *queueCreateInfo =
2346 &pCreateInfo->pQueueCreateInfos[i];
2347
2348 assert(queueCreateInfo->queueFamilyIndex <
2349 physical_device->queue.family_count);
2350 struct anv_queue_family *queue_family =
2351 &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
2352
2353 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
2354 engine_classes[engine_count++] = queue_family->engine_class;
2355 }
2356 if (!intel_gem_create_context_engines(device->fd, 0 /* flags */,
2357 physical_device->engine_info,
2358 engine_count, engine_classes,
2359 0 /* vm_id */,
2360 (uint32_t *)&device->context_id))
2361 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
2362 "kernel context creation failed");
2363 } else {
2364 assert(num_queues == 1);
2365 if (!intel_gem_create_context(device->fd, &device->context_id))
2366 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2367 }
2368
2369 if (result != VK_SUCCESS)
2370 return result;
2371
2372 /* Here we tell the kernel not to attempt to recover our context but
2373 * immediately (on the next batchbuffer submission) report that the
2374 * context is lost, and we will do the recovery ourselves. In the case
2375 * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
2376 * the client clean up the pieces.
2377 */
2378 anv_gem_set_context_param(device->fd, device->context_id,
2379 I915_CONTEXT_PARAM_RECOVERABLE, false);
2380
2381 /* Check if client specified queue priority. */
2382 const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
2383 vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
2384 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
2385
2386 VkQueueGlobalPriorityKHR priority =
2387 queue_priority ? queue_priority->globalPriority :
2388 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
2389
2390 /* As per spec, the driver implementation may deny requests to acquire
2391 * a priority above the default priority (MEDIUM) if the caller does not
2392 * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
2393 * is returned.
2394 */
2395 if (physical_device->max_context_priority >= INTEL_CONTEXT_MEDIUM_PRIORITY) {
2396 int err = anv_gem_set_context_param(device->fd, device->context_id,
2397 I915_CONTEXT_PARAM_PRIORITY,
2398 vk_priority_to_gen(priority));
2399 if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
2400 result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
2401 goto fail_context;
2402 }
2403 }
2404
2405 return result;
2406
2407 fail_context:
2408 intel_gem_destroy_context(device->fd, device->context_id);
2409 return result;
2410 }
2411
2412 VkResult anv_CreateDevice(
2413 VkPhysicalDevice physicalDevice,
2414 const VkDeviceCreateInfo* pCreateInfo,
2415 const VkAllocationCallbacks* pAllocator,
2416 VkDevice* pDevice)
2417 {
2418 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2419 VkResult result;
2420 struct anv_device *device;
2421
2422 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
2423
2424 /* Check requested queues and fail if we are requested to create any
2425 * queues with flags we don't support.
2426 */
2427 assert(pCreateInfo->queueCreateInfoCount > 0);
2428 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2429 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
2430 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
2431 }
2432
2433 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
2434 sizeof(*device), 8,
2435 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2436 if (!device)
2437 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
2438
2439 struct vk_device_dispatch_table dispatch_table;
2440
2441 bool override_initial_entrypoints = true;
2442 if (physical_device->instance->vk.app_info.app_name &&
2443 !strcmp(physical_device->instance->vk.app_info.app_name, "DOOM 64")) {
2444 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &doom64_device_entrypoints, true);
2445 override_initial_entrypoints = false;
2446 }
2447 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2448 anv_genX(&physical_device->info, device_entrypoints),
2449 override_initial_entrypoints);
2450 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2451 &anv_device_entrypoints, false);
2452 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
2453 &wsi_device_entrypoints, false);
2454
2455 result = vk_device_init(&device->vk, &physical_device->vk,
2456 &dispatch_table, pCreateInfo, pAllocator);
2457 if (result != VK_SUCCESS)
2458 goto fail_alloc;
2459
2460 if (INTEL_DEBUG(DEBUG_BATCH)) {
2461 const unsigned decode_flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
2462
2463 intel_batch_decode_ctx_init_elk(&device->decoder_ctx,
2464 &physical_device->compiler->isa,
2465 &physical_device->info,
2466 stderr, decode_flags, NULL,
2467 decode_get_bo, NULL, device);
2468
2469 device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
2470 device->decoder_ctx.surface_base = SURFACE_STATE_POOL_MIN_ADDRESS;
2471 device->decoder_ctx.instruction_base =
2472 INSTRUCTION_STATE_POOL_MIN_ADDRESS;
2473 }
2474
2475 anv_device_set_physical(device, physical_device);
2476
2477 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
2478 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
2479 if (device->fd == -1) {
2480 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2481 goto fail_device;
2482 }
2483
2484 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
2485 device->vk.check_status = anv_device_check_status;
2486 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
2487 vk_device_set_drm_fd(&device->vk, device->fd);
2488
2489 uint32_t num_queues = 0;
2490 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
2491 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
2492
2493 result = anv_device_setup_context(device, pCreateInfo, num_queues);
2494 if (result != VK_SUCCESS)
2495 goto fail_fd;
2496
2497 device->queues =
2498 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
2499 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2500 if (device->queues == NULL) {
2501 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2502 goto fail_context_id;
2503 }
2504
2505 device->queue_count = 0;
2506 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2507 const VkDeviceQueueCreateInfo *queueCreateInfo =
2508 &pCreateInfo->pQueueCreateInfos[i];
2509
2510 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
2511 /* When using legacy contexts, we use I915_EXEC_RENDER but, with
2512 * engine-based contexts, the bottom 6 bits of exec_flags are used
2513 * for the engine ID.
2514 */
2515 uint32_t exec_flags = device->physical->engine_info ?
2516 device->queue_count : I915_EXEC_RENDER;
2517
2518 result = anv_queue_init(device, &device->queues[device->queue_count],
2519 exec_flags, queueCreateInfo, j);
2520 if (result != VK_SUCCESS)
2521 goto fail_queues;
2522
2523 device->queue_count++;
2524 }
2525 }
2526
2527 if (!anv_use_relocations(physical_device)) {
2528 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
2529 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2530 goto fail_queues;
2531 }
2532
2533 /* keep the page with address zero out of the allocator */
2534 util_vma_heap_init(&device->vma_lo,
2535 LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
2536
2537 util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
2538 CLIENT_VISIBLE_HEAP_SIZE);
2539
2540 /* Leave the last 4GiB out of the high vma range, so that no state
2541 * base address + size can overflow 48 bits. For more information see
2542 * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
2543 */
2544 util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
2545 physical_device->gtt_size - (1ull << 32) -
2546 HIGH_HEAP_MIN_ADDRESS);
2547 }
2548
2549 list_inithead(&device->memory_objects);
2550
2551 /* On Broadwell and later, we can use batch chaining to more efficiently
2552 * implement growing command buffers. Prior to Haswell, the kernel
2553 * command parser gets in the way and we have to fall back to growing
2554 * the batch.
2555 */
2556 device->can_chain_batches = device->info->ver >= 8;
2557
2558 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
2559 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2560 goto fail_vmas;
2561 }
2562
2563 pthread_condattr_t condattr;
2564 if (pthread_condattr_init(&condattr) != 0) {
2565 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2566 goto fail_mutex;
2567 }
2568 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2569 pthread_condattr_destroy(&condattr);
2570 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2571 goto fail_mutex;
2572 }
2573 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
2574 pthread_condattr_destroy(&condattr);
2575 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2576 goto fail_mutex;
2577 }
2578 pthread_condattr_destroy(&condattr);
2579
2580 result = anv_bo_cache_init(&device->bo_cache, device);
2581 if (result != VK_SUCCESS)
2582 goto fail_queue_cond;
2583
2584 anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
2585
2586 /* Because scratch is also relative to General State Base Address, we leave
2587 * the base address 0 and start the pool memory at an offset. This way we
2588 * get the correct offsets in the anv_states that get allocated from it.
2589 */
2590 result = anv_state_pool_init(&device->general_state_pool, device,
2591 "general pool",
2592 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
2593 if (result != VK_SUCCESS)
2594 goto fail_batch_bo_pool;
2595
2596 result = anv_state_pool_init(&device->dynamic_state_pool, device,
2597 "dynamic pool",
2598 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
2599 if (result != VK_SUCCESS)
2600 goto fail_general_state_pool;
2601
2602 if (device->info->ver >= 8) {
2603 /* The border color pointer is limited to 24 bits, so we need to make
2604 * sure that any such color used at any point in the program doesn't
2605 * exceed that limit.
2606 * We achieve that by reserving all the custom border colors we support
2607 * right off the bat, so they are close to the base address.
2608 */
2609 anv_state_reserved_pool_init(&device->custom_border_colors,
2610 &device->dynamic_state_pool,
2611 MAX_CUSTOM_BORDER_COLORS,
2612 sizeof(struct gfx8_border_color), 64);
2613 }
2614
2615 result = anv_state_pool_init(&device->instruction_state_pool, device,
2616 "instruction pool",
2617 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
2618 if (result != VK_SUCCESS)
2619 goto fail_dynamic_state_pool;
2620
2621 result = anv_state_pool_init(&device->surface_state_pool, device,
2622 "surface state pool",
2623 SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
2624 if (result != VK_SUCCESS)
2625 goto fail_instruction_state_pool;
2626
2627 if (!anv_use_relocations(physical_device)) {
2628 int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
2629 (int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
2630 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
2631 result = anv_state_pool_init(&device->binding_table_pool, device,
2632 "binding table pool",
2633 SURFACE_STATE_POOL_MIN_ADDRESS,
2634 bt_pool_offset,
2635 BINDING_TABLE_POOL_BLOCK_SIZE);
2636 }
2637 if (result != VK_SUCCESS)
2638 goto fail_surface_state_pool;
2639
2640 result = anv_device_alloc_bo(device, "workaround", 4096,
2641 ANV_BO_ALLOC_CAPTURE |
2642 ANV_BO_ALLOC_MAPPED,
2643 0 /* explicit_address */,
2644 &device->workaround_bo);
2645 if (result != VK_SUCCESS)
2646 goto fail_binding_table_pool;
2647
2648 device->workaround_address = (struct anv_address) {
2649 .bo = device->workaround_bo,
2650 .offset = align(intel_debug_write_identifiers(device->workaround_bo->map,
2651 device->workaround_bo->size,
2652 "hasvk"), 32),
2653 };
2654
2655 device->workarounds.doom64_images = NULL;
2656
2657 device->debug_frame_desc =
2658 intel_debug_get_identifier_block(device->workaround_bo->map,
2659 device->workaround_bo->size,
2660 INTEL_DEBUG_BLOCK_TYPE_FRAME);
2661
2662 result = anv_device_init_trivial_batch(device);
2663 if (result != VK_SUCCESS)
2664 goto fail_workaround_bo;
2665
2666 /* Allocate a null surface state at surface state offset 0. This makes
2667 * NULL descriptor handling trivial because we can just memset structures
2668 * to zero and they have a valid descriptor.
2669 */
2670 device->null_surface_state =
2671 anv_state_pool_alloc(&device->surface_state_pool,
2672 device->isl_dev.ss.size,
2673 device->isl_dev.ss.align);
2674 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
2675 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
2676 assert(device->null_surface_state.offset == 0);
2677
2678 anv_scratch_pool_init(device, &device->scratch_pool);
2679
2680 result = anv_genX(device->info, init_device_state)(device);
2681 if (result != VK_SUCCESS)
2682 goto fail_trivial_batch_bo_and_scratch_pool;
2683
2684 struct vk_pipeline_cache_create_info pcc_info = { };
2685 device->default_pipeline_cache =
2686 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2687 if (!device->default_pipeline_cache) {
2688 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2689 goto fail_trivial_batch_bo_and_scratch_pool;
2690 }
2691
2692 /* Internal shaders need their own pipeline cache because, unlike the rest
2693 * of ANV, it won't work at all without the cache. It depends on it for
2694 * shaders to remain resident while it runs. Therefore, we need a special
2695 * cache just for BLORP/RT that's forced to always be enabled.
2696 */
2697 pcc_info.force_enable = true;
2698 device->internal_cache =
2699 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
2700 if (device->internal_cache == NULL) {
2701 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2702 goto fail_default_pipeline_cache;
2703 }
2704
2705 device->robust_buffer_access =
2706 device->vk.enabled_features.robustBufferAccess ||
2707 device->vk.enabled_features.nullDescriptor;
2708
2709 anv_device_init_blorp(device);
2710
2711 anv_device_init_border_colors(device);
2712
2713 anv_device_perf_init(device);
2714
2715 anv_device_utrace_init(device);
2716
2717 *pDevice = anv_device_to_handle(device);
2718
2719 return VK_SUCCESS;
2720
2721 fail_default_pipeline_cache:
2722 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2723 fail_trivial_batch_bo_and_scratch_pool:
2724 anv_scratch_pool_finish(device, &device->scratch_pool);
2725 anv_device_release_bo(device, device->trivial_batch_bo);
2726 fail_workaround_bo:
2727 anv_device_release_bo(device, device->workaround_bo);
2728 fail_binding_table_pool:
2729 if (!anv_use_relocations(physical_device))
2730 anv_state_pool_finish(&device->binding_table_pool);
2731 fail_surface_state_pool:
2732 anv_state_pool_finish(&device->surface_state_pool);
2733 fail_instruction_state_pool:
2734 anv_state_pool_finish(&device->instruction_state_pool);
2735 fail_dynamic_state_pool:
2736 if (device->info->ver >= 8)
2737 anv_state_reserved_pool_finish(&device->custom_border_colors);
2738 anv_state_pool_finish(&device->dynamic_state_pool);
2739 fail_general_state_pool:
2740 anv_state_pool_finish(&device->general_state_pool);
2741 fail_batch_bo_pool:
2742 anv_bo_pool_finish(&device->batch_bo_pool);
2743 anv_bo_cache_finish(&device->bo_cache);
2744 fail_queue_cond:
2745 pthread_cond_destroy(&device->queue_submit);
2746 fail_mutex:
2747 pthread_mutex_destroy(&device->mutex);
2748 fail_vmas:
2749 if (!anv_use_relocations(physical_device)) {
2750 util_vma_heap_finish(&device->vma_hi);
2751 util_vma_heap_finish(&device->vma_cva);
2752 util_vma_heap_finish(&device->vma_lo);
2753 }
2754 fail_queues:
2755 for (uint32_t i = 0; i < device->queue_count; i++)
2756 anv_queue_finish(&device->queues[i]);
2757 vk_free(&device->vk.alloc, device->queues);
2758 fail_context_id:
2759 intel_gem_destroy_context(device->fd, device->context_id);
2760 fail_fd:
2761 close(device->fd);
2762 fail_device:
2763 vk_device_finish(&device->vk);
2764 fail_alloc:
2765 vk_free(&device->vk.alloc, device);
2766
2767 return result;
2768 }
2769
2770 void anv_DestroyDevice(
2771 VkDevice _device,
2772 const VkAllocationCallbacks* pAllocator)
2773 {
2774 ANV_FROM_HANDLE(anv_device, device, _device);
2775
2776 if (!device)
2777 return;
2778
2779 anv_device_utrace_finish(device);
2780
2781 anv_device_finish_blorp(device);
2782
2783 vk_pipeline_cache_destroy(device->internal_cache, NULL);
2784 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
2785
2786 #ifdef HAVE_VALGRIND
2787 /* We only need to free these to prevent valgrind errors. The backing
2788 * BO will go away in a couple of lines so we don't actually leak.
2789 */
2790 if (device->info->ver >= 8)
2791 anv_state_reserved_pool_finish(&device->custom_border_colors);
2792 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
2793 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
2794 #endif
2795
2796 anv_scratch_pool_finish(device, &device->scratch_pool);
2797
2798 anv_device_release_bo(device, device->workaround_bo);
2799 anv_device_release_bo(device, device->trivial_batch_bo);
2800
2801 if (!anv_use_relocations(device->physical))
2802 anv_state_pool_finish(&device->binding_table_pool);
2803 anv_state_pool_finish(&device->surface_state_pool);
2804 anv_state_pool_finish(&device->instruction_state_pool);
2805 anv_state_pool_finish(&device->dynamic_state_pool);
2806 anv_state_pool_finish(&device->general_state_pool);
2807
2808 anv_bo_pool_finish(&device->batch_bo_pool);
2809
2810 anv_bo_cache_finish(&device->bo_cache);
2811
2812 if (!anv_use_relocations(device->physical)) {
2813 util_vma_heap_finish(&device->vma_hi);
2814 util_vma_heap_finish(&device->vma_cva);
2815 util_vma_heap_finish(&device->vma_lo);
2816 }
2817
2818 pthread_cond_destroy(&device->queue_submit);
2819 pthread_mutex_destroy(&device->mutex);
2820
2821 for (uint32_t i = 0; i < device->queue_count; i++)
2822 anv_queue_finish(&device->queues[i]);
2823 vk_free(&device->vk.alloc, device->queues);
2824
2825 intel_gem_destroy_context(device->fd, device->context_id);
2826
2827 if (INTEL_DEBUG(DEBUG_BATCH))
2828 intel_batch_decode_ctx_finish(&device->decoder_ctx);
2829
2830 close(device->fd);
2831
2832 vk_device_finish(&device->vk);
2833 vk_free(&device->vk.alloc, device);
2834 }
2835
2836 VkResult anv_EnumerateInstanceLayerProperties(
2837 uint32_t* pPropertyCount,
2838 VkLayerProperties* pProperties)
2839 {
2840 if (pProperties == NULL) {
2841 *pPropertyCount = 0;
2842 return VK_SUCCESS;
2843 }
2844
2845 /* None supported at this time */
2846 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2847 }
2848
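/* vk_device::check_status hook: query the kernel's reset statistics for our
 * context and mark the device lost if the GPU hung on our batches or with
 * our commands in flight.
 */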
2849 static VkResult
2850 anv_device_check_status(struct vk_device *vk_device)
2851 {
2852 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
2853
2854 uint32_t active, pending;
2855 int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
2856 &active, &pending);
2857 if (ret == -1) {
2858 /* We don't know the real error. */
2859 return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
2860 }
2861
2862 if (active) {
2863 return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
2864 } else if (pending) {
2865 return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
2866 }
2867
2868 return VK_SUCCESS;
2869 }
2870
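/* Block until the BO is idle or the timeout expires; returns VK_TIMEOUT on
 * expiry and marks the device lost on unexpected errors.
 */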
2871 VkResult
2872 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
2873 int64_t timeout)
2874 {
2875 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
2876 if (ret == -1 && errno == ETIME) {
2877 return VK_TIMEOUT;
2878 } else if (ret == -1) {
2879 /* We don't know the real error. */
2880 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
2881 } else {
2882 return VK_SUCCESS;
2883 }
2884 }
2885
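/* Pick a GPU virtual address for a BO: client-visible allocations come from
 * the CVA heap (optionally at an exact requested address), everything else
 * from the high heap with the low heap as fallback. The result is returned
 * in canonical (sign-extended) form.
 */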
2886 uint64_t
2887 anv_vma_alloc(struct anv_device *device,
2888 uint64_t size, uint64_t align,
2889 enum anv_bo_alloc_flags alloc_flags,
2890 uint64_t client_address)
2891 {
2892 pthread_mutex_lock(&device->vma_mutex);
2893
2894 uint64_t addr = 0;
2895
2896 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
2897 if (client_address) {
2898 if (util_vma_heap_alloc_addr(&device->vma_cva,
2899 client_address, size)) {
2900 addr = client_address;
2901 }
2902 } else {
2903 addr = util_vma_heap_alloc(&device->vma_cva, size, align);
2904 }
2905 /* We don't want to fall back to other heaps */
2906 goto done;
2907 }
2908
2909 assert(client_address == 0);
2910
2911 if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
2912 addr = util_vma_heap_alloc(&device->vma_hi, size, align);
2913
2914 if (addr == 0)
2915 addr = util_vma_heap_alloc(&device->vma_lo, size, align);
2916
2917 done:
2918 pthread_mutex_unlock(&device->vma_mutex);
2919
2920 assert(addr == intel_48b_address(addr));
2921 return intel_canonical_address(addr);
2922 }
2923
2924 void
2925 anv_vma_free(struct anv_device *device,
2926 uint64_t address, uint64_t size)
2927 {
2928 const uint64_t addr_48b = intel_48b_address(address);
2929
2930 pthread_mutex_lock(&device->vma_mutex);
2931
2932 if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
2933 addr_48b <= LOW_HEAP_MAX_ADDRESS) {
2934 util_vma_heap_free(&device->vma_lo, addr_48b, size);
2935 } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
2936 addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
2937 util_vma_heap_free(&device->vma_cva, addr_48b, size);
2938 } else {
2939 assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
2940 util_vma_heap_free(&device->vma_hi, addr_48b, size);
2941 }
2942
2943 pthread_mutex_unlock(&device->vma_mutex);
2944 }
2945
2946 VkResult anv_AllocateMemory(
2947 VkDevice _device,
2948 const VkMemoryAllocateInfo* pAllocateInfo,
2949 const VkAllocationCallbacks* pAllocator,
2950 VkDeviceMemory* pMem)
2951 {
2952 ANV_FROM_HANDLE(anv_device, device, _device);
2953 struct anv_physical_device *pdevice = device->physical;
2954 struct anv_device_memory *mem;
2955 VkResult result = VK_SUCCESS;
2956
2957 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2958
2959 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
2960 assert(pAllocateInfo->allocationSize > 0);
2961
2962 VkDeviceSize aligned_alloc_size =
2963 align64(pAllocateInfo->allocationSize, 4096);
2964
2965 if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
2966 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2967
2968 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
2969 struct anv_memory_type *mem_type =
2970 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
2971 assert(mem_type->heapIndex < pdevice->memory.heap_count);
2972 struct anv_memory_heap *mem_heap =
2973 &pdevice->memory.heaps[mem_type->heapIndex];
2974
2975 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2976 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
2977 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2978
2979 mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2980 VK_OBJECT_TYPE_DEVICE_MEMORY);
2981 if (mem == NULL)
2982 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2983
2984 mem->type = mem_type;
2985 mem->map = NULL;
2986 mem->map_size = 0;
2987 mem->map_delta = 0;
2988 mem->ahw = NULL;
2989 mem->host_ptr = NULL;
2990
2991 enum anv_bo_alloc_flags alloc_flags = 0;
2992
2993 const VkExportMemoryAllocateInfo *export_info = NULL;
2994 const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
2995 const VkImportMemoryFdInfoKHR *fd_info = NULL;
2996 const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
2997 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
2998 VkMemoryAllocateFlags vk_flags = 0;
2999 uint64_t client_address = 0;
3000
3001 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3002 switch (ext->sType) {
3003 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3004 export_info = (void *)ext;
3005 break;
3006
3007 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3008 ahw_import_info = (void *)ext;
3009 break;
3010
3011 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3012 fd_info = (void *)ext;
3013 break;
3014
3015 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3016 host_ptr_info = (void *)ext;
3017 break;
3018
3019 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3020 const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3021 vk_flags = flags_info->flags;
3022 break;
3023 }
3024
3025 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3026 dedicated_info = (void *)ext;
3027 break;
3028
3029 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
3030 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
3031 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
3032 client_address = addr_info->opaqueCaptureAddress;
3033 break;
3034 }
3035
3036 default:
3037 if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
3038 /* this isn't a real enum value,
3039 * so use conditional to avoid compiler warn
3040 */
3041 vk_debug_ignored_stype(ext->sType);
3042 break;
3043 }
3044 }
3045
   if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
      alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;

   if ((export_info && export_info->handleTypes) ||
       (fd_info && fd_info->handleType) ||
       (host_ptr_info && host_ptr_info->handleType)) {
      /* Anything imported or exported is EXTERNAL */
      alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
   }

   /* Check if we need to support Android HW buffer export. If so,
    * create AHardwareBuffer and import memory from it.
    */
   bool android_export = false;
   if (export_info && export_info->handleTypes &
       VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
      android_export = true;

   if (ahw_import_info) {
      result = anv_import_ahw_memory(_device, mem, ahw_import_info);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   } else if (android_export) {
      result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
      if (result != VK_SUCCESS)
         goto fail;

      goto success;
   }

   /* The Vulkan spec permits handleType to be 0, in which case the struct is
    * ignored.
    */
   if (fd_info && fd_info->handleType) {
      /* At the moment, we support only the below handle types. */
      assert(fd_info->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
               VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
                                    client_address, &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (mem->bo->size < aligned_alloc_size) {
         result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "aligned allocationSize too large for "
                            "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
                            "%"PRIu64"B > %"PRIu64"B",
                            aligned_alloc_size, mem->bo->size);
         anv_device_release_bo(device, mem->bo);
         goto fail;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
      goto success;
   }

   if (host_ptr_info && host_ptr_info->handleType) {
      if (host_ptr_info->handleType ==
          VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
         result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
         goto fail;
      }

      assert(host_ptr_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);

      result = anv_device_import_bo_from_host_ptr(device,
                                                  host_ptr_info->pHostPointer,
                                                  pAllocateInfo->allocationSize,
                                                  alloc_flags,
                                                  client_address,
                                                  &mem->bo);
      if (result != VK_SUCCESS)
         goto fail;

      mem->host_ptr = host_ptr_info->pHostPointer;
      goto success;
   }

   /* Regular allocate (not importing memory). */

   result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
                                alloc_flags, client_address, &mem->bo);
   if (result != VK_SUCCESS)
      goto fail;

   if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
      ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);

      /* Some legacy (non-modifiers) consumers need the tiling to be set on
       * the BO. In this case, we have a dedicated allocation.
       */
      if (image->vk.wsi_legacy_scanout) {
         const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
         result = anv_device_set_bo_tiling(device, mem->bo,
                                           surf->row_pitch_B,
                                           surf->tiling);
         if (result != VK_SUCCESS) {
            anv_device_release_bo(device, mem->bo);
            goto fail;
         }
      }
   }

 success:
   mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
   if (mem_heap_used > mem_heap->size) {
      p_atomic_add(&mem_heap->used, -mem->bo->size);
      anv_device_release_bo(device, mem->bo);
      result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                         "Out of heap memory");
      goto fail;
   }

   pthread_mutex_lock(&device->mutex);
   list_addtail(&mem->link, &device->memory_objects);
   pthread_mutex_unlock(&device->mutex);

   *pMem = anv_device_memory_to_handle(mem);

   return VK_SUCCESS;

 fail:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}
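
/* For reference, a minimal (hypothetical) application-side sketch of the
 * fd-import path handled above; the fd and sizes are made up and error
 * handling is omitted:
 *
 *    VkImportMemoryFdInfoKHR import = {
 *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *       .fd = dmabuf_fd,        // ownership passes to the driver on success
 *    };
 *    VkMemoryAllocateInfo alloc = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &import,
 *       .allocationSize = size, // must not exceed the imported BO's size
 *       .memoryTypeIndex = type_index,
 *    };
 *    vkAllocateMemory(device, &alloc, NULL, &memory);
 */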

VkResult anv_GetMemoryFdKHR(
    VkDevice                                    device_h,
    const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
    int*                                        pFd)
{
   ANV_FROM_HANDLE(anv_device, dev, device_h);
   ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return anv_device_export_bo(dev, mem->bo, pFd);
}
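
/* Export-side counterpart to the import sketch above (again hypothetical
 * application code, not driver API); whoever receives the returned fd is
 * responsible for closing it:
 *
 *    VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd = -1;
 *    vkGetMemoryFdKHR(device, &get_fd, &fd);
 */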

VkResult anv_GetMemoryFdPropertiesKHR(
    VkDevice                                    _device,
    VkExternalMemoryHandleTypeFlagBits          handleType,
    int                                         fd,
    VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* dma-buf can be imported as any memory type */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->physical->memory.type_count) - 1;
      return VK_SUCCESS;

   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

VkResult anv_GetMemoryHostPointerPropertiesEXT(
    VkDevice                                    _device,
    VkExternalMemoryHandleTypeFlagBits          handleType,
    const void*                                 pHostPointer,
    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   assert(pMemoryHostPointerProperties->sType ==
          VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
      /* Host memory can be imported as any memory type. */
      pMemoryHostPointerProperties->memoryTypeBits =
         (1ull << device->physical->memory.type_count) - 1;

      return VK_SUCCESS;

   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

void anv_FreeMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _mem,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   pthread_mutex_lock(&device->mutex);
   list_del(&mem->link);
   pthread_mutex_unlock(&device->mutex);

   if (mem->map)
      anv_UnmapMemory(_device, _mem);

   p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
                -mem->bo->size);

   anv_device_release_bo(device, mem->bo);

#if DETECT_OS_ANDROID && ANDROID_API_LEVEL >= 26
   if (mem->ahw)
      AHardwareBuffer_release(mem->ahw);
#endif

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult anv_MapMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _memory,
    VkDeviceSize                                offset,
    VkDeviceSize                                size,
    VkMemoryMapFlags                            flags,
    void**                                      ppData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->host_ptr) {
      *ppData = mem->host_ptr + offset;
      return VK_SUCCESS;
   }

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * memory must have been created with a memory type that reports
    *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
    */
   if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "Memory object not mappable.");
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   if (size != (size_t)size) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "requested size 0x%"PRIx64" does not fit in %u bits",
                       size, (unsigned)(sizeof(size_t) * 8));
   }

   /* From the Vulkan 1.2.194 spec:
    *
    *    "memory must not be currently host mapped"
    */
   if (mem->map != NULL) {
      return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
                       "Memory object already mapped.");
   }

   uint32_t gem_flags = 0;

   if (!device->info->has_llc &&
       (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
      gem_flags |= I915_MMAP_WC;

   /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
   uint64_t map_offset;
   if (!device->physical->info.has_mmap_offset)
      map_offset = offset & ~4095ull;
   else
      map_offset = 0;
   assert(offset >= map_offset);
   uint64_t map_size = (offset + size) - map_offset;

   /* Let's map whole pages */
   map_size = align64(map_size, 4096);

   void *map;
   VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
                                       map_size, gem_flags, &map);
   if (result != VK_SUCCESS)
      return result;

   mem->map = map;
   mem->map_size = map_size;
   mem->map_delta = (offset - map_offset);
   *ppData = mem->map + mem->map_delta;

   return VK_SUCCESS;
}
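
/* Worked example of the arithmetic above (illustrative numbers): mapping
 * offset = 0x1234 with size = 0x100 on a kernel without mmap_offset gives
 *
 *    map_offset = 0x1234 & ~4095            = 0x1000
 *    map_size   = align64(0x1334 - 0x1000, 4096) = 0x1000
 *    map_delta  = 0x1234 - 0x1000           = 0x234
 *
 * so *ppData points 0x234 bytes into a single 4 KiB mapping that starts at
 * the page containing the requested offset.
 */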

void anv_UnmapMemory(
    VkDevice                                    _device,
    VkDeviceMemory                              _memory)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL || mem->host_ptr)
      return;

   anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);

   mem->map = NULL;
   mem->map_size = 0;
   mem->map_delta = 0;
}

VkResult anv_FlushMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_flush)
      return VK_SUCCESS;

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   /* Make sure the writes we're flushing have landed. */
   __builtin_ia32_mfence();
#endif

   for (uint32_t i = 0; i < memoryRangeCount; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
      if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         continue;

      uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
      if (map_offset >= mem->map_size)
         continue;

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      intel_flush_range(mem->map + map_offset,
                        MIN2(pMemoryRanges[i].size,
                             mem->map_size - map_offset));
#endif
   }

   return VK_SUCCESS;
}

VkResult anv_InvalidateMappedMemoryRanges(
    VkDevice                                    _device,
    uint32_t                                    memoryRangeCount,
    const VkMappedMemoryRange*                  pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device->physical->memory.need_flush)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < memoryRangeCount; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
      if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
         continue;

      uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
      if (map_offset >= mem->map_size)
         continue;

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      intel_invalidate_range(mem->map + map_offset,
                             MIN2(pMemoryRanges[i].size,
                                  mem->map_size - map_offset));
#endif
   }

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   /* Make sure no reads get moved up above the invalidate. */
   __builtin_ia32_mfence();
#endif

   return VK_SUCCESS;
}
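
/* Note the fence placement in the two functions above: for flushes the
 * mfence comes before the per-range loop so that all CPU stores are globally
 * visible before their cache lines are written back, while for invalidates
 * it comes after the loop so no later CPU load can be reordered ahead of the
 * invalidation.  Where host-visible memory is already coherent with the GPU,
 * memory.need_flush is false and both entry points return immediately.
 */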

void anv_GetDeviceMemoryCommitment(
    VkDevice                                    device,
    VkDeviceMemory                              memory,
    VkDeviceSize*                               pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

static void
anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
{
   ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
   ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);

   assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);

   if (mem) {
      assert(pBindInfo->memoryOffset < mem->bo->size);
      assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
      buffer->address = (struct anv_address) {
         .bo = mem->bo,
         .offset = pBindInfo->memoryOffset,
      };
   } else {
      buffer->address = ANV_NULL_ADDRESS;
   }
}

VkResult anv_BindBufferMemory2(
    VkDevice                                    device,
    uint32_t                                    bindInfoCount,
    const VkBindBufferMemoryInfo*               pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; i++)
      anv_bind_buffer_memory(&pBindInfos[i]);

   return VK_SUCCESS;
}

VkResult anv_QueueBindSparse(
    VkQueue                                     _queue,
    uint32_t                                    bindInfoCount,
    const VkBindSparseInfo*                     pBindInfo,
    VkFence                                     fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   if (vk_device_is_lost(&queue->device->vk))
      return VK_ERROR_DEVICE_LOST;

   return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
}

// Event functions

VkResult anv_CreateEvent(
    VkDevice                                    _device,
    const VkEventCreateInfo*                    pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkEvent*                                    pEvent)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_event *event;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);

   event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
                           VK_OBJECT_TYPE_EVENT);
   if (event == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
                                       sizeof(uint64_t), 8);
   *(uint64_t *)event->state.map = VK_EVENT_RESET;

   *pEvent = anv_event_to_handle(event);

   return VK_SUCCESS;
}

void anv_DestroyEvent(
    VkDevice                                    _device,
    VkEvent                                     _event,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (!event)
      return;

   anv_state_pool_free(&device->dynamic_state_pool, event->state);

   vk_object_free(&device->vk, pAllocator, event);
}

VkResult anv_GetEventStatus(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (vk_device_is_lost(&device->vk))
      return VK_ERROR_DEVICE_LOST;

   return *(uint64_t *)event->state.map;
}

VkResult anv_SetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_event, event, _event);

   *(uint64_t *)event->state.map = VK_EVENT_SET;

   return VK_SUCCESS;
}

VkResult anv_ResetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_event, event, _event);

   *(uint64_t *)event->state.map = VK_EVENT_RESET;

   return VK_SUCCESS;
}
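
/* An anv_event is just a 64-bit word in the dynamic state pool: the host-side
 * entry points above read and write it directly, and the command-buffer paths
 * for vkCmdSetEvent/vkCmdResetEvent/vkCmdWaitEvents are expected (elsewhere in
 * the driver) to touch the same GPU-visible word, which is why VK_EVENT_SET
 * and VK_EVENT_RESET are stored as raw values rather than a host-only flag.
 */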

// Buffer functions

static void
anv_get_buffer_memory_requirements(struct anv_device *device,
                                   VkDeviceSize size,
                                   VkBufferUsageFlags usage,
                                   VkMemoryRequirements2* pMemoryRequirements)
{
   /* The Vulkan spec (git aaed022) says:
    *
    *    memoryTypeBits is a bitfield and contains one bit set for every
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    */
   uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;

   /* Base alignment requirement of a cache line */
   uint32_t alignment = 16;

   if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
      alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);

   pMemoryRequirements->memoryRequirements.size = size;
   pMemoryRequirements->memoryRequirements.alignment = alignment;

   /* Storage and uniform buffers should have their size aligned to
    * 32 bits to avoid boundary checks when the last dword is not complete.
    * This ensures that no internal padding is needed for 16-bit types.
    */
   if (device->vk.enabled_features.robustBufferAccess &&
       (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
        usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
      pMemoryRequirements->memoryRequirements.size = align64(size, 4);

   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *requirements = (void *)ext;
         requirements->prefersDedicatedAllocation = false;
         requirements->requiresDedicatedAllocation = false;
         break;
      }

      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
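
/* Example of the requirements computed above (numbers are illustrative):
 * with robustBufferAccess enabled, a 27-byte uniform buffer reports
 *
 *    size           = align64(27, 4) = 28
 *    alignment      = MAX2(16, ANV_UBO_ALIGNMENT)
 *    memoryTypeBits = (1 << type_count) - 1   // every exposed memory type
 */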

void anv_GetBufferMemoryRequirements2(
    VkDevice                                    _device,
    const VkBufferMemoryRequirementsInfo2*      pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);

   anv_get_buffer_memory_requirements(device,
                                      buffer->vk.size,
                                      buffer->vk.usage,
                                      pMemoryRequirements);
}

void anv_GetDeviceBufferMemoryRequirements(
    VkDevice                                    _device,
    const VkDeviceBufferMemoryRequirements*     pInfo,
    VkMemoryRequirements2*                      pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   anv_get_buffer_memory_requirements(device,
                                      pInfo->pCreateInfo->size,
                                      pInfo->pCreateInfo->usage,
                                      pMemoryRequirements);
}

VkResult anv_CreateBuffer(
    VkDevice                                    _device,
    const VkBufferCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkBuffer*                                   pBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_buffer *buffer;

   /* Don't allow creating buffers bigger than our address space. The real
    * issue here is that we may align up the buffer size and we don't want
    * doing so to cause roll-over. However, no one has any business
    * allocating a buffer larger than our GTT size.
    */
   if (pCreateInfo->size > device->physical->gtt_size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   buffer = vk_buffer_create(&device->vk, pCreateInfo,
                             pAllocator, sizeof(*buffer));
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->address = ANV_NULL_ADDRESS;

   *pBuffer = anv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void anv_DestroyBuffer(
    VkDevice                                    _device,
    VkBuffer                                    _buffer,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkDeviceAddress anv_GetBufferDeviceAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);

   assert(!anv_address_is_null(buffer->address));
   assert(anv_bo_is_pinned(buffer->address.bo));

   return anv_address_physical(buffer->address);
}

uint64_t anv_GetBufferOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   return 0;
}

uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);

   assert(anv_bo_is_pinned(memory->bo));
   assert(memory->bo->has_client_visible_address);

   return intel_48b_address(memory->bo->offset);
}
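
/* Capture/replay sketch (hypothetical application code): the value returned
 * by vkGetDeviceMemoryOpaqueCaptureAddress() during capture is fed back
 * through VkMemoryOpaqueCaptureAddressAllocateInfo on replay, which is how
 * the client_address handled in anv_AllocateMemory()/anv_vma_alloc() above
 * ends up non-zero:
 *
 *    VkMemoryOpaqueCaptureAddressAllocateInfo addr_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,
 *       .opaqueCaptureAddress = captured_address,
 *    };
 *    // chained into VkMemoryAllocateInfo::pNext, together with the device
 *    // address flags in VkMemoryAllocateFlagsInfo::flags
 */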

void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                              enum isl_format format,
                              struct isl_swizzle swizzle,
                              isl_surf_usage_flags_t usage,
                              struct anv_address address,
                              uint32_t range, uint32_t stride)
{
   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = anv_address_physical(address),
                         .mocs = isl_mocs(&device->isl_dev, usage,
                                          address.bo && address.bo->is_external),
                         .size_B = range,
                         .format = format,
                         .swizzle = swizzle,
                         .stride_B = stride);
}

void anv_DestroySampler(
    VkDevice                                    _device,
    VkSampler                                   _sampler,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->bindless_state.map) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          sampler->bindless_state);
   }

   if (sampler->custom_border_color.map) {
      anv_state_reserved_pool_free(&device->custom_border_colors,
                                   sampler->custom_border_color);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}


static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    *pTimeDomainCount,
    VkTimeDomainEXT                             *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult anv_GetCalibratedTimestampsEXT(
    VkDevice                                    _device,
    uint32_t                                    timestampCount,
    const VkCalibratedTimestampInfoEXT          *pTimestampInfos,
    uint64_t                                    *pTimestamps,
    uint64_t                                    *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info->timestamp_frequency;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         if (!intel_gem_read_render_timestamp(device->fd,
                                              device->info->kmd_type,
                                              &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk, "Failed to read the "
                                      "TIMESTAMP register: %m");
         }
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                               s                 e
    *                      w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                              g
    *             0         1         2         3
    *   GPU       -----_____-----_____-----_____-----_____
    *
    *                                                m
    *                                x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic                    -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval                  <----------------->
    *   Deviation         <-------------------------->
    *
    *   s = read(raw)       2
    *   g = read(GPU)       1
    *   m = read(monotonic) 2
    *   e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
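
/* Numeric example of the deviation bound above (illustrative values): with a
 * 19.2 MHz GPU timestamp clock, device_period = DIV_ROUND_UP(1000000000,
 * 19200000) = 53 ns, so if the two CLOCK_MONOTONIC_RAW reads bracket a
 * 1200 ns window, pMaxDeviation = (1200 + 1) + 53 = 1254 ns.
 */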

void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}