1 /*
2 * Copyright 2024 Valve Corporation
3 * Copyright 2024 Alyssa Rosenzweig
4 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5 * SPDX-License-Identifier: MIT
6 */
7 #include "hk_physical_device.h"
8
9 #include "asahi/lib/agx_device.h"
10 #include "asahi/lib/agx_nir_lower_vbo.h"
11 #include "asahi/lib/agx_nir_passes.h"
12 #include "util/disk_cache.h"
13 #include "util/mesa-sha1.h"
14 #include "git_sha1.h"
15 #include "hk_buffer.h"
16 #include "hk_entrypoints.h"
17 #include "hk_image.h"
18 #include "hk_instance.h"
19 #include "hk_private.h"
20 #include "hk_shader.h"
21 #include "hk_wsi.h"
22
23 #include "util/u_debug.h"
24 #include "vulkan/vulkan_core.h"
25 #include "vulkan/wsi/wsi_common.h"
26 #include "vk_device.h"
27 #include "vk_drm_syncobj.h"
28 #include "vk_shader_module.h"
29
30 #include <fcntl.h>
31 #include <string.h>
32 #include <xf86drm.h>
33 #include <sys/stat.h>
34 #include <sys/sysmacros.h>
35
36 static uint32_t
hk_get_vk_version()37 hk_get_vk_version()
38 {
39 /* Version override takes priority */
40 const uint32_t version_override = vk_get_version_override();
41 if (version_override)
42 return version_override;
43
44 return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
45 }
46
47 static void
hk_get_device_extensions(const struct hk_instance * instance,struct vk_device_extension_table * ext)48 hk_get_device_extensions(const struct hk_instance *instance,
49 struct vk_device_extension_table *ext)
50 {
51 *ext = (struct vk_device_extension_table){
52 .KHR_8bit_storage = true,
53 .KHR_16bit_storage = true,
54 .KHR_bind_memory2 = true,
55 .KHR_buffer_device_address = true,
56 .KHR_calibrated_timestamps = false,
57 .KHR_copy_commands2 = true,
58 .KHR_create_renderpass2 = true,
59 .KHR_dedicated_allocation = true,
60 .KHR_depth_stencil_resolve = true,
61 .KHR_descriptor_update_template = true,
62 .KHR_device_group = true,
63 .KHR_draw_indirect_count = true,
64 .KHR_driver_properties = true,
65 .KHR_dynamic_rendering = true,
66 // TODO
67 .KHR_dynamic_rendering_local_read = false,
68 .KHR_external_fence = true,
69 .KHR_external_fence_fd = true,
70 .KHR_external_memory = true,
71 .KHR_external_memory_fd = true,
72 /* XXX: External timeline semaphores maybe broken in kernel, see
73 * dEQP-VK.synchronization.signal_order.shared_timeline_semaphore.write_copy_buffer_to_image_read_image_compute.image_128_r32_uint_opaque_fd
74 */
75 .KHR_external_semaphore = false,
76 .KHR_external_semaphore_fd = false,
77 .KHR_format_feature_flags2 = true,
78 .KHR_fragment_shader_barycentric = false,
79 .KHR_get_memory_requirements2 = true,
80 .KHR_global_priority = true,
81 .KHR_image_format_list = true,
82 .KHR_imageless_framebuffer = true,
83 #ifdef HK_USE_WSI_PLATFORM
84 .KHR_incremental_present = true,
85 #endif
86 .KHR_index_type_uint8 = true,
87 .KHR_line_rasterization = true,
88 .KHR_load_store_op_none = true,
89 .KHR_maintenance1 = true,
90 .KHR_maintenance2 = true,
91 .KHR_maintenance3 = true,
92 .KHR_maintenance4 = true,
93 .KHR_maintenance5 = true,
94 .KHR_maintenance6 = true,
95 .KHR_map_memory2 = true,
96 .KHR_multiview = true,
97 .KHR_pipeline_executable_properties = true,
98 .KHR_pipeline_library = true,
99 .KHR_push_descriptor = true,
100 .KHR_relaxed_block_layout = true,
101 .KHR_sampler_mirror_clamp_to_edge = true,
102 .KHR_sampler_ycbcr_conversion = true,
103 .KHR_separate_depth_stencil_layouts = true,
104 .KHR_shader_atomic_int64 = false,
105 .KHR_shader_clock = false,
106 .KHR_shader_draw_parameters = true,
107 .KHR_shader_expect_assume = true,
108 .KHR_shader_float_controls = true,
109 // TODO: wait for nvk
110 .KHR_shader_float_controls2 = true,
111 .KHR_shader_float16_int8 = true,
112 .KHR_shader_integer_dot_product = true,
113 .KHR_shader_maximal_reconvergence = true,
114 .KHR_shader_non_semantic_info = true,
115 .KHR_shader_relaxed_extended_instruction = true,
116 .KHR_shader_subgroup_extended_types = true,
117 .KHR_shader_subgroup_rotate = true,
118 .KHR_shader_subgroup_uniform_control_flow = true,
119 .KHR_shader_terminate_invocation = true,
120 .KHR_spirv_1_4 = true,
121 .KHR_storage_buffer_storage_class = true,
122 .KHR_timeline_semaphore = true,
123 #ifdef HK_USE_WSI_PLATFORM
124 .KHR_swapchain = true,
125 .KHR_swapchain_mutable_format = true,
126 #endif
127 .KHR_synchronization2 = true,
128 .KHR_uniform_buffer_standard_layout = true,
129 .KHR_variable_pointers = true,
130 .KHR_vertex_attribute_divisor = true,
131 .KHR_vulkan_memory_model = true,
132 .KHR_workgroup_memory_explicit_layout = true,
133 .KHR_zero_initialize_workgroup_memory = true,
134 .EXT_4444_formats = true,
135 .EXT_attachment_feedback_loop_layout = true,
136 .EXT_border_color_swizzle = true,
137 .EXT_buffer_device_address = true,
138 .EXT_calibrated_timestamps = false,
139 .EXT_conditional_rendering = false,
140 .EXT_color_write_enable = true,
141 .EXT_custom_border_color = true,
142 .EXT_depth_bias_control = false,
143 .EXT_depth_clip_control = false,
144 .EXT_depth_clip_enable = true,
145 .EXT_descriptor_indexing = true,
146 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
147 .EXT_display_control = false,
148 #endif
149 .EXT_dynamic_rendering_unused_attachments = true,
150 .EXT_extended_dynamic_state = true,
151 .EXT_extended_dynamic_state2 = true,
152 .EXT_extended_dynamic_state3 = true,
153 .EXT_external_memory_dma_buf = true,
154 // TODO
155 .EXT_global_priority = false,
156 // TODO
157 .EXT_global_priority_query = false,
158 .EXT_graphics_pipeline_library = true,
159 .EXT_host_query_reset = true,
160 .EXT_host_image_copy = true,
161 .EXT_image_2d_view_of_3d = true,
162 .EXT_image_robustness = true,
163 .EXT_image_sliced_view_of_3d = false,
164 .EXT_image_view_min_lod = false,
165 .EXT_index_type_uint8 = true,
166 .EXT_inline_uniform_block = true,
167 .EXT_line_rasterization = true,
168 .EXT_load_store_op_none = true,
169 .EXT_map_memory_placed = false,
170 .EXT_memory_budget = false,
171 .EXT_multi_draw = true,
172 .EXT_mutable_descriptor_type = true,
173 .EXT_non_seamless_cube_map = true,
174 .EXT_pipeline_creation_cache_control = true,
175 .EXT_pipeline_creation_feedback = true,
176 .EXT_pipeline_protected_access = true,
177 .EXT_pipeline_robustness = true,
178 .EXT_physical_device_drm = true,
179 .EXT_primitive_topology_list_restart = true,
180 .EXT_private_data = true,
181 .EXT_primitives_generated_query = false,
182 .EXT_provoking_vertex = true,
183 .EXT_robustness2 = true,
184 .EXT_sample_locations = true,
185 .EXT_sampler_filter_minmax = false,
186 .EXT_scalar_block_layout = true,
187 .EXT_separate_stencil_usage = true,
188 .EXT_shader_image_atomic_int64 = false,
189 .EXT_shader_demote_to_helper_invocation = true,
190 .EXT_shader_module_identifier = true,
191 .EXT_shader_object = true,
192 .EXT_shader_replicated_composites = true,
193 .EXT_shader_stencil_export = true,
194 .EXT_shader_subgroup_ballot = true,
195 .EXT_shader_subgroup_vote = true,
196 .EXT_shader_viewport_index_layer = true,
197 .EXT_subgroup_size_control = true,
198 #ifdef HK_USE_WSI_PLATFORM
199 .EXT_swapchain_maintenance1 = true,
200 #endif
201 .EXT_texel_buffer_alignment = true,
202 .EXT_tooling_info = true,
203 .EXT_transform_feedback = true,
204 .EXT_vertex_attribute_divisor = true,
205 .EXT_vertex_input_dynamic_state = true,
206 .EXT_ycbcr_2plane_444_formats = false,
207 .EXT_ycbcr_image_arrays = false,
208 .GOOGLE_decorate_string = true,
209 .GOOGLE_hlsl_functionality1 = true,
210 .GOOGLE_user_type = true,
211 .VALVE_mutable_descriptor_type = true,
212 };
213 }
214
215 static void
hk_get_device_features(const struct vk_device_extension_table * supported_extensions,struct vk_features * features)216 hk_get_device_features(
217 const struct vk_device_extension_table *supported_extensions,
218 struct vk_features *features)
219 {
220 *features = (struct vk_features){
221 /* Vulkan 1.0 */
222 .robustBufferAccess = true,
223 .fullDrawIndexUint32 = true,
224 .imageCubeArray = true,
225 .independentBlend = true,
226 .geometryShader = true,
227 .tessellationShader = true,
228 .sampleRateShading = true,
229 .dualSrcBlend = true,
230 .logicOp = true,
231 .multiDrawIndirect = true,
232 .drawIndirectFirstInstance = true,
233 .depthClamp = true,
234 .depthBiasClamp = true,
235 .fillModeNonSolid = true,
236 .depthBounds = false,
237 .wideLines = true,
238 .largePoints = true,
239 .alphaToOne = true,
240 .multiViewport = true,
241 .samplerAnisotropy = true,
242 .textureCompressionETC2 = false,
243 .textureCompressionBC = true,
244 .textureCompressionASTC_LDR = false,
245 .occlusionQueryPrecise = true,
246 .pipelineStatisticsQuery = true,
247 .vertexPipelineStoresAndAtomics = true,
248 .fragmentStoresAndAtomics = true,
249 .shaderTessellationAndGeometryPointSize = true,
250 .shaderImageGatherExtended = true,
251 .shaderStorageImageExtendedFormats = true,
252 /* TODO: hitting the vertex shader timeout in CTS, but should work */
253 .shaderStorageImageMultisample = false,
254 .shaderStorageImageReadWithoutFormat = true,
255 .shaderStorageImageWriteWithoutFormat = true,
256 .shaderUniformBufferArrayDynamicIndexing = true,
257 .shaderSampledImageArrayDynamicIndexing = true,
258 .shaderStorageBufferArrayDynamicIndexing = true,
259 .shaderStorageImageArrayDynamicIndexing = true,
260 .shaderClipDistance = true,
261 .shaderCullDistance = true,
262 .shaderFloat64 = false,
263 .shaderInt64 = true,
264 .shaderInt16 = true,
265 .shaderResourceResidency = false,
266 .shaderResourceMinLod = true,
267 .sparseBinding = false,
268 .sparseResidency2Samples = false,
269 .sparseResidency4Samples = false,
270 .sparseResidency8Samples = false,
271 .sparseResidencyAliased = false,
272 .sparseResidencyBuffer = false,
273 .sparseResidencyImage2D = false,
274 .sparseResidencyImage3D = false,
275 .variableMultisampleRate = false,
276 .inheritedQueries = true,
277
278 /* Vulkan 1.1 */
279 .storageBuffer16BitAccess = true,
280 .uniformAndStorageBuffer16BitAccess = true,
281 .storagePushConstant16 = true,
282 .storageInputOutput16 = false,
283 .multiview = true,
284 .multiviewGeometryShader = false,
285 .multiviewTessellationShader = false,
286 .variablePointersStorageBuffer = true,
287 .variablePointers = true,
288 .shaderDrawParameters = true,
289 .samplerYcbcrConversion = true,
290
291 /* Vulkan 1.2 */
292 .samplerMirrorClampToEdge = true,
293 .drawIndirectCount = true,
294 .storageBuffer8BitAccess = true,
295 .uniformAndStorageBuffer8BitAccess = true,
296 .storagePushConstant8 = true,
297 .shaderBufferInt64Atomics = false,
298 .shaderSharedInt64Atomics = false,
299 .shaderFloat16 = true,
300 .shaderInt8 = true,
301 .descriptorIndexing = true,
302 .shaderInputAttachmentArrayDynamicIndexing = true,
303 .shaderUniformTexelBufferArrayDynamicIndexing = true,
304 .shaderStorageTexelBufferArrayDynamicIndexing = true,
305 .shaderUniformBufferArrayNonUniformIndexing = true,
306 .shaderSampledImageArrayNonUniformIndexing = true,
307 .shaderStorageBufferArrayNonUniformIndexing = true,
308 .shaderStorageImageArrayNonUniformIndexing = true,
309 .shaderInputAttachmentArrayNonUniformIndexing = true,
310 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
311 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
312 .descriptorBindingUniformBufferUpdateAfterBind = true,
313 .descriptorBindingSampledImageUpdateAfterBind = true,
314 .descriptorBindingStorageImageUpdateAfterBind = true,
315 .descriptorBindingStorageBufferUpdateAfterBind = true,
316 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
317 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
318 .descriptorBindingUpdateUnusedWhilePending = true,
319 .descriptorBindingPartiallyBound = true,
320 .descriptorBindingVariableDescriptorCount = true,
321 .runtimeDescriptorArray = true,
322 .samplerFilterMinmax = false,
323 .scalarBlockLayout = true,
324 .imagelessFramebuffer = true,
325 .uniformBufferStandardLayout = true,
326 .shaderSubgroupExtendedTypes = true,
327 .separateDepthStencilLayouts = true,
328 .hostQueryReset = true,
329 .timelineSemaphore = true,
330 .bufferDeviceAddress = true,
331 .bufferDeviceAddressCaptureReplay = false,
332 .bufferDeviceAddressMultiDevice = false,
333 .vulkanMemoryModel = true,
334 .vulkanMemoryModelDeviceScope = true,
335 .vulkanMemoryModelAvailabilityVisibilityChains = false,
336 .shaderOutputViewportIndex = true,
337 .shaderOutputLayer = true,
338 .subgroupBroadcastDynamicId = true,
339
340 /* Vulkan 1.3 */
341 .robustImageAccess = true,
342 .inlineUniformBlock = true,
343 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
344 .pipelineCreationCacheControl = true,
345 .privateData = true,
346 .shaderDemoteToHelperInvocation = true,
347 .shaderTerminateInvocation = true,
348 .subgroupSizeControl = true,
349 .computeFullSubgroups = true,
350 .synchronization2 = true,
351 .shaderZeroInitializeWorkgroupMemory = true,
352 .dynamicRendering = true,
353 .shaderIntegerDotProduct = true,
354 .maintenance4 = true,
355
356 /* VK_KHR_dynamic_rendering_local_read */
357 .dynamicRenderingLocalRead = true,
358
359 /* VK_KHR_fragment_shader_barycentric */
360 .fragmentShaderBarycentric = false,
361
362 /* VK_KHR_global_priority */
363 .globalPriorityQuery = true,
364
365 /* VK_KHR_index_type_uint8 */
366 .indexTypeUint8 = true,
367
368 /* VK_KHR_line_rasterization */
369 .rectangularLines = false,
370 .bresenhamLines = true,
371 .smoothLines = false,
372 .stippledRectangularLines = false,
373 .stippledBresenhamLines = false,
374 .stippledSmoothLines = false,
375
376 /* VK_KHR_maintenance5 */
377 .maintenance5 = true,
378
379 /* VK_KHR_maintenance6 */
380 .maintenance6 = true,
381
382 /* VK_KHR_pipeline_executable_properties */
383 .pipelineExecutableInfo = true,
384
385 /* VK_KHR_present_id */
386 .presentId = false,
387
388 /* VK_KHR_present_wait */
389 .presentWait = false,
390
391 /* VK_KHR_shader_clock */
392 .shaderSubgroupClock = false,
393 .shaderDeviceClock = false,
394
395 /* VK_KHR_shader_expect_assume */
396 .shaderExpectAssume = true,
397
398 /* VK_KHR_shader_float_controls2 */
399 .shaderFloatControls2 = true,
400
401 /* VK_KHR_shader_maximal_reconvergence */
402 .shaderMaximalReconvergence = true,
403
404 /* VK_KHR_shader_subgroup_rotate */
405 .shaderSubgroupRotate = true,
406 .shaderSubgroupRotateClustered = true,
407
408 /* VK_KHR_vertex_attribute_divisor */
409 .vertexAttributeInstanceRateDivisor = true,
410 .vertexAttributeInstanceRateZeroDivisor = true,
411
412 /* VK_KHR_workgroup_memory_explicit_layout */
413 .workgroupMemoryExplicitLayout = true,
414 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
415 .workgroupMemoryExplicitLayout8BitAccess = true,
416 .workgroupMemoryExplicitLayout16BitAccess = true,
417
418 /* VK_EXT_4444_formats */
419 .formatA4R4G4B4 = true,
420 .formatA4B4G4R4 = true,
421
422 /* VK_EXT_attachment_feedback_loop_layout */
423 .attachmentFeedbackLoopLayout = true,
424
425 /* VK_EXT_border_color_swizzle */
426 .borderColorSwizzle = true,
427 .borderColorSwizzleFromImage = false,
428
429 /* VK_EXT_buffer_device_address */
430 .bufferDeviceAddressCaptureReplayEXT = false,
431
432 /* VK_EXT_color_write_enable */
433 .colorWriteEnable = true,
434
435 /* VK_EXT_conditional_rendering */
436 .conditionalRendering = false,
437 .inheritedConditionalRendering = false,
438
439 /* VK_EXT_custom_border_color */
440 .customBorderColors = true,
441 .customBorderColorWithoutFormat = true,
442
443 /* VK_EXT_depth_bias_control */
444 .depthBiasControl = false,
445 .leastRepresentableValueForceUnormRepresentation = false,
446 .floatRepresentation = false,
447 .depthBiasExact = false,
448
449 /* VK_EXT_depth_clip_control */
450 .depthClipControl = false,
451
452 /* VK_EXT_depth_clip_enable */
453 .depthClipEnable = true,
454
455 /* VK_EXT_dynamic_rendering_unused_attachments */
456 .dynamicRenderingUnusedAttachments = true,
457
458 /* VK_EXT_extended_dynamic_state */
459 .extendedDynamicState = true,
460
461 /* VK_EXT_extended_dynamic_state2 */
462 .extendedDynamicState2 = true,
463 .extendedDynamicState2LogicOp = true,
464 .extendedDynamicState2PatchControlPoints = true,
465
466 /* VK_EXT_extended_dynamic_state3 */
467 .extendedDynamicState3TessellationDomainOrigin = true,
468 .extendedDynamicState3DepthClampEnable = true,
469 .extendedDynamicState3PolygonMode = true,
470 .extendedDynamicState3RasterizationSamples = true,
471 .extendedDynamicState3SampleMask = true,
472 .extendedDynamicState3AlphaToCoverageEnable = true,
473 .extendedDynamicState3AlphaToOneEnable = true,
474 .extendedDynamicState3LogicOpEnable = true,
475 .extendedDynamicState3ColorBlendEnable = true,
476 .extendedDynamicState3ColorBlendEquation = true,
477 .extendedDynamicState3ColorWriteMask = true,
478 .extendedDynamicState3RasterizationStream = false,
479 .extendedDynamicState3ConservativeRasterizationMode = false,
480 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
481 .extendedDynamicState3DepthClipEnable = true,
482 .extendedDynamicState3SampleLocationsEnable = false,
483 .extendedDynamicState3ColorBlendAdvanced = false,
484 .extendedDynamicState3ProvokingVertexMode = true,
485 .extendedDynamicState3LineRasterizationMode = true,
486 .extendedDynamicState3LineStippleEnable = false,
487 .extendedDynamicState3DepthClipNegativeOneToOne = false,
488 .extendedDynamicState3ViewportWScalingEnable = false,
489 .extendedDynamicState3ViewportSwizzle = false,
490 .extendedDynamicState3CoverageToColorEnable = false,
491 .extendedDynamicState3CoverageToColorLocation = false,
492 .extendedDynamicState3CoverageModulationMode = false,
493 .extendedDynamicState3CoverageModulationTableEnable = false,
494 .extendedDynamicState3CoverageModulationTable = false,
495 .extendedDynamicState3CoverageReductionMode = false,
496 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
497 .extendedDynamicState3ShadingRateImageEnable = false,
498
499 /* VK_EXT_graphics_pipeline_library */
500 .graphicsPipelineLibrary = true,
501
502 /* VK_EXT_host_image_copy */
503 .hostImageCopy = true,
504
505 /* VK_EXT_image_2d_view_of_3d */
506 .image2DViewOf3D = true,
507 .sampler2DViewOf3D = true,
508
509 /* VK_EXT_image_sliced_view_of_3d */
510 .imageSlicedViewOf3D = false,
511
512 #ifdef HK_USE_WSI_PLATFORM
513 /* VK_EXT_swapchain_maintenance1 */
514 .swapchainMaintenance1 = false,
515 #endif
516
517 /* VK_EXT_image_view_min_lod */
518 .minLod = false,
519
520 /* VK_EXT_map_memory_placed */
521 .memoryMapPlaced = false,
522 .memoryMapRangePlaced = false,
523 .memoryUnmapReserve = false,
524
525 /* VK_EXT_multi_draw */
526 .multiDraw = true,
527
528 /* VK_EXT_mutable_descriptor_type */
529 .mutableDescriptorType = true,
530
531 /* VK_EXT_non_seamless_cube_map */
532 .nonSeamlessCubeMap = true,
533
534 /* VK_EXT_pipeline_protected_access */
535 .pipelineProtectedAccess = true,
536
537 /* VK_EXT_pipeline_robustness */
538 .pipelineRobustness = true,
539
540 /* VK_EXT_primitive_topology_list_restart */
541 .primitiveTopologyListRestart = true,
542 .primitiveTopologyPatchListRestart = false,
543
544 /* VK_EXT_primitives_generated_query */
545 .primitivesGeneratedQuery = false,
546 .primitivesGeneratedQueryWithNonZeroStreams = false,
547 .primitivesGeneratedQueryWithRasterizerDiscard = false,
548
549 /* VK_EXT_provoking_vertex */
550 .provokingVertexLast = true,
551 .transformFeedbackPreservesProvokingVertex = true,
552
553 /* VK_EXT_robustness2 */
554 .robustBufferAccess2 = true,
555 .robustImageAccess2 = true,
556 .nullDescriptor = true,
557
558 /* VK_EXT_shader_image_atomic_int64 */
559 .shaderImageInt64Atomics = false,
560 .sparseImageInt64Atomics = false,
561
562 /* VK_EXT_shader_module_identifier */
563 .shaderModuleIdentifier = true,
564
565 /* VK_EXT_shader_object */
566 .shaderObject = true,
567
568 /* VK_EXT_shader_replicated_composites */
569 .shaderReplicatedComposites = true,
570
571 /* VK_KHR_shader_subgroup_uniform_control_flow */
572 .shaderSubgroupUniformControlFlow = true,
573
574 /* VK_EXT_texel_buffer_alignment */
575 .texelBufferAlignment = true,
576
577 /* VK_EXT_transform_feedback */
578 .transformFeedback = true,
579 .geometryStreams = true,
580
581 /* VK_EXT_vertex_input_dynamic_state */
582 .vertexInputDynamicState = true,
583
584 /* VK_EXT_ycbcr_2plane_444_formats */
585 .ycbcr2plane444Formats = false,
586
587 /* VK_EXT_ycbcr_image_arrays */
588 .ycbcrImageArrays = false,
589
590 /* VK_KHR_shader_relaxed_extended_instruction */
591 .shaderRelaxedExtendedInstruction = true,
592 };
593 }
594
595 static void
hk_get_device_properties(const struct agx_device * dev,const struct hk_instance * instance,struct vk_properties * properties)596 hk_get_device_properties(const struct agx_device *dev,
597 const struct hk_instance *instance,
598 struct vk_properties *properties)
599 {
600 const VkSampleCountFlagBits sample_counts =
601 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
602
603 uint64_t os_page_size = 16384;
604 os_get_page_size(&os_page_size);
605
606 *properties = (struct vk_properties){
607 .apiVersion = hk_get_vk_version(),
608 .driverVersion = vk_get_driver_version(),
609 .vendorID = instance->force_vk_vendor ?: VK_VENDOR_ID_MESA,
610 .deviceID = 0,
611 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
612
613 /* Vulkan 1.0 limits */
614 .maxImageDimension1D = 16384,
615 .maxImageDimension2D = 16384,
616 .maxImageDimension3D = 16384,
617 .maxImageDimensionCube = 16384,
618 .maxImageArrayLayers = 2048,
619 .maxTexelBufferElements = AGX_TEXTURE_BUFFER_MAX_SIZE,
620 .maxUniformBufferRange = 65536,
621 .maxStorageBufferRange = UINT32_MAX,
622 .maxPushConstantsSize = HK_MAX_PUSH_SIZE,
623 .maxMemoryAllocationCount = 4096,
624 .maxSamplerAllocationCount = 4000,
625 .bufferImageGranularity = 0x400,
626 .sparseAddressSpaceSize = HK_SPARSE_ADDR_SPACE_SIZE,
627 .maxBoundDescriptorSets = HK_MAX_SETS,
628 .maxPerStageDescriptorSamplers = HK_MAX_DESCRIPTORS,
629 .maxPerStageDescriptorUniformBuffers = HK_MAX_DESCRIPTORS,
630 .maxPerStageDescriptorStorageBuffers = HK_MAX_DESCRIPTORS,
631 .maxPerStageDescriptorSampledImages = HK_MAX_DESCRIPTORS,
632 .maxPerStageDescriptorStorageImages = HK_MAX_DESCRIPTORS,
633 .maxPerStageDescriptorInputAttachments = HK_MAX_DESCRIPTORS,
634 .maxPerStageResources = UINT32_MAX,
635 .maxDescriptorSetSamplers = HK_MAX_DESCRIPTORS,
636 .maxDescriptorSetUniformBuffers = HK_MAX_DESCRIPTORS,
637 .maxDescriptorSetUniformBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
638 .maxDescriptorSetStorageBuffers = HK_MAX_DESCRIPTORS,
639 .maxDescriptorSetStorageBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
640 .maxDescriptorSetSampledImages = HK_MAX_DESCRIPTORS,
641 .maxDescriptorSetStorageImages = HK_MAX_DESCRIPTORS,
642 .maxDescriptorSetInputAttachments = HK_MAX_DESCRIPTORS,
643 .maxVertexInputAttributes = AGX_MAX_VBUFS,
644 .maxVertexInputBindings = AGX_MAX_ATTRIBS,
645 .maxVertexInputAttributeOffset = 65535,
646 .maxVertexInputBindingStride = 2048,
647 .maxVertexOutputComponents = 64,
648 .maxGeometryShaderInvocations = 32,
649 .maxGeometryInputComponents = 128,
650 .maxGeometryOutputComponents = 128,
651 .maxGeometryOutputVertices = 1024,
652 .maxGeometryTotalOutputComponents = 1024,
653 .maxTessellationGenerationLevel = 64,
654 .maxTessellationPatchSize = 32,
655 .maxTessellationControlPerVertexInputComponents = 128,
656 .maxTessellationControlPerVertexOutputComponents = 128,
657 .maxTessellationControlPerPatchOutputComponents = 120,
658 .maxTessellationControlTotalOutputComponents = 4216,
659 .maxTessellationEvaluationInputComponents = 128,
660 .maxTessellationEvaluationOutputComponents = 128,
661 .maxFragmentInputComponents = 64,
662 .maxFragmentOutputAttachments = HK_MAX_RTS,
663 .maxFragmentDualSrcAttachments = 1,
664 .maxFragmentCombinedOutputResources = 16,
665 .maxComputeSharedMemorySize = HK_MAX_SHARED_SIZE,
666 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
667 .maxComputeWorkGroupInvocations = 1024,
668 .maxComputeWorkGroupSize = {1024, 1024, 64},
669 .subPixelPrecisionBits = 8,
670 .subTexelPrecisionBits = 8,
671 .mipmapPrecisionBits = 8,
672 .maxDrawIndexedIndexValue = UINT32_MAX,
673 .maxDrawIndirectCount = UINT16_MAX,
674 .maxSamplerLodBias = 15,
675 .maxSamplerAnisotropy = 16,
676 .maxViewports = HK_MAX_VIEWPORTS,
677 .maxViewportDimensions = {32768, 32768},
678 .viewportBoundsRange = {-65536, 65536},
679 .viewportSubPixelBits = 8,
680 .minMemoryMapAlignment = os_page_size,
681 .minTexelBufferOffsetAlignment = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
682 .minUniformBufferOffsetAlignment = HK_MIN_UBO_ALIGNMENT,
683 .minStorageBufferOffsetAlignment = HK_MIN_SSBO_ALIGNMENT,
684 .minTexelOffset = -8,
685 .maxTexelOffset = 7,
686 .minTexelGatherOffset = -8,
687 .maxTexelGatherOffset = 7,
688 .minInterpolationOffset = -0.5,
689 .maxInterpolationOffset = 0.4375,
690 .subPixelInterpolationOffsetBits = 4,
691 .maxFramebufferHeight = 16384,
692 .maxFramebufferWidth = 16384,
693 .maxFramebufferLayers = 2048,
694 .framebufferColorSampleCounts = sample_counts,
695 .framebufferDepthSampleCounts = sample_counts,
696 .framebufferNoAttachmentsSampleCounts = sample_counts,
697 .framebufferStencilSampleCounts = sample_counts,
698 .maxColorAttachments = HK_MAX_RTS,
699 .sampledImageColorSampleCounts = sample_counts,
700 .sampledImageIntegerSampleCounts = sample_counts,
701 .sampledImageDepthSampleCounts = sample_counts,
702 .sampledImageStencilSampleCounts = sample_counts,
703 .storageImageSampleCounts = sample_counts,
704 .maxSampleMaskWords = 1,
705 .timestampComputeAndGraphics = false,
706 .timestampPeriod = 1,
707 .maxClipDistances = 8,
708 .maxCullDistances = 8,
709 .maxCombinedClipAndCullDistances = 8,
710 .discreteQueuePriorities = 2,
711 .pointSizeRange = {1.0, 512.f - 0.0625f},
712 .lineWidthRange = {1.0, 16.0f},
713 .pointSizeGranularity = 0.0625,
714 .lineWidthGranularity = 1.0f / 16.0f,
715 .strictLines = false,
716 .standardSampleLocations = true,
717 .optimalBufferCopyOffsetAlignment = 1,
718 .optimalBufferCopyRowPitchAlignment = 1,
719 .nonCoherentAtomSize = 64,
720
721 /* Vulkan 1.0 sparse properties */
722 .sparseResidencyNonResidentStrict = false,
723 .sparseResidencyAlignedMipSize = false,
724 .sparseResidencyStandard2DBlockShape = false,
725 .sparseResidencyStandard2DMultisampleBlockShape = false,
726 .sparseResidencyStandard3DBlockShape = false,
727
728 /* Vulkan 1.1 properties */
729 .subgroupSize = 32,
730 .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT |
731 VK_SHADER_STAGE_FRAGMENT_BIT |
732 VK_SHADER_STAGE_VERTEX_BIT,
733 .subgroupSupportedOperations =
734 VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
735 VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
736 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
737 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
738 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
739 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
740 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
741 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR,
742 .subgroupQuadOperationsInAllStages = true,
743 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
744 .maxMultiviewViewCount = HK_MAX_MULTIVIEW_VIEW_COUNT,
745 .maxMultiviewInstanceIndex = UINT32_MAX,
746 .maxPerSetDescriptors = UINT32_MAX,
747 .maxMemoryAllocationSize = (1u << 31),
748
749 /* Vulkan 1.2 properties */
750 .supportedDepthResolveModes =
751 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
752 VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
753 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
754 VK_RESOLVE_MODE_MIN_BIT |
755 VK_RESOLVE_MODE_MAX_BIT,
756 .independentResolveNone = true,
757 .independentResolve = true,
758 .driverID = VK_DRIVER_ID_MESA_HONEYKRISP,
759 .conformanceVersion = (VkConformanceVersion){1, 3, 8, 3},
760 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
761 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
762 .shaderSignedZeroInfNanPreserveFloat16 = true,
763 .shaderSignedZeroInfNanPreserveFloat32 = true,
764 .shaderSignedZeroInfNanPreserveFloat64 = false,
765 .shaderDenormPreserveFloat16 = true,
766 .shaderDenormPreserveFloat32 = false,
767 .shaderDenormPreserveFloat64 = false,
768 .shaderDenormFlushToZeroFloat16 = false,
769 .shaderDenormFlushToZeroFloat32 = true,
770 .shaderDenormFlushToZeroFloat64 = false,
771 .shaderRoundingModeRTEFloat16 = true,
772 .shaderRoundingModeRTEFloat32 = true,
773 .shaderRoundingModeRTEFloat64 = false,
774 .shaderRoundingModeRTZFloat16 = false,
775 .shaderRoundingModeRTZFloat32 = false,
776 .shaderRoundingModeRTZFloat64 = false,
777 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
778 .shaderUniformBufferArrayNonUniformIndexingNative = true,
779 .shaderSampledImageArrayNonUniformIndexingNative = true,
780 .shaderStorageBufferArrayNonUniformIndexingNative = true,
781 .shaderStorageImageArrayNonUniformIndexingNative = true,
782 .shaderInputAttachmentArrayNonUniformIndexingNative = true,
783 .robustBufferAccessUpdateAfterBind = true,
784 .quadDivergentImplicitLod = false,
785 .maxPerStageDescriptorUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
786 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
787 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
788 .maxPerStageDescriptorUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
789 .maxPerStageDescriptorUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
790 .maxPerStageDescriptorUpdateAfterBindInputAttachments =
791 HK_MAX_DESCRIPTORS,
792 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
793 .maxDescriptorSetUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
794 .maxDescriptorSetUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
795 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
796 HK_MAX_DYNAMIC_BUFFERS / 2,
797 .maxDescriptorSetUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
798 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
799 HK_MAX_DYNAMIC_BUFFERS / 2,
800 .maxDescriptorSetUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
801 .maxDescriptorSetUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
802 .maxDescriptorSetUpdateAfterBindInputAttachments = HK_MAX_DESCRIPTORS,
803 .filterMinmaxSingleComponentFormats = false,
804 .filterMinmaxImageComponentMapping = false,
805 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
806 .framebufferIntegerColorSampleCounts = sample_counts,
807
808 /* Vulkan 1.3 properties */
809 .minSubgroupSize = 32,
810 .maxSubgroupSize = 32,
811 .maxComputeWorkgroupSubgroups = 1024 / 32,
812 .requiredSubgroupSizeStages = 0,
813 .maxInlineUniformBlockSize = 1 << 16,
814 .maxPerStageDescriptorInlineUniformBlocks = 32,
815 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
816 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
817 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
818 .maxInlineUniformTotalSize = 1 << 16,
819 .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
820 .integerDotProduct4x8BitPackedSignedAccelerated = false,
821 .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
822 .storageTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
823 .storageTexelBufferOffsetSingleTexelAlignment = true,
824 .uniformTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
825 .uniformTexelBufferOffsetSingleTexelAlignment = true,
826 .maxBufferSize = HK_MAX_BUFFER_SIZE,
827
828 /* VK_KHR_push_descriptor */
829 .maxPushDescriptors = HK_MAX_PUSH_DESCRIPTORS,
830
831 /* VK_EXT_custom_border_color */
832 .maxCustomBorderColorSamplers = 4000,
833
834 /* VK_EXT_extended_dynamic_state3 */
835 .dynamicPrimitiveTopologyUnrestricted = true,
836
837 /* VK_EXT_graphics_pipeline_library */
838 .graphicsPipelineLibraryFastLinking = true,
839 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
840
841 /* VK_EXT_host_image_copy */
842
843 /* VK_KHR_line_rasterization */
844 .lineSubPixelPrecisionBits = 8,
845
846 /* VK_KHR_maintenance5 */
847 .earlyFragmentMultisampleCoverageAfterSampleCounting = false,
848 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
849 .depthStencilSwizzleOneSupport = true,
850 .polygonModePointSize = false,
851 .nonStrictSinglePixelWideLinesUseParallelogram = false,
852 .nonStrictWideLinesUseParallelogram = false,
853
854 /* VK_KHR_maintenance6 */
855 .blockTexelViewCompatibleMultipleLayers = false,
856 .maxCombinedImageSamplerDescriptorCount = 3,
857 .fragmentShadingRateClampCombinerInputs = false, /* TODO */
858
859 /* VK_EXT_map_memory_placed */
860 .minPlacedMemoryMapAlignment = os_page_size,
861
862 /* VK_EXT_multi_draw */
863 .maxMultiDrawCount = UINT16_MAX,
864
865 /* VK_EXT_pipeline_robustness */
866 .defaultRobustnessStorageBuffers =
867 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
868 .defaultRobustnessUniformBuffers =
869 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
870 .defaultRobustnessVertexInputs =
871 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
872 .defaultRobustnessImages =
873 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
874
875 /* VK_EXT_physical_device_drm gets populated later */
876
877 /* VK_EXT_provoking_vertex */
878 .provokingVertexModePerPipeline = true,
879 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
880
881 /* VK_EXT_robustness2 */
882 .robustStorageBufferAccessSizeAlignment = HK_SSBO_BOUNDS_CHECK_ALIGNMENT,
883 .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
884
885 /* VK_EXT_sample_locations */
886 .sampleLocationSampleCounts = sample_counts,
887 .maxSampleLocationGridSize = (VkExtent2D){1, 1},
888 .sampleLocationCoordinateRange[0] = 0.0f,
889 .sampleLocationCoordinateRange[1] = 0.9375f,
890 .sampleLocationSubPixelBits = 4,
891 .variableSampleLocations = false,
892
893 /* VK_EXT_shader_object */
894 .shaderBinaryVersion = 0,
895
896 /* VK_EXT_transform_feedback */
897 .maxTransformFeedbackStreams = 4,
898 .maxTransformFeedbackBuffers = 4,
899 .maxTransformFeedbackBufferSize = UINT32_MAX,
900 .maxTransformFeedbackStreamDataSize = 2048,
901 .maxTransformFeedbackBufferDataSize = 512,
902 .maxTransformFeedbackBufferDataStride = 2048,
903 .transformFeedbackQueries = true,
904 .transformFeedbackStreamsLinesTriangles = false,
905 .transformFeedbackRasterizationStreamSelect = false,
906 .transformFeedbackDraw = false,
907
908 /* VK_KHR_vertex_attribute_divisor */
909 .maxVertexAttribDivisor = UINT32_MAX,
910 .supportsNonZeroFirstInstance = true,
911
912 /* VK_KHR_fragment_shader_barycentric */
913 .triStripVertexOrderIndependentOfProvokingVertex = false,
914 };
915
916 strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName));
917
918 /* VK_EXT_shader_module_identifier */
919 static_assert(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
920 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
921 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
922 vk_shaderModuleIdentifierAlgorithmUUID,
923 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
924
925 const struct {
926 uint16_t vendor_id;
927 uint16_t device_id;
928 uint8_t pad[12];
929 } dev_uuid = {
930 .vendor_id = 0,
931 .device_id = 0,
932 };
933 static_assert(sizeof(dev_uuid) == VK_UUID_SIZE);
934 memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
935 static_assert(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
936 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
937
938 strncpy(properties->driverName, "Honeykrisp", VK_MAX_DRIVER_NAME_SIZE);
939 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
940 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
941
942 /* We don't use the layouts ATM so just report all layouts from
943 * extensions that we support as compatible.
944 */
945 static const VkImageLayout supported_layouts[] = {
946 VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
947 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
948 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
949 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
950 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
951 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
952 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
953 VK_IMAGE_LAYOUT_PREINITIALIZED,
954 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
955 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
956 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
957 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
958 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
959 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
960 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
961 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
962 // VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
963 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
964 };
965
966 properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
967 properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
968 properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
969 properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
970
971 /* We're a UMR so we can always map every kind of memory */
972 properties->identicalMemoryTypeRequirements = true;
973
974 {
975 struct mesa_sha1 sha1_ctx;
976 uint8_t sha1[20];
977
978 _mesa_sha1_init(&sha1_ctx);
979 /* Make sure we don't match with other vendors */
980 const char *driver = "honeykrisp-v1";
981 _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
982 _mesa_sha1_final(&sha1_ctx, sha1);
983
984 memcpy(properties->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
985 }
986 }
987
988 static void
hk_physical_device_init_pipeline_cache(struct hk_physical_device * pdev)989 hk_physical_device_init_pipeline_cache(struct hk_physical_device *pdev)
990 {
991 struct hk_instance *instance = hk_physical_device_instance(pdev);
992
993 struct mesa_sha1 sha_ctx;
994 _mesa_sha1_init(&sha_ctx);
995
996 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
997 sizeof(instance->driver_build_sha));
998
999 const uint64_t compiler_flags = hk_physical_device_compiler_flags(pdev);
1000 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1001
1002 unsigned char sha[SHA1_DIGEST_LENGTH];
1003 _mesa_sha1_final(&sha_ctx, sha);
1004
1005 static_assert(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1006 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1007 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1008
1009 #ifdef ENABLE_SHADER_CACHE
1010 char renderer[10];
1011 ASSERTED int len = snprintf(renderer, sizeof(renderer), "hk_g13g_");
1012 assert(len == sizeof(renderer) - 2);
1013
1014 char timestamp[41];
1015 _mesa_sha1_format(timestamp, instance->driver_build_sha);
1016
1017 const uint64_t driver_flags = hk_physical_device_compiler_flags(pdev);
1018 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1019 #endif
1020 }
1021
1022 static void
hk_physical_device_free_disk_cache(struct hk_physical_device * pdev)1023 hk_physical_device_free_disk_cache(struct hk_physical_device *pdev)
1024 {
1025 #ifdef ENABLE_SHADER_CACHE
1026 if (pdev->vk.disk_cache) {
1027 disk_cache_destroy(pdev->vk.disk_cache);
1028 pdev->vk.disk_cache = NULL;
1029 }
1030 #else
1031 assert(pdev->vk.disk_cache == NULL);
1032 #endif
1033 }
1034
/*
 * Size in bytes to advertise for the system-memory heap.
 *
 * Returns 3/4 of the total physical memory passed through
 * ROUND_DOWN_TO(..., 1 << 20), or 0 when the total cannot be queried.
 */
static uint64_t
hk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Use 3/4 of total size to avoid swapping */
      const uint64_t heap_B = total_B * 3 / 4;
      return ROUND_DOWN_TO(heap_B, 1 << 20);
   }

   return 0;
}
1045
/*
 * Estimate how many bytes of the system-memory heap are currently available.
 *
 * Returns 3/4 of the available system memory passed through
 * ROUND_DOWN_TO(..., 1 << 20). If the query fails, an error is logged
 * against pdev and 0 is returned.
 */
static uint64_t
hk_get_sysmem_heap_available(struct hk_physical_device *pdev)
{
   uint64_t free_B = 0;

   if (os_get_available_system_memory(&free_B)) {
      /* Use 3/4 of available to avoid swapping */
      const uint64_t usable_B = free_B * 3 / 4;
      return ROUND_DOWN_TO(usable_B, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1058
1059 VkResult
hk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)1060 hk_create_drm_physical_device(struct vk_instance *_instance,
1061 drmDevicePtr drm_device,
1062 struct vk_physical_device **pdev_out)
1063 {
1064 struct hk_instance *instance = (struct hk_instance *)_instance;
1065 VkResult result;
1066
1067 /* Blanket refusal to probe due to unstable UAPI. */
1068 return VK_ERROR_INCOMPATIBLE_DRIVER;
1069
1070 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1071 drm_device->bustype != DRM_BUS_PLATFORM)
1072 return VK_ERROR_INCOMPATIBLE_DRIVER;
1073
1074 const char *path = drm_device->nodes[DRM_NODE_RENDER];
1075 int fd = open(path, O_RDWR | O_CLOEXEC);
1076 if (fd < 0) {
1077 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1078 "failed to open device %s", path);
1079 }
1080
1081 drmVersionPtr version = drmGetVersion(fd);
1082 if (!version) {
1083 result =
1084 vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1085 "failed to query kernel driver version for device %s", path);
1086 goto fail_fd;
1087 }
1088
1089 bool is_asahi = (strcmp(version->name, "asahi") == 0);
1090 is_asahi |= strcmp(version->name, "virtio_gpu") == 0;
1091 drmFreeVersion(version);
1092
1093 if (!is_asahi) {
1094 result =
1095 vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1096 "device %s does not use the asahi kernel driver", path);
1097 goto fail_fd;
1098 }
1099
1100 struct stat st;
1101 if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1102 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1103 "fstat() failed on %s: %m",
1104 drm_device->nodes[DRM_NODE_RENDER]);
1105 goto fail_fd;
1106 }
1107 const dev_t render_dev = st.st_rdev;
1108
1109 struct hk_physical_device *pdev =
1110 vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
1111 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1112
1113 if (pdev == NULL) {
1114 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1115 goto fail_fd;
1116 }
1117
1118 /* TODO: we're render-only, should we be reporting displays anyway in
1119 * KHR_display?
1120 */
1121 pdev->master_fd = -1;
1122
1123 #if 0
1124 if (instance->vk.enabled_extensions.KHR_display) {
1125 int master_fd =
1126 open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
1127
1128 if (master_fd >= 0) {
1129 struct stat st;
1130 if (!stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
1131 pdev->master_fd = master_fd;
1132 properties.drmHasPrimary = true;
1133 properties.drmPrimaryMajor = major(st.st_rdev);
1134 properties.drmPrimaryMinor = minor(st.st_rdev);
1135 }
1136 }
1137 }
1138 #endif
1139
1140 pdev->render_dev = render_dev;
1141 pdev->dev.fd = fd;
1142
1143 if (!agx_open_device(NULL, &pdev->dev)) {
1144 result = vk_error(instance, VK_ERROR_UNKNOWN);
1145 goto fail_pdev_alloc;
1146 }
1147
1148 struct vk_physical_device_dispatch_table dispatch_table;
1149 vk_physical_device_dispatch_table_from_entrypoints(
1150 &dispatch_table, &hk_physical_device_entrypoints, true);
1151 vk_physical_device_dispatch_table_from_entrypoints(
1152 &dispatch_table, &wsi_physical_device_entrypoints, false);
1153
1154 struct vk_device_extension_table supported_extensions;
1155 hk_get_device_extensions(instance, &supported_extensions);
1156
1157 struct vk_features supported_features;
1158 hk_get_device_features(&supported_extensions, &supported_features);
1159
1160 struct vk_properties properties;
1161 hk_get_device_properties(&pdev->dev, instance, &properties);
1162
1163 properties.drmHasRender = true;
1164 properties.drmRenderMajor = major(render_dev);
1165 properties.drmRenderMinor = minor(render_dev);
1166
1167 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1168 &supported_extensions, &supported_features,
1169 &properties, &dispatch_table);
1170 if (result != VK_SUCCESS)
1171 goto fail_agx_device;
1172
1173 hk_physical_device_init_pipeline_cache(pdev);
1174
1175 uint64_t sysmem_size_B = hk_get_sysmem_heap_size();
1176 if (sysmem_size_B == 0) {
1177 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1178 "Failed to query total system memory");
1179 goto fail_disk_cache;
1180 }
1181
1182 uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1183 pdev->mem_heaps[sysmem_heap_idx] = (struct hk_memory_heap){
1184 .size = sysmem_size_B,
1185 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1186 .available = hk_get_sysmem_heap_available,
1187 };
1188
1189 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){
1190 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1191 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1192 VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
1193 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1194 .heapIndex = sysmem_heap_idx,
1195 };
1196
1197 assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1198 assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1199
1200 /* TODO: VK_QUEUE_SPARSE_BINDING_BIT*/
1201 pdev->queue_families[pdev->queue_family_count++] = (struct hk_queue_family){
1202 .queue_flags =
1203 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1204
1205 .queue_count = 1,
1206 };
1207 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1208
1209 unsigned st_idx = 0;
1210 pdev->syncobj_sync_type = vk_drm_syncobj_get_type(fd);
1211 pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1212 pdev->sync_types[st_idx++] = NULL;
1213 assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1214 pdev->vk.supported_sync_types = pdev->sync_types;
1215
1216 result = hk_init_wsi(pdev);
1217 if (result != VK_SUCCESS)
1218 goto fail_disk_cache;
1219
1220 *pdev_out = &pdev->vk;
1221
1222 return VK_SUCCESS;
1223
1224 fail_disk_cache:
1225 hk_physical_device_free_disk_cache(pdev);
1226 vk_physical_device_finish(&pdev->vk);
1227 fail_agx_device:
1228 agx_close_device(&pdev->dev);
1229 fail_pdev_alloc:
1230 if (pdev->master_fd)
1231 close(pdev->master_fd);
1232
1233 vk_free(&pdev->vk.instance->alloc, pdev);
1234 fail_fd:
1235 close(fd);
1236 return result;
1237 }
1238
1239 void
hk_physical_device_destroy(struct vk_physical_device * vk_pdev)1240 hk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1241 {
1242 struct hk_physical_device *pdev =
1243 container_of(vk_pdev, struct hk_physical_device, vk);
1244
1245 hk_finish_wsi(pdev);
1246
1247 if (pdev->master_fd >= 0)
1248 close(pdev->master_fd);
1249
1250 hk_physical_device_free_disk_cache(pdev);
1251 agx_close_device(&pdev->dev);
1252 vk_physical_device_finish(&pdev->vk);
1253 vk_free(&pdev->vk.instance->alloc, pdev);
1254 }
1255
1256 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1257 hk_GetPhysicalDeviceMemoryProperties2(
1258 VkPhysicalDevice physicalDevice,
1259 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1260 {
1261 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1262
1263 pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1264 for (int i = 0; i < pdev->mem_heap_count; i++) {
1265 pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
1266 .size = pdev->mem_heaps[i].size,
1267 .flags = pdev->mem_heaps[i].flags,
1268 };
1269 }
1270
1271 pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1272 for (int i = 0; i < pdev->mem_type_count; i++) {
1273 pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1274 }
1275
1276 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1277 switch (ext->sType) {
1278 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1279 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1280
1281 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1282 const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1283 uint64_t used = p_atomic_read(&heap->used);
1284
1285 /* From the Vulkan 1.3.278 spec:
1286 *
1287 * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1288 * values in which memory usages are returned, with one element
1289 * for each memory heap. A heap’s usage is an estimate of how
1290 * much memory the process is currently using in that heap."
1291 *
1292 * TODO: Include internal allocations?
1293 */
1294 p->heapUsage[i] = used;
1295
1296 uint64_t available = heap->size;
1297 if (heap->available)
1298 available = heap->available(pdev);
1299
1300 /* From the Vulkan 1.3.278 spec:
1301 *
1302 * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1303 * values in which memory budgets are returned, with one
1304 * element for each memory heap. A heap’s budget is a rough
1305 * estimate of how much memory the process can allocate from
1306 * that heap before allocations may fail or cause performance
1307 * degradation. The budget includes any currently allocated
1308 * device memory."
1309 *
1310 * and
1311 *
1312 * "The heapBudget value must be less than or equal to
1313 * VkMemoryHeap::size for each heap."
1314 *
1315 * available (queried above) is the total amount free memory
1316 * system-wide and does not include our allocations so we need
1317 * to add that in.
1318 */
1319 uint64_t budget = MIN2(available + used, heap->size);
1320
1321 /* Set the budget at 90% of available to avoid thrashing */
1322 p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1323 }
1324
1325 /* From the Vulkan 1.3.278 spec:
1326 *
1327 * "The heapBudget and heapUsage values must be zero for array
1328 * elements greater than or equal to
1329 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1330 * heapBudget value must be non-zero for array elements less than
1331 * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1332 */
1333 for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1334 p->heapBudget[i] = 0u;
1335 p->heapUsage[i] = 0u;
1336 }
1337 break;
1338 }
1339 default:
1340 vk_debug_ignored_stype(ext->sType);
1341 break;
1342 }
1343 }
1344 }
1345
1346 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1347 hk_GetPhysicalDeviceQueueFamilyProperties2(
1348 VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
1349 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1350 {
1351 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1352 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1353 pQueueFamilyPropertyCount);
1354
1355 for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1356 const struct hk_queue_family *queue_family = &pdev->queue_families[i];
1357
1358 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1359 {
1360 p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1361 p->queueFamilyProperties.queueCount = queue_family->queue_count;
1362 p->queueFamilyProperties.timestampValidBits = 0; // TODO 64;
1363 p->queueFamilyProperties.minImageTransferGranularity =
1364 (VkExtent3D){1, 1, 1};
1365
1366 vk_foreach_struct(ext, p->pNext) {
1367 switch (ext->sType) {
1368 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1369 VkQueueFamilyGlobalPriorityPropertiesKHR *props = (void *)ext;
1370
1371 /* TODO: support multiple priorities */
1372 props->priorityCount = 1;
1373 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
1374 break;
1375 }
1376 default:
1377 break;
1378 }
1379 }
1380 }
1381 }
1382 }
1383
/* Time domains returned by hk_GetPhysicalDeviceCalibrateableTimeDomainsKHR.
 * The raw monotonic domain is only listed when the platform headers define
 * CLOCK_MONOTONIC_RAW.
 */
static const VkTimeDomainKHR hk_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
1391
1392 VKAPI_ATTR VkResult VKAPI_CALL
hk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)1393 hk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,
1394 uint32_t *pTimeDomainCount,
1395 VkTimeDomainKHR *pTimeDomains)
1396 {
1397 VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
1398
1399 for (int d = 0; d < ARRAY_SIZE(hk_time_domains); d++) {
1400 vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
1401 {
1402 *i = hk_time_domains[d];
1403 }
1404 }
1405
1406 return vk_outarray_status(&out);
1407 }
1408
1409 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1410 hk_GetPhysicalDeviceMultisamplePropertiesEXT(
1411 VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
1412 VkMultisamplePropertiesEXT *pMultisampleProperties)
1413 {
1414 VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1415
1416 if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1417 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1418 } else {
1419 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1420 }
1421 }
1422