xref: /aosp_15_r20/external/mesa3d/src/asahi/vulkan/hk_physical_device.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2024 Alyssa Rosenzweig
4  * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
5  * SPDX-License-Identifier: MIT
6  */
7 #include "hk_physical_device.h"
8 
9 #include "asahi/lib/agx_device.h"
10 #include "asahi/lib/agx_nir_lower_vbo.h"
11 #include "asahi/lib/agx_nir_passes.h"
12 #include "util/disk_cache.h"
13 #include "util/mesa-sha1.h"
14 #include "git_sha1.h"
15 #include "hk_buffer.h"
16 #include "hk_entrypoints.h"
17 #include "hk_image.h"
18 #include "hk_instance.h"
19 #include "hk_private.h"
20 #include "hk_shader.h"
21 #include "hk_wsi.h"
22 
23 #include "util/u_debug.h"
24 #include "vulkan/vulkan_core.h"
25 #include "vulkan/wsi/wsi_common.h"
26 #include "vk_device.h"
27 #include "vk_drm_syncobj.h"
28 #include "vk_shader_module.h"
29 
30 #include <fcntl.h>
31 #include <string.h>
32 #include <xf86drm.h>
33 #include <sys/stat.h>
34 #include <sys/sysmacros.h>
35 
36 static uint32_t
hk_get_vk_version()37 hk_get_vk_version()
38 {
39    /* Version override takes priority */
40    const uint32_t version_override = vk_get_version_override();
41    if (version_override)
42       return version_override;
43 
44    return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
45 }
46 
47 static void
hk_get_device_extensions(const struct hk_instance * instance,struct vk_device_extension_table * ext)48 hk_get_device_extensions(const struct hk_instance *instance,
49                          struct vk_device_extension_table *ext)
50 {
51    *ext = (struct vk_device_extension_table){
52       .KHR_8bit_storage = true,
53       .KHR_16bit_storage = true,
54       .KHR_bind_memory2 = true,
55       .KHR_buffer_device_address = true,
56       .KHR_calibrated_timestamps = false,
57       .KHR_copy_commands2 = true,
58       .KHR_create_renderpass2 = true,
59       .KHR_dedicated_allocation = true,
60       .KHR_depth_stencil_resolve = true,
61       .KHR_descriptor_update_template = true,
62       .KHR_device_group = true,
63       .KHR_draw_indirect_count = true,
64       .KHR_driver_properties = true,
65       .KHR_dynamic_rendering = true,
66       // TODO
67       .KHR_dynamic_rendering_local_read = false,
68       .KHR_external_fence = true,
69       .KHR_external_fence_fd = true,
70       .KHR_external_memory = true,
71       .KHR_external_memory_fd = true,
72       /* XXX: External timeline semaphores maybe broken in kernel, see
73        * dEQP-VK.synchronization.signal_order.shared_timeline_semaphore.write_copy_buffer_to_image_read_image_compute.image_128_r32_uint_opaque_fd
74        */
75       .KHR_external_semaphore = false,
76       .KHR_external_semaphore_fd = false,
77       .KHR_format_feature_flags2 = true,
78       .KHR_fragment_shader_barycentric = false,
79       .KHR_get_memory_requirements2 = true,
80       .KHR_global_priority = true,
81       .KHR_image_format_list = true,
82       .KHR_imageless_framebuffer = true,
83 #ifdef HK_USE_WSI_PLATFORM
84       .KHR_incremental_present = true,
85 #endif
86       .KHR_index_type_uint8 = true,
87       .KHR_line_rasterization = true,
88       .KHR_load_store_op_none = true,
89       .KHR_maintenance1 = true,
90       .KHR_maintenance2 = true,
91       .KHR_maintenance3 = true,
92       .KHR_maintenance4 = true,
93       .KHR_maintenance5 = true,
94       .KHR_maintenance6 = true,
95       .KHR_map_memory2 = true,
96       .KHR_multiview = true,
97       .KHR_pipeline_executable_properties = true,
98       .KHR_pipeline_library = true,
99       .KHR_push_descriptor = true,
100       .KHR_relaxed_block_layout = true,
101       .KHR_sampler_mirror_clamp_to_edge = true,
102       .KHR_sampler_ycbcr_conversion = true,
103       .KHR_separate_depth_stencil_layouts = true,
104       .KHR_shader_atomic_int64 = false,
105       .KHR_shader_clock = false,
106       .KHR_shader_draw_parameters = true,
107       .KHR_shader_expect_assume = true,
108       .KHR_shader_float_controls = true,
109       // TODO: wait for nvk
110       .KHR_shader_float_controls2 = true,
111       .KHR_shader_float16_int8 = true,
112       .KHR_shader_integer_dot_product = true,
113       .KHR_shader_maximal_reconvergence = true,
114       .KHR_shader_non_semantic_info = true,
115       .KHR_shader_relaxed_extended_instruction = true,
116       .KHR_shader_subgroup_extended_types = true,
117       .KHR_shader_subgroup_rotate = true,
118       .KHR_shader_subgroup_uniform_control_flow = true,
119       .KHR_shader_terminate_invocation = true,
120       .KHR_spirv_1_4 = true,
121       .KHR_storage_buffer_storage_class = true,
122       .KHR_timeline_semaphore = true,
123 #ifdef HK_USE_WSI_PLATFORM
124       .KHR_swapchain = true,
125       .KHR_swapchain_mutable_format = true,
126 #endif
127       .KHR_synchronization2 = true,
128       .KHR_uniform_buffer_standard_layout = true,
129       .KHR_variable_pointers = true,
130       .KHR_vertex_attribute_divisor = true,
131       .KHR_vulkan_memory_model = true,
132       .KHR_workgroup_memory_explicit_layout = true,
133       .KHR_zero_initialize_workgroup_memory = true,
134       .EXT_4444_formats = true,
135       .EXT_attachment_feedback_loop_layout = true,
136       .EXT_border_color_swizzle = true,
137       .EXT_buffer_device_address = true,
138       .EXT_calibrated_timestamps = false,
139       .EXT_conditional_rendering = false,
140       .EXT_color_write_enable = true,
141       .EXT_custom_border_color = true,
142       .EXT_depth_bias_control = false,
143       .EXT_depth_clip_control = false,
144       .EXT_depth_clip_enable = true,
145       .EXT_descriptor_indexing = true,
146 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
147       .EXT_display_control = false,
148 #endif
149       .EXT_dynamic_rendering_unused_attachments = true,
150       .EXT_extended_dynamic_state = true,
151       .EXT_extended_dynamic_state2 = true,
152       .EXT_extended_dynamic_state3 = true,
153       .EXT_external_memory_dma_buf = true,
154       // TODO
155       .EXT_global_priority = false,
156       // TODO
157       .EXT_global_priority_query = false,
158       .EXT_graphics_pipeline_library = true,
159       .EXT_host_query_reset = true,
160       .EXT_host_image_copy = true,
161       .EXT_image_2d_view_of_3d = true,
162       .EXT_image_robustness = true,
163       .EXT_image_sliced_view_of_3d = false,
164       .EXT_image_view_min_lod = false,
165       .EXT_index_type_uint8 = true,
166       .EXT_inline_uniform_block = true,
167       .EXT_line_rasterization = true,
168       .EXT_load_store_op_none = true,
169       .EXT_map_memory_placed = false,
170       .EXT_memory_budget = false,
171       .EXT_multi_draw = true,
172       .EXT_mutable_descriptor_type = true,
173       .EXT_non_seamless_cube_map = true,
174       .EXT_pipeline_creation_cache_control = true,
175       .EXT_pipeline_creation_feedback = true,
176       .EXT_pipeline_protected_access = true,
177       .EXT_pipeline_robustness = true,
178       .EXT_physical_device_drm = true,
179       .EXT_primitive_topology_list_restart = true,
180       .EXT_private_data = true,
181       .EXT_primitives_generated_query = false,
182       .EXT_provoking_vertex = true,
183       .EXT_robustness2 = true,
184       .EXT_sample_locations = true,
185       .EXT_sampler_filter_minmax = false,
186       .EXT_scalar_block_layout = true,
187       .EXT_separate_stencil_usage = true,
188       .EXT_shader_image_atomic_int64 = false,
189       .EXT_shader_demote_to_helper_invocation = true,
190       .EXT_shader_module_identifier = true,
191       .EXT_shader_object = true,
192       .EXT_shader_replicated_composites = true,
193       .EXT_shader_stencil_export = true,
194       .EXT_shader_subgroup_ballot = true,
195       .EXT_shader_subgroup_vote = true,
196       .EXT_shader_viewport_index_layer = true,
197       .EXT_subgroup_size_control = true,
198 #ifdef HK_USE_WSI_PLATFORM
199       .EXT_swapchain_maintenance1 = true,
200 #endif
201       .EXT_texel_buffer_alignment = true,
202       .EXT_tooling_info = true,
203       .EXT_transform_feedback = true,
204       .EXT_vertex_attribute_divisor = true,
205       .EXT_vertex_input_dynamic_state = true,
206       .EXT_ycbcr_2plane_444_formats = false,
207       .EXT_ycbcr_image_arrays = false,
208       .GOOGLE_decorate_string = true,
209       .GOOGLE_hlsl_functionality1 = true,
210       .GOOGLE_user_type = true,
211       .VALVE_mutable_descriptor_type = true,
212    };
213 }
214 
215 static void
hk_get_device_features(const struct vk_device_extension_table * supported_extensions,struct vk_features * features)216 hk_get_device_features(
217    const struct vk_device_extension_table *supported_extensions,
218    struct vk_features *features)
219 {
220    *features = (struct vk_features){
221       /* Vulkan 1.0 */
222       .robustBufferAccess = true,
223       .fullDrawIndexUint32 = true,
224       .imageCubeArray = true,
225       .independentBlend = true,
226       .geometryShader = true,
227       .tessellationShader = true,
228       .sampleRateShading = true,
229       .dualSrcBlend = true,
230       .logicOp = true,
231       .multiDrawIndirect = true,
232       .drawIndirectFirstInstance = true,
233       .depthClamp = true,
234       .depthBiasClamp = true,
235       .fillModeNonSolid = true,
236       .depthBounds = false,
237       .wideLines = true,
238       .largePoints = true,
239       .alphaToOne = true,
240       .multiViewport = true,
241       .samplerAnisotropy = true,
242       .textureCompressionETC2 = false,
243       .textureCompressionBC = true,
244       .textureCompressionASTC_LDR = false,
245       .occlusionQueryPrecise = true,
246       .pipelineStatisticsQuery = true,
247       .vertexPipelineStoresAndAtomics = true,
248       .fragmentStoresAndAtomics = true,
249       .shaderTessellationAndGeometryPointSize = true,
250       .shaderImageGatherExtended = true,
251       .shaderStorageImageExtendedFormats = true,
252       /* TODO: hitting the vertex shader timeout in CTS, but should work */
253       .shaderStorageImageMultisample = false,
254       .shaderStorageImageReadWithoutFormat = true,
255       .shaderStorageImageWriteWithoutFormat = true,
256       .shaderUniformBufferArrayDynamicIndexing = true,
257       .shaderSampledImageArrayDynamicIndexing = true,
258       .shaderStorageBufferArrayDynamicIndexing = true,
259       .shaderStorageImageArrayDynamicIndexing = true,
260       .shaderClipDistance = true,
261       .shaderCullDistance = true,
262       .shaderFloat64 = false,
263       .shaderInt64 = true,
264       .shaderInt16 = true,
265       .shaderResourceResidency = false,
266       .shaderResourceMinLod = true,
267       .sparseBinding = false,
268       .sparseResidency2Samples = false,
269       .sparseResidency4Samples = false,
270       .sparseResidency8Samples = false,
271       .sparseResidencyAliased = false,
272       .sparseResidencyBuffer = false,
273       .sparseResidencyImage2D = false,
274       .sparseResidencyImage3D = false,
275       .variableMultisampleRate = false,
276       .inheritedQueries = true,
277 
278       /* Vulkan 1.1 */
279       .storageBuffer16BitAccess = true,
280       .uniformAndStorageBuffer16BitAccess = true,
281       .storagePushConstant16 = true,
282       .storageInputOutput16 = false,
283       .multiview = true,
284       .multiviewGeometryShader = false,
285       .multiviewTessellationShader = false,
286       .variablePointersStorageBuffer = true,
287       .variablePointers = true,
288       .shaderDrawParameters = true,
289       .samplerYcbcrConversion = true,
290 
291       /* Vulkan 1.2 */
292       .samplerMirrorClampToEdge = true,
293       .drawIndirectCount = true,
294       .storageBuffer8BitAccess = true,
295       .uniformAndStorageBuffer8BitAccess = true,
296       .storagePushConstant8 = true,
297       .shaderBufferInt64Atomics = false,
298       .shaderSharedInt64Atomics = false,
299       .shaderFloat16 = true,
300       .shaderInt8 = true,
301       .descriptorIndexing = true,
302       .shaderInputAttachmentArrayDynamicIndexing = true,
303       .shaderUniformTexelBufferArrayDynamicIndexing = true,
304       .shaderStorageTexelBufferArrayDynamicIndexing = true,
305       .shaderUniformBufferArrayNonUniformIndexing = true,
306       .shaderSampledImageArrayNonUniformIndexing = true,
307       .shaderStorageBufferArrayNonUniformIndexing = true,
308       .shaderStorageImageArrayNonUniformIndexing = true,
309       .shaderInputAttachmentArrayNonUniformIndexing = true,
310       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
311       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
312       .descriptorBindingUniformBufferUpdateAfterBind = true,
313       .descriptorBindingSampledImageUpdateAfterBind = true,
314       .descriptorBindingStorageImageUpdateAfterBind = true,
315       .descriptorBindingStorageBufferUpdateAfterBind = true,
316       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
317       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
318       .descriptorBindingUpdateUnusedWhilePending = true,
319       .descriptorBindingPartiallyBound = true,
320       .descriptorBindingVariableDescriptorCount = true,
321       .runtimeDescriptorArray = true,
322       .samplerFilterMinmax = false,
323       .scalarBlockLayout = true,
324       .imagelessFramebuffer = true,
325       .uniformBufferStandardLayout = true,
326       .shaderSubgroupExtendedTypes = true,
327       .separateDepthStencilLayouts = true,
328       .hostQueryReset = true,
329       .timelineSemaphore = true,
330       .bufferDeviceAddress = true,
331       .bufferDeviceAddressCaptureReplay = false,
332       .bufferDeviceAddressMultiDevice = false,
333       .vulkanMemoryModel = true,
334       .vulkanMemoryModelDeviceScope = true,
335       .vulkanMemoryModelAvailabilityVisibilityChains = false,
336       .shaderOutputViewportIndex = true,
337       .shaderOutputLayer = true,
338       .subgroupBroadcastDynamicId = true,
339 
340       /* Vulkan 1.3 */
341       .robustImageAccess = true,
342       .inlineUniformBlock = true,
343       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
344       .pipelineCreationCacheControl = true,
345       .privateData = true,
346       .shaderDemoteToHelperInvocation = true,
347       .shaderTerminateInvocation = true,
348       .subgroupSizeControl = true,
349       .computeFullSubgroups = true,
350       .synchronization2 = true,
351       .shaderZeroInitializeWorkgroupMemory = true,
352       .dynamicRendering = true,
353       .shaderIntegerDotProduct = true,
354       .maintenance4 = true,
355 
356       /* VK_KHR_dynamic_rendering_local_read */
357       .dynamicRenderingLocalRead = true,
358 
359       /* VK_KHR_fragment_shader_barycentric */
360       .fragmentShaderBarycentric = false,
361 
362       /* VK_KHR_global_priority */
363       .globalPriorityQuery = true,
364 
365       /* VK_KHR_index_type_uint8 */
366       .indexTypeUint8 = true,
367 
368       /* VK_KHR_line_rasterization */
369       .rectangularLines = false,
370       .bresenhamLines = true,
371       .smoothLines = false,
372       .stippledRectangularLines = false,
373       .stippledBresenhamLines = false,
374       .stippledSmoothLines = false,
375 
376       /* VK_KHR_maintenance5 */
377       .maintenance5 = true,
378 
379       /* VK_KHR_maintenance6 */
380       .maintenance6 = true,
381 
382       /* VK_KHR_pipeline_executable_properties */
383       .pipelineExecutableInfo = true,
384 
385       /* VK_KHR_present_id */
386       .presentId = false,
387 
388       /* VK_KHR_present_wait */
389       .presentWait = false,
390 
391       /* VK_KHR_shader_clock */
392       .shaderSubgroupClock = false,
393       .shaderDeviceClock = false,
394 
395       /* VK_KHR_shader_expect_assume */
396       .shaderExpectAssume = true,
397 
398       /* VK_KHR_shader_float_controls2 */
399       .shaderFloatControls2 = true,
400 
401       /* VK_KHR_shader_maximal_reconvergence */
402       .shaderMaximalReconvergence = true,
403 
404       /* VK_KHR_shader_subgroup_rotate */
405       .shaderSubgroupRotate = true,
406       .shaderSubgroupRotateClustered = true,
407 
408       /* VK_KHR_vertex_attribute_divisor */
409       .vertexAttributeInstanceRateDivisor = true,
410       .vertexAttributeInstanceRateZeroDivisor = true,
411 
412       /* VK_KHR_workgroup_memory_explicit_layout */
413       .workgroupMemoryExplicitLayout = true,
414       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
415       .workgroupMemoryExplicitLayout8BitAccess = true,
416       .workgroupMemoryExplicitLayout16BitAccess = true,
417 
418       /* VK_EXT_4444_formats */
419       .formatA4R4G4B4 = true,
420       .formatA4B4G4R4 = true,
421 
422       /* VK_EXT_attachment_feedback_loop_layout */
423       .attachmentFeedbackLoopLayout = true,
424 
425       /* VK_EXT_border_color_swizzle */
426       .borderColorSwizzle = true,
427       .borderColorSwizzleFromImage = false,
428 
429       /* VK_EXT_buffer_device_address */
430       .bufferDeviceAddressCaptureReplayEXT = false,
431 
432       /* VK_EXT_color_write_enable */
433       .colorWriteEnable = true,
434 
435       /* VK_EXT_conditional_rendering */
436       .conditionalRendering = false,
437       .inheritedConditionalRendering = false,
438 
439       /* VK_EXT_custom_border_color */
440       .customBorderColors = true,
441       .customBorderColorWithoutFormat = true,
442 
443       /* VK_EXT_depth_bias_control */
444       .depthBiasControl = false,
445       .leastRepresentableValueForceUnormRepresentation = false,
446       .floatRepresentation = false,
447       .depthBiasExact = false,
448 
449       /* VK_EXT_depth_clip_control */
450       .depthClipControl = false,
451 
452       /* VK_EXT_depth_clip_enable */
453       .depthClipEnable = true,
454 
455       /* VK_EXT_dynamic_rendering_unused_attachments */
456       .dynamicRenderingUnusedAttachments = true,
457 
458       /* VK_EXT_extended_dynamic_state */
459       .extendedDynamicState = true,
460 
461       /* VK_EXT_extended_dynamic_state2 */
462       .extendedDynamicState2 = true,
463       .extendedDynamicState2LogicOp = true,
464       .extendedDynamicState2PatchControlPoints = true,
465 
466       /* VK_EXT_extended_dynamic_state3 */
467       .extendedDynamicState3TessellationDomainOrigin = true,
468       .extendedDynamicState3DepthClampEnable = true,
469       .extendedDynamicState3PolygonMode = true,
470       .extendedDynamicState3RasterizationSamples = true,
471       .extendedDynamicState3SampleMask = true,
472       .extendedDynamicState3AlphaToCoverageEnable = true,
473       .extendedDynamicState3AlphaToOneEnable = true,
474       .extendedDynamicState3LogicOpEnable = true,
475       .extendedDynamicState3ColorBlendEnable = true,
476       .extendedDynamicState3ColorBlendEquation = true,
477       .extendedDynamicState3ColorWriteMask = true,
478       .extendedDynamicState3RasterizationStream = false,
479       .extendedDynamicState3ConservativeRasterizationMode = false,
480       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
481       .extendedDynamicState3DepthClipEnable = true,
482       .extendedDynamicState3SampleLocationsEnable = false,
483       .extendedDynamicState3ColorBlendAdvanced = false,
484       .extendedDynamicState3ProvokingVertexMode = true,
485       .extendedDynamicState3LineRasterizationMode = true,
486       .extendedDynamicState3LineStippleEnable = false,
487       .extendedDynamicState3DepthClipNegativeOneToOne = false,
488       .extendedDynamicState3ViewportWScalingEnable = false,
489       .extendedDynamicState3ViewportSwizzle = false,
490       .extendedDynamicState3CoverageToColorEnable = false,
491       .extendedDynamicState3CoverageToColorLocation = false,
492       .extendedDynamicState3CoverageModulationMode = false,
493       .extendedDynamicState3CoverageModulationTableEnable = false,
494       .extendedDynamicState3CoverageModulationTable = false,
495       .extendedDynamicState3CoverageReductionMode = false,
496       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
497       .extendedDynamicState3ShadingRateImageEnable = false,
498 
499       /* VK_EXT_graphics_pipeline_library */
500       .graphicsPipelineLibrary = true,
501 
502       /* VK_EXT_host_image_copy */
503       .hostImageCopy = true,
504 
505       /* VK_EXT_image_2d_view_of_3d */
506       .image2DViewOf3D = true,
507       .sampler2DViewOf3D = true,
508 
509       /* VK_EXT_image_sliced_view_of_3d */
510       .imageSlicedViewOf3D = false,
511 
512 #ifdef HK_USE_WSI_PLATFORM
513       /* VK_EXT_swapchain_maintenance1 */
514       .swapchainMaintenance1 = false,
515 #endif
516 
517       /* VK_EXT_image_view_min_lod */
518       .minLod = false,
519 
520       /* VK_EXT_map_memory_placed */
521       .memoryMapPlaced = false,
522       .memoryMapRangePlaced = false,
523       .memoryUnmapReserve = false,
524 
525       /* VK_EXT_multi_draw */
526       .multiDraw = true,
527 
528       /* VK_EXT_mutable_descriptor_type */
529       .mutableDescriptorType = true,
530 
531       /* VK_EXT_non_seamless_cube_map */
532       .nonSeamlessCubeMap = true,
533 
534       /* VK_EXT_pipeline_protected_access */
535       .pipelineProtectedAccess = true,
536 
537       /* VK_EXT_pipeline_robustness */
538       .pipelineRobustness = true,
539 
540       /* VK_EXT_primitive_topology_list_restart */
541       .primitiveTopologyListRestart = true,
542       .primitiveTopologyPatchListRestart = false,
543 
544       /* VK_EXT_primitives_generated_query */
545       .primitivesGeneratedQuery = false,
546       .primitivesGeneratedQueryWithNonZeroStreams = false,
547       .primitivesGeneratedQueryWithRasterizerDiscard = false,
548 
549       /* VK_EXT_provoking_vertex */
550       .provokingVertexLast = true,
551       .transformFeedbackPreservesProvokingVertex = true,
552 
553       /* VK_EXT_robustness2 */
554       .robustBufferAccess2 = true,
555       .robustImageAccess2 = true,
556       .nullDescriptor = true,
557 
558       /* VK_EXT_shader_image_atomic_int64 */
559       .shaderImageInt64Atomics = false,
560       .sparseImageInt64Atomics = false,
561 
562       /* VK_EXT_shader_module_identifier */
563       .shaderModuleIdentifier = true,
564 
565       /* VK_EXT_shader_object */
566       .shaderObject = true,
567 
568       /* VK_EXT_shader_replicated_composites */
569       .shaderReplicatedComposites = true,
570 
571       /* VK_KHR_shader_subgroup_uniform_control_flow */
572       .shaderSubgroupUniformControlFlow = true,
573 
574       /* VK_EXT_texel_buffer_alignment */
575       .texelBufferAlignment = true,
576 
577       /* VK_EXT_transform_feedback */
578       .transformFeedback = true,
579       .geometryStreams = true,
580 
581       /* VK_EXT_vertex_input_dynamic_state */
582       .vertexInputDynamicState = true,
583 
584       /* VK_EXT_ycbcr_2plane_444_formats */
585       .ycbcr2plane444Formats = false,
586 
587       /* VK_EXT_ycbcr_image_arrays */
588       .ycbcrImageArrays = false,
589 
590       /* VK_KHR_shader_relaxed_extended_instruction */
591       .shaderRelaxedExtendedInstruction = true,
592    };
593 }
594 
595 static void
hk_get_device_properties(const struct agx_device * dev,const struct hk_instance * instance,struct vk_properties * properties)596 hk_get_device_properties(const struct agx_device *dev,
597                          const struct hk_instance *instance,
598                          struct vk_properties *properties)
599 {
600    const VkSampleCountFlagBits sample_counts =
601       VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
602 
603    uint64_t os_page_size = 16384;
604    os_get_page_size(&os_page_size);
605 
606    *properties = (struct vk_properties){
607       .apiVersion = hk_get_vk_version(),
608       .driverVersion = vk_get_driver_version(),
609       .vendorID = instance->force_vk_vendor ?: VK_VENDOR_ID_MESA,
610       .deviceID = 0,
611       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
612 
613       /* Vulkan 1.0 limits */
614       .maxImageDimension1D = 16384,
615       .maxImageDimension2D = 16384,
616       .maxImageDimension3D = 16384,
617       .maxImageDimensionCube = 16384,
618       .maxImageArrayLayers = 2048,
619       .maxTexelBufferElements = AGX_TEXTURE_BUFFER_MAX_SIZE,
620       .maxUniformBufferRange = 65536,
621       .maxStorageBufferRange = UINT32_MAX,
622       .maxPushConstantsSize = HK_MAX_PUSH_SIZE,
623       .maxMemoryAllocationCount = 4096,
624       .maxSamplerAllocationCount = 4000,
625       .bufferImageGranularity = 0x400,
626       .sparseAddressSpaceSize = HK_SPARSE_ADDR_SPACE_SIZE,
627       .maxBoundDescriptorSets = HK_MAX_SETS,
628       .maxPerStageDescriptorSamplers = HK_MAX_DESCRIPTORS,
629       .maxPerStageDescriptorUniformBuffers = HK_MAX_DESCRIPTORS,
630       .maxPerStageDescriptorStorageBuffers = HK_MAX_DESCRIPTORS,
631       .maxPerStageDescriptorSampledImages = HK_MAX_DESCRIPTORS,
632       .maxPerStageDescriptorStorageImages = HK_MAX_DESCRIPTORS,
633       .maxPerStageDescriptorInputAttachments = HK_MAX_DESCRIPTORS,
634       .maxPerStageResources = UINT32_MAX,
635       .maxDescriptorSetSamplers = HK_MAX_DESCRIPTORS,
636       .maxDescriptorSetUniformBuffers = HK_MAX_DESCRIPTORS,
637       .maxDescriptorSetUniformBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
638       .maxDescriptorSetStorageBuffers = HK_MAX_DESCRIPTORS,
639       .maxDescriptorSetStorageBuffersDynamic = HK_MAX_DYNAMIC_BUFFERS / 2,
640       .maxDescriptorSetSampledImages = HK_MAX_DESCRIPTORS,
641       .maxDescriptorSetStorageImages = HK_MAX_DESCRIPTORS,
642       .maxDescriptorSetInputAttachments = HK_MAX_DESCRIPTORS,
643       .maxVertexInputAttributes = AGX_MAX_VBUFS,
644       .maxVertexInputBindings = AGX_MAX_ATTRIBS,
645       .maxVertexInputAttributeOffset = 65535,
646       .maxVertexInputBindingStride = 2048,
647       .maxVertexOutputComponents = 64,
648       .maxGeometryShaderInvocations = 32,
649       .maxGeometryInputComponents = 128,
650       .maxGeometryOutputComponents = 128,
651       .maxGeometryOutputVertices = 1024,
652       .maxGeometryTotalOutputComponents = 1024,
653       .maxTessellationGenerationLevel = 64,
654       .maxTessellationPatchSize = 32,
655       .maxTessellationControlPerVertexInputComponents = 128,
656       .maxTessellationControlPerVertexOutputComponents = 128,
657       .maxTessellationControlPerPatchOutputComponents = 120,
658       .maxTessellationControlTotalOutputComponents = 4216,
659       .maxTessellationEvaluationInputComponents = 128,
660       .maxTessellationEvaluationOutputComponents = 128,
661       .maxFragmentInputComponents = 64,
662       .maxFragmentOutputAttachments = HK_MAX_RTS,
663       .maxFragmentDualSrcAttachments = 1,
664       .maxFragmentCombinedOutputResources = 16,
665       .maxComputeSharedMemorySize = HK_MAX_SHARED_SIZE,
666       .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
667       .maxComputeWorkGroupInvocations = 1024,
668       .maxComputeWorkGroupSize = {1024, 1024, 64},
669       .subPixelPrecisionBits = 8,
670       .subTexelPrecisionBits = 8,
671       .mipmapPrecisionBits = 8,
672       .maxDrawIndexedIndexValue = UINT32_MAX,
673       .maxDrawIndirectCount = UINT16_MAX,
674       .maxSamplerLodBias = 15,
675       .maxSamplerAnisotropy = 16,
676       .maxViewports = HK_MAX_VIEWPORTS,
677       .maxViewportDimensions = {32768, 32768},
678       .viewportBoundsRange = {-65536, 65536},
679       .viewportSubPixelBits = 8,
680       .minMemoryMapAlignment = os_page_size,
681       .minTexelBufferOffsetAlignment = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
682       .minUniformBufferOffsetAlignment = HK_MIN_UBO_ALIGNMENT,
683       .minStorageBufferOffsetAlignment = HK_MIN_SSBO_ALIGNMENT,
684       .minTexelOffset = -8,
685       .maxTexelOffset = 7,
686       .minTexelGatherOffset = -8,
687       .maxTexelGatherOffset = 7,
688       .minInterpolationOffset = -0.5,
689       .maxInterpolationOffset = 0.4375,
690       .subPixelInterpolationOffsetBits = 4,
691       .maxFramebufferHeight = 16384,
692       .maxFramebufferWidth = 16384,
693       .maxFramebufferLayers = 2048,
694       .framebufferColorSampleCounts = sample_counts,
695       .framebufferDepthSampleCounts = sample_counts,
696       .framebufferNoAttachmentsSampleCounts = sample_counts,
697       .framebufferStencilSampleCounts = sample_counts,
698       .maxColorAttachments = HK_MAX_RTS,
699       .sampledImageColorSampleCounts = sample_counts,
700       .sampledImageIntegerSampleCounts = sample_counts,
701       .sampledImageDepthSampleCounts = sample_counts,
702       .sampledImageStencilSampleCounts = sample_counts,
703       .storageImageSampleCounts = sample_counts,
704       .maxSampleMaskWords = 1,
705       .timestampComputeAndGraphics = false,
706       .timestampPeriod = 1,
707       .maxClipDistances = 8,
708       .maxCullDistances = 8,
709       .maxCombinedClipAndCullDistances = 8,
710       .discreteQueuePriorities = 2,
711       .pointSizeRange = {1.0, 512.f - 0.0625f},
712       .lineWidthRange = {1.0, 16.0f},
713       .pointSizeGranularity = 0.0625,
714       .lineWidthGranularity = 1.0f / 16.0f,
715       .strictLines = false,
716       .standardSampleLocations = true,
717       .optimalBufferCopyOffsetAlignment = 1,
718       .optimalBufferCopyRowPitchAlignment = 1,
719       .nonCoherentAtomSize = 64,
720 
721       /* Vulkan 1.0 sparse properties */
722       .sparseResidencyNonResidentStrict = false,
723       .sparseResidencyAlignedMipSize = false,
724       .sparseResidencyStandard2DBlockShape = false,
725       .sparseResidencyStandard2DMultisampleBlockShape = false,
726       .sparseResidencyStandard3DBlockShape = false,
727 
728       /* Vulkan 1.1 properties */
729       .subgroupSize = 32,
730       .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT |
731                                  VK_SHADER_STAGE_FRAGMENT_BIT |
732                                  VK_SHADER_STAGE_VERTEX_BIT,
733       .subgroupSupportedOperations =
734          VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
735          VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
736          VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
737          VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
738          VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
739          VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
740          VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
741          VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR,
742       .subgroupQuadOperationsInAllStages = true,
743       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
744       .maxMultiviewViewCount = HK_MAX_MULTIVIEW_VIEW_COUNT,
745       .maxMultiviewInstanceIndex = UINT32_MAX,
746       .maxPerSetDescriptors = UINT32_MAX,
747       .maxMemoryAllocationSize = (1u << 31),
748 
749       /* Vulkan 1.2 properties */
750       .supportedDepthResolveModes =
751          VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
752          VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
753       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
754                                       VK_RESOLVE_MODE_MIN_BIT |
755                                       VK_RESOLVE_MODE_MAX_BIT,
756       .independentResolveNone = true,
757       .independentResolve = true,
758       .driverID = VK_DRIVER_ID_MESA_HONEYKRISP,
759       .conformanceVersion = (VkConformanceVersion){1, 3, 8, 3},
760       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
761       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
762       .shaderSignedZeroInfNanPreserveFloat16 = true,
763       .shaderSignedZeroInfNanPreserveFloat32 = true,
764       .shaderSignedZeroInfNanPreserveFloat64 = false,
765       .shaderDenormPreserveFloat16 = true,
766       .shaderDenormPreserveFloat32 = false,
767       .shaderDenormPreserveFloat64 = false,
768       .shaderDenormFlushToZeroFloat16 = false,
769       .shaderDenormFlushToZeroFloat32 = true,
770       .shaderDenormFlushToZeroFloat64 = false,
771       .shaderRoundingModeRTEFloat16 = true,
772       .shaderRoundingModeRTEFloat32 = true,
773       .shaderRoundingModeRTEFloat64 = false,
774       .shaderRoundingModeRTZFloat16 = false,
775       .shaderRoundingModeRTZFloat32 = false,
776       .shaderRoundingModeRTZFloat64 = false,
777       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
778       .shaderUniformBufferArrayNonUniformIndexingNative = true,
779       .shaderSampledImageArrayNonUniformIndexingNative = true,
780       .shaderStorageBufferArrayNonUniformIndexingNative = true,
781       .shaderStorageImageArrayNonUniformIndexingNative = true,
782       .shaderInputAttachmentArrayNonUniformIndexingNative = true,
783       .robustBufferAccessUpdateAfterBind = true,
784       .quadDivergentImplicitLod = false,
785       .maxPerStageDescriptorUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
786       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
787       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
788       .maxPerStageDescriptorUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
789       .maxPerStageDescriptorUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
790       .maxPerStageDescriptorUpdateAfterBindInputAttachments =
791          HK_MAX_DESCRIPTORS,
792       .maxPerStageUpdateAfterBindResources = UINT32_MAX,
793       .maxDescriptorSetUpdateAfterBindSamplers = HK_MAX_DESCRIPTORS,
794       .maxDescriptorSetUpdateAfterBindUniformBuffers = HK_MAX_DESCRIPTORS,
795       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
796          HK_MAX_DYNAMIC_BUFFERS / 2,
797       .maxDescriptorSetUpdateAfterBindStorageBuffers = HK_MAX_DESCRIPTORS,
798       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
799          HK_MAX_DYNAMIC_BUFFERS / 2,
800       .maxDescriptorSetUpdateAfterBindSampledImages = HK_MAX_DESCRIPTORS,
801       .maxDescriptorSetUpdateAfterBindStorageImages = HK_MAX_DESCRIPTORS,
802       .maxDescriptorSetUpdateAfterBindInputAttachments = HK_MAX_DESCRIPTORS,
803       .filterMinmaxSingleComponentFormats = false,
804       .filterMinmaxImageComponentMapping = false,
805       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
806       .framebufferIntegerColorSampleCounts = sample_counts,
807 
808       /* Vulkan 1.3 properties */
809       .minSubgroupSize = 32,
810       .maxSubgroupSize = 32,
811       .maxComputeWorkgroupSubgroups = 1024 / 32,
812       .requiredSubgroupSizeStages = 0,
813       .maxInlineUniformBlockSize = 1 << 16,
814       .maxPerStageDescriptorInlineUniformBlocks = 32,
815       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
816       .maxDescriptorSetInlineUniformBlocks = 6 * 32,
817       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
818       .maxInlineUniformTotalSize = 1 << 16,
819       .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
820       .integerDotProduct4x8BitPackedSignedAccelerated = false,
821       .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
822       .storageTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
823       .storageTexelBufferOffsetSingleTexelAlignment = true,
824       .uniformTexelBufferOffsetAlignmentBytes = HK_MIN_TEXEL_BUFFER_ALIGNMENT,
825       .uniformTexelBufferOffsetSingleTexelAlignment = true,
826       .maxBufferSize = HK_MAX_BUFFER_SIZE,
827 
828       /* VK_KHR_push_descriptor */
829       .maxPushDescriptors = HK_MAX_PUSH_DESCRIPTORS,
830 
831       /* VK_EXT_custom_border_color */
832       .maxCustomBorderColorSamplers = 4000,
833 
834       /* VK_EXT_extended_dynamic_state3 */
835       .dynamicPrimitiveTopologyUnrestricted = true,
836 
837       /* VK_EXT_graphics_pipeline_library */
838       .graphicsPipelineLibraryFastLinking = true,
839       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
840 
841       /* VK_EXT_host_image_copy */
842 
843       /* VK_KHR_line_rasterization */
844       .lineSubPixelPrecisionBits = 8,
845 
846       /* VK_KHR_maintenance5 */
847       .earlyFragmentMultisampleCoverageAfterSampleCounting = false,
848       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
849       .depthStencilSwizzleOneSupport = true,
850       .polygonModePointSize = false,
851       .nonStrictSinglePixelWideLinesUseParallelogram = false,
852       .nonStrictWideLinesUseParallelogram = false,
853 
854       /* VK_KHR_maintenance6 */
855       .blockTexelViewCompatibleMultipleLayers = false,
856       .maxCombinedImageSamplerDescriptorCount = 3,
857       .fragmentShadingRateClampCombinerInputs = false, /* TODO */
858 
859       /* VK_EXT_map_memory_placed */
860       .minPlacedMemoryMapAlignment = os_page_size,
861 
862       /* VK_EXT_multi_draw */
863       .maxMultiDrawCount = UINT16_MAX,
864 
865       /* VK_EXT_pipeline_robustness */
866       .defaultRobustnessStorageBuffers =
867          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
868       .defaultRobustnessUniformBuffers =
869          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
870       .defaultRobustnessVertexInputs =
871          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
872       .defaultRobustnessImages =
873          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
874 
875       /* VK_EXT_physical_device_drm gets populated later */
876 
877       /* VK_EXT_provoking_vertex */
878       .provokingVertexModePerPipeline = true,
879       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
880 
881       /* VK_EXT_robustness2 */
882       .robustStorageBufferAccessSizeAlignment = HK_SSBO_BOUNDS_CHECK_ALIGNMENT,
883       .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
884 
885       /* VK_EXT_sample_locations */
886       .sampleLocationSampleCounts = sample_counts,
887       .maxSampleLocationGridSize = (VkExtent2D){1, 1},
888       .sampleLocationCoordinateRange[0] = 0.0f,
889       .sampleLocationCoordinateRange[1] = 0.9375f,
890       .sampleLocationSubPixelBits = 4,
891       .variableSampleLocations = false,
892 
893       /* VK_EXT_shader_object */
894       .shaderBinaryVersion = 0,
895 
896       /* VK_EXT_transform_feedback */
897       .maxTransformFeedbackStreams = 4,
898       .maxTransformFeedbackBuffers = 4,
899       .maxTransformFeedbackBufferSize = UINT32_MAX,
900       .maxTransformFeedbackStreamDataSize = 2048,
901       .maxTransformFeedbackBufferDataSize = 512,
902       .maxTransformFeedbackBufferDataStride = 2048,
903       .transformFeedbackQueries = true,
904       .transformFeedbackStreamsLinesTriangles = false,
905       .transformFeedbackRasterizationStreamSelect = false,
906       .transformFeedbackDraw = false,
907 
908       /* VK_KHR_vertex_attribute_divisor */
909       .maxVertexAttribDivisor = UINT32_MAX,
910       .supportsNonZeroFirstInstance = true,
911 
912       /* VK_KHR_fragment_shader_barycentric */
913       .triStripVertexOrderIndependentOfProvokingVertex = false,
914    };
915 
916    strncpy(properties->deviceName, dev->name, sizeof(properties->deviceName));
917 
918    /* VK_EXT_shader_module_identifier */
919    static_assert(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
920                  sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
921    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
922           vk_shaderModuleIdentifierAlgorithmUUID,
923           sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
924 
925    const struct {
926       uint16_t vendor_id;
927       uint16_t device_id;
928       uint8_t pad[12];
929    } dev_uuid = {
930       .vendor_id = 0,
931       .device_id = 0,
932    };
933    static_assert(sizeof(dev_uuid) == VK_UUID_SIZE);
934    memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
935    static_assert(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
936    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
937 
938    strncpy(properties->driverName, "Honeykrisp", VK_MAX_DRIVER_NAME_SIZE);
939    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
940             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
941 
942    /* We don't use the layouts ATM so just report all layouts from
943     * extensions that we support as compatible.
944     */
945    static const VkImageLayout supported_layouts[] = {
946       VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
947       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
948       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
949       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
950       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
951       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
952       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
953       VK_IMAGE_LAYOUT_PREINITIALIZED,
954       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
955       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
956       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
957       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
958       VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
959       VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
960       VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
961       VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
962       // VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
963       VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
964    };
965 
966    properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
967    properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
968    properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
969    properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
970 
971    /* We're a UMR so we can always map every kind of memory */
972    properties->identicalMemoryTypeRequirements = true;
973 
974    {
975       struct mesa_sha1 sha1_ctx;
976       uint8_t sha1[20];
977 
978       _mesa_sha1_init(&sha1_ctx);
979       /* Make sure we don't match with other vendors */
980       const char *driver = "honeykrisp-v1";
981       _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
982       _mesa_sha1_final(&sha1_ctx, sha1);
983 
984       memcpy(properties->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
985    }
986 }
987 
988 static void
hk_physical_device_init_pipeline_cache(struct hk_physical_device * pdev)989 hk_physical_device_init_pipeline_cache(struct hk_physical_device *pdev)
990 {
991    struct hk_instance *instance = hk_physical_device_instance(pdev);
992 
993    struct mesa_sha1 sha_ctx;
994    _mesa_sha1_init(&sha_ctx);
995 
996    _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
997                      sizeof(instance->driver_build_sha));
998 
999    const uint64_t compiler_flags = hk_physical_device_compiler_flags(pdev);
1000    _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1001 
1002    unsigned char sha[SHA1_DIGEST_LENGTH];
1003    _mesa_sha1_final(&sha_ctx, sha);
1004 
1005    static_assert(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1006    memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1007    memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1008 
1009 #ifdef ENABLE_SHADER_CACHE
1010    char renderer[10];
1011    ASSERTED int len = snprintf(renderer, sizeof(renderer), "hk_g13g_");
1012    assert(len == sizeof(renderer) - 2);
1013 
1014    char timestamp[41];
1015    _mesa_sha1_format(timestamp, instance->driver_build_sha);
1016 
1017    const uint64_t driver_flags = hk_physical_device_compiler_flags(pdev);
1018    pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1019 #endif
1020 }
1021 
1022 static void
hk_physical_device_free_disk_cache(struct hk_physical_device * pdev)1023 hk_physical_device_free_disk_cache(struct hk_physical_device *pdev)
1024 {
1025 #ifdef ENABLE_SHADER_CACHE
1026    if (pdev->vk.disk_cache) {
1027       disk_cache_destroy(pdev->vk.disk_cache);
1028       pdev->vk.disk_cache = NULL;
1029    }
1030 #else
1031    assert(pdev->vk.disk_cache == NULL);
1032 #endif
1033 }
1034 
/*
 * Size in bytes of the system-memory heap to advertise.
 *
 * Returns 3/4 of the total physical memory, rounded down to a multiple of
 * 1 MiB, or 0 when the total cannot be queried.
 */
static uint64_t
hk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Use 3/4 of total size to avoid swapping */
      const uint64_t usable_B = total_B * 3 / 4;

      return ROUND_DOWN_TO(usable_B, 1 << 20);
   }

   return 0;
}
1045 
/*
 * Bytes of the system-memory heap currently considered available.
 *
 * Returns 3/4 of the available system memory, rounded down to a multiple of
 * 1 MiB. If the query fails, an error is logged against pdev and 0 is
 * returned.
 */
static uint64_t
hk_get_sysmem_heap_available(struct hk_physical_device *pdev)
{
   uint64_t free_B = 0;

   if (os_get_available_system_memory(&free_B)) {
      /* Use 3/4 of available to avoid swapping */
      const uint64_t usable_B = free_B * 3 / 4;

      return ROUND_DOWN_TO(usable_B, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1058 
1059 VkResult
hk_create_drm_physical_device(struct vk_instance * _instance,drmDevicePtr drm_device,struct vk_physical_device ** pdev_out)1060 hk_create_drm_physical_device(struct vk_instance *_instance,
1061                               drmDevicePtr drm_device,
1062                               struct vk_physical_device **pdev_out)
1063 {
1064    struct hk_instance *instance = (struct hk_instance *)_instance;
1065    VkResult result;
1066 
1067    /* Blanket refusal to probe due to unstable UAPI. */
1068    return VK_ERROR_INCOMPATIBLE_DRIVER;
1069 
1070    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
1071        drm_device->bustype != DRM_BUS_PLATFORM)
1072       return VK_ERROR_INCOMPATIBLE_DRIVER;
1073 
1074    const char *path = drm_device->nodes[DRM_NODE_RENDER];
1075    int fd = open(path, O_RDWR | O_CLOEXEC);
1076    if (fd < 0) {
1077       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1078                        "failed to open device %s", path);
1079    }
1080 
1081    drmVersionPtr version = drmGetVersion(fd);
1082    if (!version) {
1083       result =
1084          vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1085                    "failed to query kernel driver version for device %s", path);
1086       goto fail_fd;
1087    }
1088 
1089    bool is_asahi = (strcmp(version->name, "asahi") == 0);
1090    is_asahi |= strcmp(version->name, "virtio_gpu") == 0;
1091    drmFreeVersion(version);
1092 
1093    if (!is_asahi) {
1094       result =
1095          vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1096                    "device %s does not use the asahi kernel driver", path);
1097       goto fail_fd;
1098    }
1099 
1100    struct stat st;
1101    if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
1102       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1103                          "fstat() failed on %s: %m",
1104                          drm_device->nodes[DRM_NODE_RENDER]);
1105       goto fail_fd;
1106    }
1107    const dev_t render_dev = st.st_rdev;
1108 
1109    struct hk_physical_device *pdev =
1110       vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
1111                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1112 
1113    if (pdev == NULL) {
1114       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1115       goto fail_fd;
1116    }
1117 
1118    /* TODO: we're render-only, should we be reporting displays anyway in
1119     * KHR_display?
1120     */
1121    pdev->master_fd = -1;
1122 
1123 #if 0
1124    if (instance->vk.enabled_extensions.KHR_display) {
1125       int master_fd =
1126          open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
1127 
1128       if (master_fd >= 0) {
1129          struct stat st;
1130          if (!stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
1131             pdev->master_fd = master_fd;
1132             properties.drmHasPrimary = true;
1133             properties.drmPrimaryMajor = major(st.st_rdev);
1134             properties.drmPrimaryMinor = minor(st.st_rdev);
1135          }
1136       }
1137    }
1138 #endif
1139 
1140    pdev->render_dev = render_dev;
1141    pdev->dev.fd = fd;
1142 
1143    if (!agx_open_device(NULL, &pdev->dev)) {
1144       result = vk_error(instance, VK_ERROR_UNKNOWN);
1145       goto fail_pdev_alloc;
1146    }
1147 
1148    struct vk_physical_device_dispatch_table dispatch_table;
1149    vk_physical_device_dispatch_table_from_entrypoints(
1150       &dispatch_table, &hk_physical_device_entrypoints, true);
1151    vk_physical_device_dispatch_table_from_entrypoints(
1152       &dispatch_table, &wsi_physical_device_entrypoints, false);
1153 
1154    struct vk_device_extension_table supported_extensions;
1155    hk_get_device_extensions(instance, &supported_extensions);
1156 
1157    struct vk_features supported_features;
1158    hk_get_device_features(&supported_extensions, &supported_features);
1159 
1160    struct vk_properties properties;
1161    hk_get_device_properties(&pdev->dev, instance, &properties);
1162 
1163    properties.drmHasRender = true;
1164    properties.drmRenderMajor = major(render_dev);
1165    properties.drmRenderMinor = minor(render_dev);
1166 
1167    result = vk_physical_device_init(&pdev->vk, &instance->vk,
1168                                     &supported_extensions, &supported_features,
1169                                     &properties, &dispatch_table);
1170    if (result != VK_SUCCESS)
1171       goto fail_agx_device;
1172 
1173    hk_physical_device_init_pipeline_cache(pdev);
1174 
1175    uint64_t sysmem_size_B = hk_get_sysmem_heap_size();
1176    if (sysmem_size_B == 0) {
1177       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1178                          "Failed to query total system memory");
1179       goto fail_disk_cache;
1180    }
1181 
1182    uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1183    pdev->mem_heaps[sysmem_heap_idx] = (struct hk_memory_heap){
1184       .size = sysmem_size_B,
1185       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1186       .available = hk_get_sysmem_heap_available,
1187    };
1188 
1189    pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){
1190       .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1191                        VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1192                        VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
1193                        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1194       .heapIndex = sysmem_heap_idx,
1195    };
1196 
1197    assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1198    assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1199 
1200    /* TODO: VK_QUEUE_SPARSE_BINDING_BIT*/
1201    pdev->queue_families[pdev->queue_family_count++] = (struct hk_queue_family){
1202       .queue_flags =
1203          VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1204 
1205       .queue_count = 1,
1206    };
1207    assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1208 
1209    unsigned st_idx = 0;
1210    pdev->syncobj_sync_type = vk_drm_syncobj_get_type(fd);
1211    pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
1212    pdev->sync_types[st_idx++] = NULL;
1213    assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
1214    pdev->vk.supported_sync_types = pdev->sync_types;
1215 
1216    result = hk_init_wsi(pdev);
1217    if (result != VK_SUCCESS)
1218       goto fail_disk_cache;
1219 
1220    *pdev_out = &pdev->vk;
1221 
1222    return VK_SUCCESS;
1223 
1224 fail_disk_cache:
1225    hk_physical_device_free_disk_cache(pdev);
1226    vk_physical_device_finish(&pdev->vk);
1227 fail_agx_device:
1228    agx_close_device(&pdev->dev);
1229 fail_pdev_alloc:
1230    if (pdev->master_fd)
1231       close(pdev->master_fd);
1232 
1233    vk_free(&pdev->vk.instance->alloc, pdev);
1234 fail_fd:
1235    close(fd);
1236    return result;
1237 }
1238 
1239 void
hk_physical_device_destroy(struct vk_physical_device * vk_pdev)1240 hk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1241 {
1242    struct hk_physical_device *pdev =
1243       container_of(vk_pdev, struct hk_physical_device, vk);
1244 
1245    hk_finish_wsi(pdev);
1246 
1247    if (pdev->master_fd >= 0)
1248       close(pdev->master_fd);
1249 
1250    hk_physical_device_free_disk_cache(pdev);
1251    agx_close_device(&pdev->dev);
1252    vk_physical_device_finish(&pdev->vk);
1253    vk_free(&pdev->vk.instance->alloc, pdev);
1254 }
1255 
1256 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1257 hk_GetPhysicalDeviceMemoryProperties2(
1258    VkPhysicalDevice physicalDevice,
1259    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1260 {
1261    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1262 
1263    pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1264    for (int i = 0; i < pdev->mem_heap_count; i++) {
1265       pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
1266          .size = pdev->mem_heaps[i].size,
1267          .flags = pdev->mem_heaps[i].flags,
1268       };
1269    }
1270 
1271    pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1272    for (int i = 0; i < pdev->mem_type_count; i++) {
1273       pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1274    }
1275 
1276    vk_foreach_struct(ext, pMemoryProperties->pNext) {
1277       switch (ext->sType) {
1278       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1279          VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1280 
1281          for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1282             const struct hk_memory_heap *heap = &pdev->mem_heaps[i];
1283             uint64_t used = p_atomic_read(&heap->used);
1284 
1285             /* From the Vulkan 1.3.278 spec:
1286              *
1287              *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1288              *    values in which memory usages are returned, with one element
1289              *    for each memory heap. A heap’s usage is an estimate of how
1290              *    much memory the process is currently using in that heap."
1291              *
1292              * TODO: Include internal allocations?
1293              */
1294             p->heapUsage[i] = used;
1295 
1296             uint64_t available = heap->size;
1297             if (heap->available)
1298                available = heap->available(pdev);
1299 
1300             /* From the Vulkan 1.3.278 spec:
1301              *
1302              *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1303              *    values in which memory budgets are returned, with one
1304              *    element for each memory heap. A heap’s budget is a rough
1305              *    estimate of how much memory the process can allocate from
1306              *    that heap before allocations may fail or cause performance
1307              *    degradation. The budget includes any currently allocated
1308              *    device memory."
1309              *
1310              * and
1311              *
1312              *    "The heapBudget value must be less than or equal to
1313              *    VkMemoryHeap::size for each heap."
1314              *
1315              * available (queried above) is the total amount free memory
1316              * system-wide and does not include our allocations so we need
1317              * to add that in.
1318              */
1319             uint64_t budget = MIN2(available + used, heap->size);
1320 
1321             /* Set the budget at 90% of available to avoid thrashing */
1322             p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1323          }
1324 
1325          /* From the Vulkan 1.3.278 spec:
1326           *
1327           *    "The heapBudget and heapUsage values must be zero for array
1328           *    elements greater than or equal to
1329           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1330           *    heapBudget value must be non-zero for array elements less than
1331           *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1332           */
1333          for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1334             p->heapBudget[i] = 0u;
1335             p->heapUsage[i] = 0u;
1336          }
1337          break;
1338       }
1339       default:
1340          vk_debug_ignored_stype(ext->sType);
1341          break;
1342       }
1343    }
1344 }
1345 
1346 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1347 hk_GetPhysicalDeviceQueueFamilyProperties2(
1348    VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
1349    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1350 {
1351    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1352    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1353                           pQueueFamilyPropertyCount);
1354 
1355    for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1356       const struct hk_queue_family *queue_family = &pdev->queue_families[i];
1357 
1358       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1359       {
1360          p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1361          p->queueFamilyProperties.queueCount = queue_family->queue_count;
1362          p->queueFamilyProperties.timestampValidBits = 0; // TODO 64;
1363          p->queueFamilyProperties.minImageTransferGranularity =
1364             (VkExtent3D){1, 1, 1};
1365 
1366          vk_foreach_struct(ext, p->pNext) {
1367             switch (ext->sType) {
1368             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1369                VkQueueFamilyGlobalPriorityPropertiesKHR *props = (void *)ext;
1370 
1371                /* TODO: support multiple priorities */
1372                props->priorityCount = 1;
1373                props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
1374                break;
1375             }
1376             default:
1377                break;
1378             }
1379          }
1380       }
1381    }
1382 }
1383 
1384 static const VkTimeDomainKHR hk_time_domains[] = {
1385    VK_TIME_DOMAIN_DEVICE_KHR,
1386    VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
1387 #ifdef CLOCK_MONOTONIC_RAW
1388    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
1389 #endif
1390 };
1391 
1392 VKAPI_ATTR VkResult VKAPI_CALL
hk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)1393 hk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,
1394                                                 uint32_t *pTimeDomainCount,
1395                                                 VkTimeDomainKHR *pTimeDomains)
1396 {
1397    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
1398 
1399    for (int d = 0; d < ARRAY_SIZE(hk_time_domains); d++) {
1400       vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
1401       {
1402          *i = hk_time_domains[d];
1403       }
1404    }
1405 
1406    return vk_outarray_status(&out);
1407 }
1408 
1409 VKAPI_ATTR void VKAPI_CALL
hk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1410 hk_GetPhysicalDeviceMultisamplePropertiesEXT(
1411    VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
1412    VkMultisamplePropertiesEXT *pMultisampleProperties)
1413 {
1414    VK_FROM_HANDLE(hk_physical_device, pdev, physicalDevice);
1415 
1416    if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1417       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1418    } else {
1419       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1420    }
1421 }
1422