xref: /aosp_15_r20/external/mesa3d/src/panfrost/vulkan/panvk_physical_device.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Derived from tu_device.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include <sys/sysinfo.h>
13 
14 #include "util/disk_cache.h"
15 #include "git_sha1.h"
16 
17 #include "vk_device.h"
18 #include "vk_drm_syncobj.h"
19 #include "vk_format.h"
20 #include "vk_limits.h"
21 #include "vk_log.h"
22 #include "vk_shader_module.h"
23 #include "vk_util.h"
24 
25 #include "panvk_device.h"
26 #include "panvk_entrypoints.h"
27 #include "panvk_instance.h"
28 #include "panvk_physical_device.h"
29 #include "panvk_wsi.h"
30 
31 #include "pan_format.h"
32 #include "pan_props.h"
33 
34 #include "genxml/gen_macros.h"
35 
/* PCI-style vendor ID reported for Arm GPUs. */
#define ARM_VENDOR_ID        0x13b5
#define MAX_PUSH_DESCRIPTORS 32
/* We reserve one ubo for push constant, one for sysvals and one per-set for the
 * descriptor metadata  */
#define RESERVED_UBO_COUNT                   6
/* Parenthesized so the macro behaves as a single operand: without the
 * parentheses, `MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * x` would expand to
 * `32 - RESERVED_UBO_COUNT * x`.
 */
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT)
#define MAX_INLINE_UNIFORM_BLOCK_SIZE        (1 << 16)
43 
44 static int
get_cache_uuid(uint16_t family,void * uuid)45 get_cache_uuid(uint16_t family, void *uuid)
46 {
47    uint32_t mesa_timestamp;
48    uint16_t f = family;
49 
50    if (!disk_cache_get_function_timestamp(get_cache_uuid, &mesa_timestamp))
51       return -1;
52 
53    memset(uuid, 0, VK_UUID_SIZE);
54    memcpy(uuid, &mesa_timestamp, 4);
55    memcpy((char *)uuid + 4, &f, 2);
56    snprintf((char *)uuid + 6, VK_UUID_SIZE - 10, "pan");
57    return 0;
58 }
59 
60 static void
get_device_extensions(const struct panvk_physical_device * device,struct vk_device_extension_table * ext)61 get_device_extensions(const struct panvk_physical_device *device,
62                       struct vk_device_extension_table *ext)
63 {
64    *ext = (struct vk_device_extension_table){
65       .KHR_buffer_device_address = true,
66       .KHR_copy_commands2 = true,
67       .KHR_device_group = true,
68       .KHR_descriptor_update_template = true,
69       .KHR_driver_properties = true,
70       .KHR_maintenance3 = true,
71       .KHR_pipeline_executable_properties = true,
72       .KHR_pipeline_library = true,
73       .KHR_push_descriptor = true,
74       .KHR_sampler_mirror_clamp_to_edge = true,
75       .KHR_shader_expect_assume = true,
76       .KHR_storage_buffer_storage_class = true,
77 #ifdef PANVK_USE_WSI_PLATFORM
78       .KHR_swapchain = true,
79 #endif
80       .KHR_synchronization2 = true,
81       .KHR_variable_pointers = true,
82       .EXT_buffer_device_address = true,
83       .EXT_custom_border_color = true,
84       .EXT_graphics_pipeline_library = true,
85       .EXT_index_type_uint8 = true,
86       .EXT_pipeline_creation_cache_control = true,
87       .EXT_pipeline_creation_feedback = true,
88       .EXT_private_data = true,
89       .EXT_shader_module_identifier = true,
90       .EXT_vertex_attribute_divisor = true,
91       .GOOGLE_decorate_string = true,
92       .GOOGLE_hlsl_functionality1 = true,
93       .GOOGLE_user_type = true,
94    };
95 }
96 
97 static void
get_features(const struct panvk_physical_device * device,struct vk_features * features)98 get_features(const struct panvk_physical_device *device,
99              struct vk_features *features)
100 {
101    unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
102 
103    *features = (struct vk_features){
104       /* Vulkan 1.0 */
105       .robustBufferAccess = true,
106       .fullDrawIndexUint32 = true,
107       .independentBlend = true,
108       .logicOp = true,
109       .wideLines = true,
110       .largePoints = true,
111       .textureCompressionETC2 = true,
112       .textureCompressionASTC_LDR = true,
113       .samplerAnisotropy = true,
114       .shaderUniformBufferArrayDynamicIndexing = true,
115       .shaderSampledImageArrayDynamicIndexing = true,
116       .shaderStorageBufferArrayDynamicIndexing = true,
117       .shaderStorageImageArrayDynamicIndexing = true,
118 
119       /* Vulkan 1.1 */
120       .storageBuffer16BitAccess = false,
121       .uniformAndStorageBuffer16BitAccess = false,
122       .storagePushConstant16 = false,
123       .storageInputOutput16 = false,
124       .multiview = false,
125       .multiviewGeometryShader = false,
126       .multiviewTessellationShader = false,
127       .variablePointersStorageBuffer = true,
128       .variablePointers = true,
129       .protectedMemory = false,
130       .samplerYcbcrConversion = false,
131       .shaderDrawParameters = false,
132 
133       /* Vulkan 1.2 */
134       .samplerMirrorClampToEdge = true,
135       .drawIndirectCount = false,
136       .storageBuffer8BitAccess = false,
137       .uniformAndStorageBuffer8BitAccess = false,
138       .storagePushConstant8 = false,
139       .shaderBufferInt64Atomics = false,
140       .shaderSharedInt64Atomics = false,
141       .shaderFloat16 = false,
142       .shaderInt8 = false,
143 
144       .descriptorIndexing = false,
145       .shaderInputAttachmentArrayDynamicIndexing = false,
146       .shaderUniformTexelBufferArrayDynamicIndexing = false,
147       .shaderStorageTexelBufferArrayDynamicIndexing = false,
148       .shaderUniformBufferArrayNonUniformIndexing = false,
149       .shaderSampledImageArrayNonUniformIndexing = false,
150       .shaderStorageBufferArrayNonUniformIndexing = false,
151       .shaderStorageImageArrayNonUniformIndexing = false,
152       .shaderInputAttachmentArrayNonUniformIndexing = false,
153       .shaderUniformTexelBufferArrayNonUniformIndexing = false,
154       .shaderStorageTexelBufferArrayNonUniformIndexing = false,
155       .descriptorBindingUniformBufferUpdateAfterBind = false,
156       .descriptorBindingSampledImageUpdateAfterBind = false,
157       .descriptorBindingStorageImageUpdateAfterBind = false,
158       .descriptorBindingStorageBufferUpdateAfterBind = false,
159       .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
160       .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
161       .descriptorBindingUpdateUnusedWhilePending = false,
162       .descriptorBindingPartiallyBound = false,
163       .descriptorBindingVariableDescriptorCount = false,
164       .runtimeDescriptorArray = false,
165 
166       .samplerFilterMinmax = false,
167       .scalarBlockLayout = false,
168       .imagelessFramebuffer = false,
169       .uniformBufferStandardLayout = false,
170       .shaderSubgroupExtendedTypes = false,
171       .separateDepthStencilLayouts = false,
172       .hostQueryReset = false,
173       .timelineSemaphore = false,
174       .bufferDeviceAddress = true,
175       .bufferDeviceAddressCaptureReplay = false,
176       .bufferDeviceAddressMultiDevice = false,
177       .vulkanMemoryModel = false,
178       .vulkanMemoryModelDeviceScope = false,
179       .vulkanMemoryModelAvailabilityVisibilityChains = false,
180       .shaderOutputViewportIndex = false,
181       .shaderOutputLayer = false,
182       .subgroupBroadcastDynamicId = false,
183 
184       /* Vulkan 1.3 */
185       .robustImageAccess = false,
186       .inlineUniformBlock = false,
187       .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
188       .pipelineCreationCacheControl = true,
189       .privateData = true,
190       .shaderDemoteToHelperInvocation = false,
191       .shaderTerminateInvocation = false,
192       .subgroupSizeControl = false,
193       .computeFullSubgroups = false,
194       .synchronization2 = true,
195       .textureCompressionASTC_HDR = false,
196       .shaderZeroInitializeWorkgroupMemory = false,
197       .dynamicRendering = false,
198       .shaderIntegerDotProduct = false,
199       .maintenance4 = false,
200 
201       /* VK_EXT_graphics_pipeline_library */
202       .graphicsPipelineLibrary = true,
203 
204       /* VK_EXT_index_type_uint8 */
205       .indexTypeUint8 = true,
206 
207       /* VK_EXT_vertex_attribute_divisor */
208       .vertexAttributeInstanceRateDivisor = true,
209       .vertexAttributeInstanceRateZeroDivisor = true,
210 
211       /* VK_EXT_depth_clip_enable */
212       .depthClipEnable = true,
213 
214       /* VK_EXT_4444_formats */
215       .formatA4R4G4B4 = true,
216       .formatA4B4G4R4 = true,
217 
218       /* VK_EXT_custom_border_color */
219       .customBorderColors = true,
220 
221       /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to
222        * make it work, which forces us to apply the same swizzle on the border
223        * color, meaning we need to know the format when preparing the border
224        * color.
225        */
226       .customBorderColorWithoutFormat = arch != 7,
227 
228       /* VK_KHR_pipeline_executable_properties */
229       .pipelineExecutableInfo = true,
230 
231       /* VK_KHR_shader_expect_assume */
232       .shaderExpectAssume = true,
233 
234       /* VK_EXT_shader_module_identifier */
235       .shaderModuleIdentifier = true,
236    };
237 }
238 
239 static void
get_device_properties(const struct panvk_instance * instance,const struct panvk_physical_device * device,struct vk_properties * properties)240 get_device_properties(const struct panvk_instance *instance,
241                       const struct panvk_physical_device *device,
242                       struct vk_properties *properties)
243 {
244    /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
245    VkSampleCountFlags sample_counts =
246       VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
247 
248    uint64_t os_page_size = 4096;
249    os_get_page_size(&os_page_size);
250 
251    unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
252 
253    /* Ensure that the max threads count per workgroup is valid for Bifrost */
254    assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
255 
256    *properties = (struct vk_properties){
257       .apiVersion = panvk_get_vk_version(),
258       .driverVersion = vk_get_driver_version(),
259       .vendorID = ARM_VENDOR_ID,
260 
261       /* Collect arch_major, arch_minor, arch_rev and product_major,
262        * as done by the Arm driver.
263        */
264       .deviceID = device->kmod.props.gpu_prod_id << 16,
265       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
266 
267       /* Vulkan 1.0 limits */
268       /* Maximum texture dimension is 2^16. */
269       .maxImageDimension1D = (1 << 16),
270       .maxImageDimension2D = (1 << 16),
271       .maxImageDimension3D = (1 << 16),
272       .maxImageDimensionCube = (1 << 16),
273       .maxImageArrayLayers = (1 << 16),
274       /* Currently limited by the 1D texture size, which is 2^16.
275        * TODO: If we expose buffer views as 2D textures, we can increase the
276        * limit.
277        */
278       .maxTexelBufferElements = (1 << 16),
279       /* Each uniform entry is 16-byte and the number of entries is encoded in a
280        * 12-bit field, with the minus(1) modifier, which gives 2^20.
281        */
282       .maxUniformBufferRange = 1 << 20,
283       /* Storage buffer access is lowered to globals, so there's no limit here,
284        * except for the SW-descriptor we use to encode storage buffer
285        * descriptors, where the size is a 32-bit field.
286        */
287       .maxStorageBufferRange = UINT32_MAX,
288       /* 128 bytes of push constants, so we're aligned with the minimum Vulkan
289        * requirements.
290        */
291       .maxPushConstantsSize = 128,
292       /* There's no HW limit here. Should we advertize something smaller? */
293       .maxMemoryAllocationCount = UINT32_MAX,
294       /* Again, no hardware limit, but most drivers seem to advertive 64k. */
295       .maxSamplerAllocationCount = 64 * 1024,
296       /* A cache line. */
297       .bufferImageGranularity = 64,
298       /* Sparse binding not supported yet. */
299       .sparseAddressSpaceSize = 0,
300       /* On Bifrost, this is a software limit. We pick the minimum required by
301        * Vulkan, because Bifrost GPUs don't have unified descriptor tables,
302        * which forces us to agregatte all descriptors from all sets and dispatch
303        * them to per-type descriptor tables emitted at draw/dispatch time. The
304        * more sets we support the more copies we are likely to have to do at
305        * draw time.
306        *
307        * Valhall has native support for descriptor sets, and allows a maximum
308        * of 16 sets, but we reserve one for our internal use, so we have 15
309        * left.
310        */
311       .maxBoundDescriptorSets = arch <= 7 ? 4 : 15,
312       /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
313       .maxDescriptorSetSamplers = UINT16_MAX,
314       /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
315        * for our internal UBOs.
316        */
317       .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
318       .maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
319       /* SSBOs are limited by the size of a uniform buffer which contains our
320        * panvk_ssbo_desc objects.
321        * panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
322        * 16-byte too. The number of entries is encoded in a 12-bit field, with
323        * a minus(1) modifier, which gives a maximum of 2^12 SSBO
324        * descriptors.
325        */
326       .maxDescriptorSetStorageBuffers = 1 << 12,
327       /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
328       .maxDescriptorSetSampledImages = UINT16_MAX,
329       /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
330        * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
331        */
332       .maxDescriptorSetStorageImages = 1 << 8,
333       /* A maximum of 8 color render targets, and one depth-stencil render
334        * target.
335        */
336       .maxDescriptorSetInputAttachments = 9,
337 
338       /* We could theoretically use the maxDescriptor values here (except for
339        * UBOs where we're really limited to 256 on the shader side), but on
340        * Bifrost we have to copy some tables around, which comes at an extra
341        * memory/processing cost, so let's pick something smaller.
342        */
343       .maxPerStageDescriptorInputAttachments = 9,
344       .maxPerStageDescriptorSampledImages = 256,
345       .maxPerStageDescriptorSamplers = 128,
346       .maxPerStageDescriptorStorageBuffers = 64,
347       .maxPerStageDescriptorStorageImages = 32,
348       .maxPerStageDescriptorUniformBuffers = 64,
349       .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64,
350 
351       /* Software limits to keep VkCommandBuffer tracking sane. */
352       .maxDescriptorSetUniformBuffersDynamic = 16,
353       .maxDescriptorSetStorageBuffersDynamic = 8,
354       /* Software limit to keep VkCommandBuffer tracking sane. The HW supports
355        * up to 2^9 vertex attributes.
356        */
357       .maxVertexInputAttributes = 16,
358       .maxVertexInputBindings = 16,
359       /* MALI_ATTRIBUTE::offset is 32-bit. */
360       .maxVertexInputAttributeOffset = UINT32_MAX,
361       /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
362       .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
363       /* 32 vec4 varyings. */
364       .maxVertexOutputComponents = 128,
365       /* Tesselation shaders not supported. */
366       .maxTessellationGenerationLevel = 0,
367       .maxTessellationPatchSize = 0,
368       .maxTessellationControlPerVertexInputComponents = 0,
369       .maxTessellationControlPerVertexOutputComponents = 0,
370       .maxTessellationControlPerPatchOutputComponents = 0,
371       .maxTessellationControlTotalOutputComponents = 0,
372       .maxTessellationEvaluationInputComponents = 0,
373       .maxTessellationEvaluationOutputComponents = 0,
374       /* Geometry shaders not supported. */
375       .maxGeometryShaderInvocations = 0,
376       .maxGeometryInputComponents = 0,
377       .maxGeometryOutputComponents = 0,
378       .maxGeometryOutputVertices = 0,
379       .maxGeometryTotalOutputComponents = 0,
380       /* 32 vec4 varyings. */
381       .maxFragmentInputComponents = 128,
382       /* 8 render targets. */
383       .maxFragmentOutputAttachments = 8,
384       /* We don't support dual source blending yet. */
385       .maxFragmentDualSrcAttachments = 0,
386       /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
387        * above).
388        */
389       .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
390       /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
391        * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
392        * really make sense to expose this amount of memory, especially since
393        * it's backed by global memory anyway.
394        */
395       .maxComputeSharedMemorySize = 32768,
396       /* Software limit to meet Vulkan 1.0 requirements. We split the
397        * dispatch in several jobs if it's too big.
398        */
399       .maxComputeWorkGroupCount = {65535, 65535, 65535},
400 
401       /* We could also split into serveral jobs but this has many limitations.
402        * As such we limit to the max threads per workgroup supported by the GPU.
403        */
404       .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
405       .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
406                                   device->kmod.props.max_threads_per_wg,
407                                   device->kmod.props.max_threads_per_wg},
408       /* 8-bit subpixel precision. */
409       .subPixelPrecisionBits = 8,
410       .subTexelPrecisionBits = 8,
411       .mipmapPrecisionBits = 8,
412       /* Software limit. */
413       .maxDrawIndexedIndexValue = UINT32_MAX,
414       /* Make it one for now. */
415       .maxDrawIndirectCount = 1,
416       .maxSamplerLodBias = 255,
417       .maxSamplerAnisotropy = 16,
418       .maxViewports = 1,
419       /* Same as the framebuffer limit. */
420       .maxViewportDimensions = {(1 << 14), (1 << 14)},
421       /* Encoded in a 16-bit signed integer. */
422       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
423       .viewportSubPixelBits = 0,
424       /* Align on a page. */
425       .minMemoryMapAlignment = os_page_size,
426       /* Some compressed texture formats require 128-byte alignment. */
427       .minTexelBufferOffsetAlignment = 64,
428       /* Always aligned on a uniform slot (vec4). */
429       .minUniformBufferOffsetAlignment = 16,
430       /* Lowered to global accesses, which happen at the 32-bit granularity. */
431       .minStorageBufferOffsetAlignment = 4,
432       /* Signed 4-bit value. */
433       .minTexelOffset = -8,
434       .maxTexelOffset = 7,
435       .minTexelGatherOffset = -8,
436       .maxTexelGatherOffset = 7,
437       .minInterpolationOffset = -0.5,
438       .maxInterpolationOffset = 0.5,
439       .subPixelInterpolationOffsetBits = 8,
440       .maxFramebufferWidth = (1 << 14),
441       .maxFramebufferHeight = (1 << 14),
442       .maxFramebufferLayers = 256,
443       .framebufferColorSampleCounts = sample_counts,
444       .framebufferDepthSampleCounts = sample_counts,
445       .framebufferStencilSampleCounts = sample_counts,
446       .framebufferNoAttachmentsSampleCounts = sample_counts,
447       .maxColorAttachments = 8,
448       .sampledImageColorSampleCounts = sample_counts,
449       .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
450       .sampledImageDepthSampleCounts = sample_counts,
451       .sampledImageStencilSampleCounts = sample_counts,
452       .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
453       .maxSampleMaskWords = 1,
454       .timestampComputeAndGraphics = false,
455       .timestampPeriod = 0,
456       .maxClipDistances = 0,
457       .maxCullDistances = 0,
458       .maxCombinedClipAndCullDistances = 0,
459       .discreteQueuePriorities = 2,
460       .pointSizeRange = {0.125, 4095.9375},
461       .lineWidthRange = {0.0, 7.9921875},
462       .pointSizeGranularity = (1.0 / 16.0),
463       .lineWidthGranularity = (1.0 / 128.0),
464       .strictLines = false,
465       .standardSampleLocations = true,
466       .optimalBufferCopyOffsetAlignment = 64,
467       .optimalBufferCopyRowPitchAlignment = 64,
468       .nonCoherentAtomSize = 64,
469 
470       /* Vulkan 1.0 sparse properties */
471       .sparseResidencyNonResidentStrict = false,
472       .sparseResidencyAlignedMipSize = false,
473       .sparseResidencyStandard2DBlockShape = false,
474       .sparseResidencyStandard2DMultisampleBlockShape = false,
475       .sparseResidencyStandard3DBlockShape = false,
476 
477       /* Vulkan 1.1 properties */
478       /* XXX: 1.1 support */
479       .subgroupSize = 8,
480       .subgroupSupportedStages = VK_SHADER_STAGE_ALL,
481       .subgroupSupportedOperations =
482          VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
483          VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
484          VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
485          VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
486          VK_SUBGROUP_FEATURE_VOTE_BIT,
487       .subgroupQuadOperationsInAllStages = false,
488       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
489       .maxMultiviewViewCount = 0,
490       .maxMultiviewInstanceIndex = 0,
491       .protectedNoFault = false,
492       .maxPerSetDescriptors = UINT16_MAX,
493       /* Our buffer size fields allow only this much */
494       .maxMemoryAllocationSize = UINT32_MAX,
495 
496       /* Vulkan 1.2 properties */
497       /* XXX: 1.2 support */
498       /* XXX: VK_KHR_depth_stencil_resolve */
499       .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
500       .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
501       .independentResolveNone = true,
502       .independentResolve = true,
503       /* VK_KHR_driver_properties */
504       .driverID = VK_DRIVER_ID_MESA_PANVK,
505       .conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
506       /* XXX: VK_KHR_shader_float_controls */
507       .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
508       .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
509       .shaderSignedZeroInfNanPreserveFloat16 = true,
510       .shaderSignedZeroInfNanPreserveFloat32 = true,
511       .shaderSignedZeroInfNanPreserveFloat64 = false,
512       .shaderDenormPreserveFloat16 = true,
513       .shaderDenormPreserveFloat32 = true,
514       .shaderDenormPreserveFloat64 = false,
515       .shaderDenormFlushToZeroFloat16 = true,
516       .shaderDenormFlushToZeroFloat32 = true,
517       .shaderDenormFlushToZeroFloat64 = false,
518       .shaderRoundingModeRTEFloat16 = true,
519       .shaderRoundingModeRTEFloat32 = true,
520       .shaderRoundingModeRTEFloat64 = false,
521       .shaderRoundingModeRTZFloat16 = true,
522       .shaderRoundingModeRTZFloat32 = true,
523       .shaderRoundingModeRTZFloat64 = false,
524       /* XXX: VK_EXT_descriptor_indexing */
525       .maxUpdateAfterBindDescriptorsInAllPools = 0,
526       .shaderUniformBufferArrayNonUniformIndexingNative = false,
527       .shaderSampledImageArrayNonUniformIndexingNative = false,
528       .shaderStorageBufferArrayNonUniformIndexingNative = false,
529       .shaderStorageImageArrayNonUniformIndexingNative = false,
530       .shaderInputAttachmentArrayNonUniformIndexingNative = false,
531       .robustBufferAccessUpdateAfterBind = false,
532       .quadDivergentImplicitLod = false,
533       .maxPerStageDescriptorUpdateAfterBindSamplers = 0,
534       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
535       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
536       .maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
537       .maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
538       .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
539       .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
540       .maxPerStageUpdateAfterBindResources = 0,
541       .maxDescriptorSetUpdateAfterBindSamplers = 0,
542       .maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
543       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
544       .maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
545       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
546       .maxDescriptorSetUpdateAfterBindSampledImages = 0,
547       .maxDescriptorSetUpdateAfterBindStorageImages = 0,
548       .maxDescriptorSetUpdateAfterBindInputAttachments = 0,
549       /* XXX: VK_EXT_sampler_filter_minmax */
550       .filterMinmaxSingleComponentFormats = false,
551       .filterMinmaxImageComponentMapping = false,
552       /* XXX: VK_KHR_timeline_semaphore */
553       .maxTimelineSemaphoreValueDifference = INT64_MAX,
554       .framebufferIntegerColorSampleCounts = sample_counts,
555 
556       /* Vulkan 1.3 properties */
557       /* XXX: 1.3 support */
558       /* XXX: VK_EXT_subgroup_size_control */
559       .minSubgroupSize = 8,
560       .maxSubgroupSize = 8,
561       .maxComputeWorkgroupSubgroups = 48,
562       .requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL,
563       /* XXX: VK_EXT_inline_uniform_block */
564       .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
565       .maxPerStageDescriptorInlineUniformBlocks =
566          MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
567       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
568          MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
569       .maxDescriptorSetInlineUniformBlocks =
570          MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
571       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
572          MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
573       .maxInlineUniformTotalSize =
574          MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
575       /* XXX: VK_KHR_shader_integer_dot_product */
576       .integerDotProduct8BitUnsignedAccelerated = true,
577       .integerDotProduct8BitSignedAccelerated = true,
578       .integerDotProduct4x8BitPackedUnsignedAccelerated = true,
579       .integerDotProduct4x8BitPackedSignedAccelerated = true,
580       /* XXX: VK_EXT_texel_buffer_alignment */
581       .storageTexelBufferOffsetAlignmentBytes = 64,
582       .storageTexelBufferOffsetSingleTexelAlignment = false,
583       .uniformTexelBufferOffsetAlignmentBytes = 4,
584       .uniformTexelBufferOffsetSingleTexelAlignment = true,
585       /* XXX: VK_KHR_maintenance4 */
586       .maxBufferSize = 1 << 30,
587 
588       /* VK_EXT_custom_border_color */
589       .maxCustomBorderColorSamplers = 32768,
590 
591       /* VK_EXT_graphics_pipeline_library */
592       .graphicsPipelineLibraryFastLinking = true,
593       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
594 
595       /* VK_KHR_vertex_attribute_divisor */
596       /* We will have to restrict this a bit for multiview */
597       .maxVertexAttribDivisor = UINT32_MAX,
598       .supportsNonZeroFirstInstance = false,
599 
600       /* VK_KHR_push_descriptor */
601       .maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
602    };
603 
604    snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
605             device->name);
606 
607    memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
608 
609    const struct {
610       uint16_t vendor_id;
611       uint32_t device_id;
612       uint8_t pad[8];
613    } dev_uuid = {
614       .vendor_id = ARM_VENDOR_ID,
615       .device_id = device->model->gpu_id,
616    };
617 
618    STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
619    memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
620    STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
621    memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
622 
623    snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
624    snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
625             "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
626 
627    /* VK_EXT_shader_module_identifier */
628    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
629                  sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
630    memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
631           vk_shaderModuleIdentifierAlgorithmUUID,
632           sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
633 }
634 
635 void
panvk_physical_device_finish(struct panvk_physical_device * device)636 panvk_physical_device_finish(struct panvk_physical_device *device)
637 {
638    panvk_wsi_finish(device);
639 
640    pan_kmod_dev_destroy(device->kmod.dev);
641    if (device->master_fd != -1)
642       close(device->master_fd);
643 
644    vk_physical_device_finish(&device->vk);
645 }
646 
647 VkResult
panvk_physical_device_init(struct panvk_physical_device * device,struct panvk_instance * instance,drmDevicePtr drm_device)648 panvk_physical_device_init(struct panvk_physical_device *device,
649                            struct panvk_instance *instance,
650                            drmDevicePtr drm_device)
651 {
652    const char *path = drm_device->nodes[DRM_NODE_RENDER];
653    VkResult result = VK_SUCCESS;
654    drmVersionPtr version;
655    int fd;
656    int master_fd = -1;
657 
658    fd = open(path, O_RDWR | O_CLOEXEC);
659    if (fd < 0) {
660       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
661                        "failed to open device %s", path);
662    }
663 
664    version = drmGetVersion(fd);
665    if (!version) {
666       close(fd);
667       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
668                        "failed to query kernel driver version for device %s",
669                        path);
670    }
671 
672    if (strcmp(version->name, "panfrost") && strcmp(version->name, "panthor")) {
673       drmFreeVersion(version);
674       close(fd);
675       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
676                        "device %s does not use the panfrost kernel driver",
677                        path);
678    }
679 
680    drmFreeVersion(version);
681 
682    if (!getenv("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
683       close(fd);
684       return vk_errorf(
685          instance, VK_ERROR_INCOMPATIBLE_DRIVER,
686          "WARNING: panvk is not a conformant vulkan implementation, "
687          "pass PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know what you're doing.");
688    }
689 
690    if (instance->debug_flags & PANVK_DEBUG_STARTUP)
691       vk_logi(VK_LOG_NO_OBJS(instance), "Found compatible device '%s'.", path);
692 
693    device->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD,
694                                           &instance->kmod.allocator);
695 
696    if (!device->kmod.dev) {
697       result = vk_errorf(instance, panvk_errno_to_vk_error(), "cannot create device");
698       goto fail;
699    }
700 
701    pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
702 
703    device->model = panfrost_get_model(device->kmod.props.gpu_prod_id,
704                                       device->kmod.props.gpu_variant);
705 
706    unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
707 
708    switch (arch) {
709    case 6:
710    case 7:
711    case 10:
712       break;
713 
714    default:
715       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
716                          "%s not supported", device->model->name);
717       goto fail;
718    }
719 
720    if (instance->vk.enabled_extensions.KHR_display) {
721       master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
722       if (master_fd >= 0) {
723          /* TODO: free master_fd is accel is not working? */
724       }
725    }
726 
727    device->master_fd = master_fd;
728 
729    device->formats.all = panfrost_format_table(arch);
730    device->formats.blendable = panfrost_blendable_format_table(arch);
731 
732    memset(device->name, 0, sizeof(device->name));
733    sprintf(device->name, "%s", device->model->name);
734 
735    if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
736       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
737                          "cannot generate UUID");
738       goto fail;
739    }
740 
741    vk_warn_non_conformant_implementation("panvk");
742 
743    device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
744    /* We don't support timelines in the uAPI yet and we don't want it getting
745     * suddenly turned on by vk_drm_syncobj_get_type() without us adding panvk
746     * code for it first.
747     */
748    device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
749 
750    struct vk_device_extension_table supported_extensions;
751    get_device_extensions(device, &supported_extensions);
752 
753    struct vk_features supported_features;
754    get_features(device, &supported_features);
755 
756    struct vk_properties properties;
757    get_device_properties(instance, device, &properties);
758 
759    struct vk_physical_device_dispatch_table dispatch_table;
760    vk_physical_device_dispatch_table_from_entrypoints(
761       &dispatch_table, &panvk_physical_device_entrypoints, true);
762    vk_physical_device_dispatch_table_from_entrypoints(
763       &dispatch_table, &wsi_physical_device_entrypoints, false);
764 
765    result = vk_physical_device_init(&device->vk, &instance->vk,
766                                     &supported_extensions, &supported_features,
767                                     &properties, &dispatch_table);
768 
769    if (result != VK_SUCCESS) {
770       vk_error(instance, result);
771       goto fail;
772    }
773 
774    device->sync_types[0] = &device->drm_syncobj_type;
775    device->sync_types[1] = NULL;
776    device->vk.supported_sync_types = device->sync_types;
777 
778    result = panvk_wsi_init(device);
779    if (result != VK_SUCCESS) {
780       vk_error(instance, result);
781       goto fail;
782    }
783 
784    return VK_SUCCESS;
785 
786 fail:
787    if (device->vk.instance)
788       vk_physical_device_finish(&device->vk);
789 
790    if (device->kmod.dev)
791       pan_kmod_dev_destroy(device->kmod.dev);
792 
793    if (fd != -1)
794       close(fd);
795    if (master_fd != -1)
796       close(master_fd);
797    return result;
798 }
799 
800 static const VkQueueFamilyProperties panvk_queue_family_properties = {
801    .queueFlags =
802       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
803    .queueCount = 1,
804    .timestampValidBits = 0,
805    .minImageTransferGranularity = {1, 1, 1},
806 };
807 
808 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)809 panvk_GetPhysicalDeviceQueueFamilyProperties2(
810    VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
811    VkQueueFamilyProperties2 *pQueueFamilyProperties)
812 {
813    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
814                           pQueueFamilyPropertyCount);
815 
816    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
817    {
818       p->queueFamilyProperties = panvk_queue_family_properties;
819    }
820 }
821 
/* Returns the number of bytes of system RAM advertised as the memory heap.
 *
 * The total RAM reported by sysinfo() is scaled down so the GPU does not
 * consume all of it: half of it when the system has 4GiB or less, three
 * quarters otherwise. Returns 0 if sysinfo() fails, rather than reading an
 * uninitialized struct.
 */
static uint64_t
get_system_heap_size(void)
{
   struct sysinfo info;

   if (sysinfo(&info) != 0)
      return 0;

   /* totalram is expressed in units of mem_unit bytes. */
   const uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit;

   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
   if (total_ram <= 4ull * 1024 * 1024 * 1024)
      return total_ram / 2;

   return total_ram * 3 / 4;
}
841 
842 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)843 panvk_GetPhysicalDeviceMemoryProperties2(
844    VkPhysicalDevice physicalDevice,
845    VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
846 {
847    pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties){
848       .memoryHeapCount = 1,
849       .memoryHeaps[0].size = get_system_heap_size(),
850       .memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
851       .memoryTypeCount = 1,
852       .memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
853                                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
854                                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
855       .memoryTypes[0].heapIndex = 0,
856    };
857 }
858 
859 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalSemaphoreProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalSemaphoreInfo * pExternalSemaphoreInfo,VkExternalSemaphoreProperties * pExternalSemaphoreProperties)860 panvk_GetPhysicalDeviceExternalSemaphoreProperties(
861    VkPhysicalDevice physicalDevice,
862    const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
863    VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
864 {
865    if ((pExternalSemaphoreInfo->handleType ==
866            VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
867         pExternalSemaphoreInfo->handleType ==
868            VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
869       pExternalSemaphoreProperties->exportFromImportedHandleTypes =
870          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
871          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
872       pExternalSemaphoreProperties->compatibleHandleTypes =
873          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
874          VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
875       pExternalSemaphoreProperties->externalSemaphoreFeatures =
876          VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
877          VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
878    } else {
879       pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
880       pExternalSemaphoreProperties->compatibleHandleTypes = 0;
881       pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
882    }
883 }
884 
885 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalFenceInfo * pExternalFenceInfo,VkExternalFenceProperties * pExternalFenceProperties)886 panvk_GetPhysicalDeviceExternalFenceProperties(
887    VkPhysicalDevice physicalDevice,
888    const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
889    VkExternalFenceProperties *pExternalFenceProperties)
890 {
891    pExternalFenceProperties->exportFromImportedHandleTypes = 0;
892    pExternalFenceProperties->compatibleHandleTypes = 0;
893    pExternalFenceProperties->externalFenceFeatures = 0;
894 }
895 
896 #define DEVICE_PER_ARCH_FUNCS(_ver)                                            \
897    VkResult panvk_v##_ver##_create_device(                                     \
898       struct panvk_physical_device *physical_device,                           \
899       const VkDeviceCreateInfo *pCreateInfo,                                   \
900       const VkAllocationCallbacks *pAllocator, VkDevice *pDevice);             \
901                                                                                \
902    void panvk_v##_ver##_destroy_device(                                        \
903       struct panvk_device *device, const VkAllocationCallbacks *pAllocator)
904 
905 DEVICE_PER_ARCH_FUNCS(6);
906 DEVICE_PER_ARCH_FUNCS(7);
907 DEVICE_PER_ARCH_FUNCS(10);
908 
909 VKAPI_ATTR VkResult VKAPI_CALL
panvk_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)910 panvk_CreateDevice(VkPhysicalDevice physicalDevice,
911                    const VkDeviceCreateInfo *pCreateInfo,
912                    const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
913 {
914    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
915    unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
916    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
917 
918    panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
919                            pCreateInfo, pAllocator, pDevice);
920 
921    return result;
922 }
923 
924 VKAPI_ATTR void VKAPI_CALL
panvk_DestroyDevice(VkDevice _device,const VkAllocationCallbacks * pAllocator)925 panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
926 {
927    VK_FROM_HANDLE(panvk_device, device, _device);
928    struct panvk_physical_device *physical_device =
929       to_panvk_physical_device(device->vk.physical);
930    unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
931 
932    panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
933 }
934 
935 static bool
format_is_supported(struct panvk_physical_device * physical_device,const struct panfrost_format fmt)936 format_is_supported(struct panvk_physical_device *physical_device,
937                     const struct panfrost_format fmt)
938 {
939    /* If the format ID is zero, it's not supported. */
940    if (!fmt.hw)
941       return false;
942 
943    /* Compressed formats (ID < 32) are optional. We need to check against
944     * the supported formats reported by the GPU. */
945    unsigned idx = MALI_EXTRACT_INDEX(fmt.hw);
946    if (MALI_EXTRACT_TYPE(idx) == MALI_FORMAT_COMPRESSED) {
947       uint32_t supported_compr_fmts =
948          panfrost_query_compressed_formats(&physical_device->kmod.props);
949 
950       assert(idx < 32);
951 
952       if (!(BITFIELD_BIT(idx) & supported_compr_fmts))
953          return false;
954    }
955 
956    return true;
957 }
958 
959 static void
get_format_properties(struct panvk_physical_device * physical_device,VkFormat format,VkFormatProperties * out_properties)960 get_format_properties(struct panvk_physical_device *physical_device,
961                       VkFormat format, VkFormatProperties *out_properties)
962 {
963    VkFormatFeatureFlags tex = 0, buffer = 0;
964    enum pipe_format pfmt = vk_format_to_pipe_format(format);
965    unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
966 
967    /* FIXME: Valhall doesn't support interleaved D32_S8X24. Implement it as
968     * a multi-plane format, and we probably want to switch Bifrost to this
969     * layout too, since:
970     * - it's more cache-friendly (you load more samples on a cache-line if you don't
971     *   have those 24 dummy bits)
972     * - it takes less memory (you don't lose those 24bits per texel)
973     * - we can use AFBC
974     */
975    if (arch >= 9 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
976       goto end;
977 
978    if (pfmt == PIPE_FORMAT_NONE)
979       goto end;
980 
981    const struct panfrost_format fmt = physical_device->formats.all[pfmt];
982 
983    if (!format_is_supported(physical_device, fmt))
984       goto end;
985 
986    /* 3byte formats are not supported by the buffer <-> image copy helpers. */
987    if (util_format_get_blocksize(pfmt) == 3)
988       goto end;
989 
990    /* Reject sRGB formats (see
991     * https://github.com/KhronosGroup/Vulkan-Docs/issues/2214).
992     */
993    if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt))
994       buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
995 
996    if (fmt.bind & PAN_BIND_SAMPLER_VIEW) {
997       tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
998              VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
999              VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
1000              VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
1001              VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
1002 
1003       /* Integer formats only support nearest filtering */
1004       if (!util_format_is_scaled(pfmt) && !util_format_is_pure_integer(pfmt))
1005          tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
1006 
1007       if (!util_format_is_depth_or_stencil(pfmt))
1008          buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
1009 
1010       tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT;
1011    }
1012 
1013    if (fmt.bind & PAN_BIND_RENDER_TARGET) {
1014       tex |= VK_FORMAT_FEATURE_BLIT_DST_BIT;
1015       tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
1016 
1017       /* SNORM rendering isn't working yet (nir_lower_blend bugs), disable for
1018        * now.
1019        *
1020        * XXX: Enable once fixed.
1021        */
1022       if (!util_format_is_snorm(pfmt)) {
1023          tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
1024          tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
1025       }
1026 
1027       if (!util_format_is_depth_and_stencil(pfmt))
1028          buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
1029    }
1030 
1031    if (pfmt == PIPE_FORMAT_R32_UINT || pfmt == PIPE_FORMAT_R32_SINT) {
1032       buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
1033       tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
1034    }
1035 
1036    if (fmt.bind & PAN_BIND_DEPTH_STENCIL)
1037       tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
1038 
1039 end:
1040    out_properties->linearTilingFeatures = tex;
1041    out_properties->optimalTilingFeatures = tex;
1042    out_properties->bufferFeatures = buffer;
1043 }
1044 
1045 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,VkFormat format,VkFormatProperties * pFormatProperties)1046 panvk_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,
1047                                         VkFormat format,
1048                                         VkFormatProperties *pFormatProperties)
1049 {
1050    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1051 
1052    get_format_properties(physical_device, format, pFormatProperties);
1053 }
1054 
1055 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,VkFormat format,VkFormatProperties2 * pFormatProperties)1056 panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
1057                                          VkFormat format,
1058                                          VkFormatProperties2 *pFormatProperties)
1059 {
1060    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1061 
1062    get_format_properties(physical_device, format,
1063                          &pFormatProperties->formatProperties);
1064 
1065    VkDrmFormatModifierPropertiesListEXT *list = vk_find_struct(
1066       pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
1067    if (list) {
1068       VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
1069                              list->pDrmFormatModifierProperties,
1070                              &list->drmFormatModifierCount);
1071 
1072       vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out,
1073                                mod_props)
1074       {
1075          mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
1076          mod_props->drmFormatModifierPlaneCount = 1;
1077       }
1078    }
1079 }
1080 
1081 static VkResult
get_image_format_properties(struct panvk_physical_device * physical_device,const VkPhysicalDeviceImageFormatInfo2 * info,VkImageFormatProperties * pImageFormatProperties,VkFormatFeatureFlags * p_feature_flags)1082 get_image_format_properties(struct panvk_physical_device *physical_device,
1083                             const VkPhysicalDeviceImageFormatInfo2 *info,
1084                             VkImageFormatProperties *pImageFormatProperties,
1085                             VkFormatFeatureFlags *p_feature_flags)
1086 {
1087    VkFormatProperties format_props;
1088    VkFormatFeatureFlags format_feature_flags;
1089    VkExtent3D maxExtent;
1090    uint32_t maxMipLevels;
1091    uint32_t maxArraySize;
1092    VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
1093    enum pipe_format format = vk_format_to_pipe_format(info->format);
1094 
1095    get_format_properties(physical_device, info->format, &format_props);
1096 
1097    switch (info->tiling) {
1098    case VK_IMAGE_TILING_LINEAR:
1099       format_feature_flags = format_props.linearTilingFeatures;
1100       break;
1101 
1102    case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
1103       /* The only difference between optimal and linear is currently whether
1104        * depth/stencil attachments are allowed on depth/stencil formats.
1105        * There's no reason to allow importing depth/stencil textures, so just
1106        * disallow it and then this annoying edge case goes away.
1107        *
1108        * TODO: If anyone cares, we could enable this by looking at the
1109        * modifier and checking if it's LINEAR or not.
1110        */
1111       if (util_format_is_depth_or_stencil(format))
1112          goto unsupported;
1113 
1114       assert(format_props.optimalTilingFeatures ==
1115              format_props.linearTilingFeatures);
1116       FALLTHROUGH;
1117    case VK_IMAGE_TILING_OPTIMAL:
1118       format_feature_flags = format_props.optimalTilingFeatures;
1119       break;
1120    default:
1121       unreachable("bad VkPhysicalDeviceImageFormatInfo2");
1122    }
1123 
1124    if (format_feature_flags == 0)
1125       goto unsupported;
1126 
1127    switch (info->type) {
1128    default:
1129       unreachable("bad vkimage type");
1130    case VK_IMAGE_TYPE_1D:
1131       maxExtent.width = 16384;
1132       maxExtent.height = 1;
1133       maxExtent.depth = 1;
1134       maxMipLevels = 15; /* log2(maxWidth) + 1 */
1135       maxArraySize = 2048;
1136       break;
1137    case VK_IMAGE_TYPE_2D:
1138       maxExtent.width = 16384;
1139       maxExtent.height = 16384;
1140       maxExtent.depth = 1;
1141       maxMipLevels = 15; /* log2(maxWidth) + 1 */
1142       maxArraySize = 2048;
1143       break;
1144    case VK_IMAGE_TYPE_3D:
1145       maxExtent.width = 2048;
1146       maxExtent.height = 2048;
1147       maxExtent.depth = 2048;
1148       maxMipLevels = 12; /* log2(maxWidth) + 1 */
1149       maxArraySize = 1;
1150       break;
1151    }
1152 
1153    if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
1154        info->type == VK_IMAGE_TYPE_2D &&
1155        (format_feature_flags &
1156         (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
1157          VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1158        !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
1159        !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1160       sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
1161    }
1162 
1163    if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
1164       if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
1165          goto unsupported;
1166       }
1167    }
1168 
1169    if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
1170       if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
1171          goto unsupported;
1172       }
1173    }
1174 
1175    if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
1176       if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
1177          goto unsupported;
1178       }
1179    }
1180 
1181    if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
1182       if (!(format_feature_flags &
1183             VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
1184          goto unsupported;
1185       }
1186    }
1187 
1188    *pImageFormatProperties = (VkImageFormatProperties){
1189       .maxExtent = maxExtent,
1190       .maxMipLevels = maxMipLevels,
1191       .maxArrayLayers = maxArraySize,
1192       .sampleCounts = sampleCounts,
1193 
1194       /* FINISHME: Accurately calculate
1195        * VkImageFormatProperties::maxResourceSize.
1196        */
1197       .maxResourceSize = UINT32_MAX,
1198    };
1199 
1200    if (p_feature_flags)
1201       *p_feature_flags = format_feature_flags;
1202 
1203    return VK_SUCCESS;
1204 unsupported:
1205    *pImageFormatProperties = (VkImageFormatProperties){
1206       .maxExtent = {0, 0, 0},
1207       .maxMipLevels = 0,
1208       .maxArrayLayers = 0,
1209       .sampleCounts = 0,
1210       .maxResourceSize = 0,
1211    };
1212 
1213    return VK_ERROR_FORMAT_NOT_SUPPORTED;
1214 }
1215 
1216 VKAPI_ATTR VkResult VKAPI_CALL
panvk_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice,VkFormat format,VkImageType type,VkImageTiling tiling,VkImageUsageFlags usage,VkImageCreateFlags createFlags,VkImageFormatProperties * pImageFormatProperties)1217 panvk_GetPhysicalDeviceImageFormatProperties(
1218    VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
1219    VkImageTiling tiling, VkImageUsageFlags usage,
1220    VkImageCreateFlags createFlags,
1221    VkImageFormatProperties *pImageFormatProperties)
1222 {
1223    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1224 
1225    const VkPhysicalDeviceImageFormatInfo2 info = {
1226       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1227       .pNext = NULL,
1228       .format = format,
1229       .type = type,
1230       .tiling = tiling,
1231       .usage = usage,
1232       .flags = createFlags,
1233    };
1234 
1235    return get_image_format_properties(physical_device, &info,
1236                                       pImageFormatProperties, NULL);
1237 }
1238 
1239 static VkResult
panvk_get_external_image_format_properties(const struct panvk_physical_device * physical_device,const VkPhysicalDeviceImageFormatInfo2 * pImageFormatInfo,VkExternalMemoryHandleTypeFlagBits handleType,VkExternalMemoryProperties * external_properties)1240 panvk_get_external_image_format_properties(
1241    const struct panvk_physical_device *physical_device,
1242    const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
1243    VkExternalMemoryHandleTypeFlagBits handleType,
1244    VkExternalMemoryProperties *external_properties)
1245 {
1246    VkExternalMemoryFeatureFlagBits flags = 0;
1247    VkExternalMemoryHandleTypeFlags export_flags = 0;
1248    VkExternalMemoryHandleTypeFlags compat_flags = 0;
1249 
1250    /* From the Vulkan 1.1.98 spec:
1251     *
1252     *    If handleType is not compatible with the format, type, tiling,
1253     *    usage, and flags specified in VkPhysicalDeviceImageFormatInfo2,
1254     *    then vkGetPhysicalDeviceImageFormatProperties2 returns
1255     *    VK_ERROR_FORMAT_NOT_SUPPORTED.
1256     */
1257    switch (handleType) {
1258    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
1259    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1260       switch (pImageFormatInfo->type) {
1261       case VK_IMAGE_TYPE_2D:
1262          flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
1263                  VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
1264                  VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1265          compat_flags = export_flags =
1266             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
1267             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1268          break;
1269       default:
1270          return vk_errorf(
1271             physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1272             "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)",
1273             handleType, pImageFormatInfo->type);
1274       }
1275       break;
1276    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1277       flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1278       compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
1279       break;
1280    default:
1281       return vk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1282                        "VkExternalMemoryTypeFlagBits(0x%x) unsupported",
1283                        handleType);
1284    }
1285 
1286    *external_properties = (VkExternalMemoryProperties){
1287       .externalMemoryFeatures = flags,
1288       .exportFromImportedHandleTypes = export_flags,
1289       .compatibleHandleTypes = compat_flags,
1290    };
1291 
1292    return VK_SUCCESS;
1293 }
1294 
1295 VKAPI_ATTR VkResult VKAPI_CALL
panvk_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceImageFormatInfo2 * base_info,VkImageFormatProperties2 * base_props)1296 panvk_GetPhysicalDeviceImageFormatProperties2(
1297    VkPhysicalDevice physicalDevice,
1298    const VkPhysicalDeviceImageFormatInfo2 *base_info,
1299    VkImageFormatProperties2 *base_props)
1300 {
1301    VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1302    const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
1303    const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL;
1304    VkExternalImageFormatProperties *external_props = NULL;
1305    VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL;
1306    VkFormatFeatureFlags format_feature_flags;
1307    VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
1308    VkResult result;
1309 
1310    result = get_image_format_properties(physical_device, base_info,
1311                                         &base_props->imageFormatProperties,
1312                                         &format_feature_flags);
1313    if (result != VK_SUCCESS)
1314       return result;
1315 
1316    /* Extract input structs */
1317    vk_foreach_struct_const(s, base_info->pNext) {
1318       switch (s->sType) {
1319       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
1320          external_info = (const void *)s;
1321          break;
1322       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT:
1323          image_view_info = (const void *)s;
1324          break;
1325       default:
1326          break;
1327       }
1328    }
1329 
1330    /* Extract output structs */
1331    vk_foreach_struct(s, base_props->pNext) {
1332       switch (s->sType) {
1333       case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
1334          external_props = (void *)s;
1335          break;
1336       case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT:
1337          cubic_props = (void *)s;
1338          break;
1339       case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
1340          ycbcr_props = (void *)s;
1341          break;
1342       default:
1343          break;
1344       }
1345    }
1346 
1347    /* From the Vulkan 1.0.42 spec:
1348     *
1349     *    If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
1350     *    behave as if VkPhysicalDeviceExternalImageFormatInfo was not
1351     *    present and VkExternalImageFormatProperties will be ignored.
1352     */
1353    if (external_info && external_info->handleType != 0) {
1354       VkExternalImageFormatProperties fallback_external_props;
1355 
1356       if (!external_props) {
1357          memset(&fallback_external_props, 0, sizeof(fallback_external_props));
1358          external_props = &fallback_external_props;
1359       }
1360 
1361       result = panvk_get_external_image_format_properties(
1362          physical_device, base_info, external_info->handleType,
1363          &external_props->externalMemoryProperties);
1364       if (result != VK_SUCCESS)
1365          goto fail;
1366    }
1367 
1368    if (cubic_props) {
1369       /* note: blob only allows cubic filtering for 2D and 2D array views
1370        * its likely we can enable it for 1D and CUBE, needs testing however
1371        */
1372       if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D ||
1373            image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) &&
1374           (format_feature_flags &
1375            VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) {
1376          cubic_props->filterCubic = true;
1377          cubic_props->filterCubicMinmax = true;
1378       } else {
1379          cubic_props->filterCubic = false;
1380          cubic_props->filterCubicMinmax = false;
1381       }
1382    }
1383 
1384    if (ycbcr_props)
1385       ycbcr_props->combinedImageSamplerDescriptorCount = 1;
1386 
1387    return VK_SUCCESS;
1388 
1389 fail:
1390    if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
1391       /* From the Vulkan 1.0.42 spec:
1392        *
1393        *    If the combination of parameters to
1394        *    vkGetPhysicalDeviceImageFormatProperties2 is not supported by
1395        *    the implementation for use in vkCreateImage, then all members of
1396        *    imageFormatProperties will be filled with zero.
1397        */
1398       base_props->imageFormatProperties = (VkImageFormatProperties){};
1399    }
1400 
1401    return result;
1402 }
1403 
1404 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice,VkFormat format,VkImageType type,VkSampleCountFlagBits samples,VkImageUsageFlags usage,VkImageTiling tiling,uint32_t * pNumProperties,VkSparseImageFormatProperties * pProperties)1405 panvk_GetPhysicalDeviceSparseImageFormatProperties(
1406    VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
1407    VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling,
1408    uint32_t *pNumProperties, VkSparseImageFormatProperties *pProperties)
1409 {
1410    /* Sparse images are not yet supported. */
1411    *pNumProperties = 0;
1412 }
1413 
1414 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties2(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceSparseImageFormatInfo2 * pFormatInfo,uint32_t * pPropertyCount,VkSparseImageFormatProperties2 * pProperties)1415 panvk_GetPhysicalDeviceSparseImageFormatProperties2(
1416    VkPhysicalDevice physicalDevice,
1417    const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
1418    uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
1419 {
1420    /* Sparse images are not yet supported. */
1421    *pPropertyCount = 0;
1422 }
1423 
1424 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalBufferProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalBufferInfo * pExternalBufferInfo,VkExternalBufferProperties * pExternalBufferProperties)1425 panvk_GetPhysicalDeviceExternalBufferProperties(
1426    VkPhysicalDevice physicalDevice,
1427    const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
1428    VkExternalBufferProperties *pExternalBufferProperties)
1429 {
1430    panvk_stub();
1431 }
1432