1 /*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Derived from tu_device.c which is:
5 * Copyright © 2016 Red Hat.
6 * Copyright © 2016 Bas Nieuwenhuizen
7 * Copyright © 2015 Intel Corporation
8 *
9 * SPDX-License-Identifier: MIT
10 */
11
12 #include <sys/sysinfo.h>
13
14 #include "util/disk_cache.h"
15 #include "git_sha1.h"
16
17 #include "vk_device.h"
18 #include "vk_drm_syncobj.h"
19 #include "vk_format.h"
20 #include "vk_limits.h"
21 #include "vk_log.h"
22 #include "vk_shader_module.h"
23 #include "vk_util.h"
24
25 #include "panvk_device.h"
26 #include "panvk_entrypoints.h"
27 #include "panvk_instance.h"
28 #include "panvk_physical_device.h"
29 #include "panvk_wsi.h"
30
31 #include "pan_format.h"
32 #include "pan_props.h"
33
34 #include "genxml/gen_macros.h"
35
#define ARM_VENDOR_ID        0x13b5
#define MAX_PUSH_DESCRIPTORS 32
/* We reserve one UBO for push constants, one for sysvals and one per-set for
 * the descriptor metadata. */
#define RESERVED_UBO_COUNT 6
/* Parenthesized so the macro behaves as a single value inside larger
 * expressions (e.g. when multiplied by MAX_INLINE_UNIFORM_BLOCK_SIZE). */
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT)
#define MAX_INLINE_UNIFORM_BLOCK_SIZE        (1 << 16)
43
44 static int
get_cache_uuid(uint16_t family,void * uuid)45 get_cache_uuid(uint16_t family, void *uuid)
46 {
47 uint32_t mesa_timestamp;
48 uint16_t f = family;
49
50 if (!disk_cache_get_function_timestamp(get_cache_uuid, &mesa_timestamp))
51 return -1;
52
53 memset(uuid, 0, VK_UUID_SIZE);
54 memcpy(uuid, &mesa_timestamp, 4);
55 memcpy((char *)uuid + 4, &f, 2);
56 snprintf((char *)uuid + 6, VK_UUID_SIZE - 10, "pan");
57 return 0;
58 }
59
60 static void
get_device_extensions(const struct panvk_physical_device * device,struct vk_device_extension_table * ext)61 get_device_extensions(const struct panvk_physical_device *device,
62 struct vk_device_extension_table *ext)
63 {
64 *ext = (struct vk_device_extension_table){
65 .KHR_buffer_device_address = true,
66 .KHR_copy_commands2 = true,
67 .KHR_device_group = true,
68 .KHR_descriptor_update_template = true,
69 .KHR_driver_properties = true,
70 .KHR_maintenance3 = true,
71 .KHR_pipeline_executable_properties = true,
72 .KHR_pipeline_library = true,
73 .KHR_push_descriptor = true,
74 .KHR_sampler_mirror_clamp_to_edge = true,
75 .KHR_shader_expect_assume = true,
76 .KHR_storage_buffer_storage_class = true,
77 #ifdef PANVK_USE_WSI_PLATFORM
78 .KHR_swapchain = true,
79 #endif
80 .KHR_synchronization2 = true,
81 .KHR_variable_pointers = true,
82 .EXT_buffer_device_address = true,
83 .EXT_custom_border_color = true,
84 .EXT_graphics_pipeline_library = true,
85 .EXT_index_type_uint8 = true,
86 .EXT_pipeline_creation_cache_control = true,
87 .EXT_pipeline_creation_feedback = true,
88 .EXT_private_data = true,
89 .EXT_shader_module_identifier = true,
90 .EXT_vertex_attribute_divisor = true,
91 .GOOGLE_decorate_string = true,
92 .GOOGLE_hlsl_functionality1 = true,
93 .GOOGLE_user_type = true,
94 };
95 }
96
97 static void
get_features(const struct panvk_physical_device * device,struct vk_features * features)98 get_features(const struct panvk_physical_device *device,
99 struct vk_features *features)
100 {
101 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
102
103 *features = (struct vk_features){
104 /* Vulkan 1.0 */
105 .robustBufferAccess = true,
106 .fullDrawIndexUint32 = true,
107 .independentBlend = true,
108 .logicOp = true,
109 .wideLines = true,
110 .largePoints = true,
111 .textureCompressionETC2 = true,
112 .textureCompressionASTC_LDR = true,
113 .samplerAnisotropy = true,
114 .shaderUniformBufferArrayDynamicIndexing = true,
115 .shaderSampledImageArrayDynamicIndexing = true,
116 .shaderStorageBufferArrayDynamicIndexing = true,
117 .shaderStorageImageArrayDynamicIndexing = true,
118
119 /* Vulkan 1.1 */
120 .storageBuffer16BitAccess = false,
121 .uniformAndStorageBuffer16BitAccess = false,
122 .storagePushConstant16 = false,
123 .storageInputOutput16 = false,
124 .multiview = false,
125 .multiviewGeometryShader = false,
126 .multiviewTessellationShader = false,
127 .variablePointersStorageBuffer = true,
128 .variablePointers = true,
129 .protectedMemory = false,
130 .samplerYcbcrConversion = false,
131 .shaderDrawParameters = false,
132
133 /* Vulkan 1.2 */
134 .samplerMirrorClampToEdge = true,
135 .drawIndirectCount = false,
136 .storageBuffer8BitAccess = false,
137 .uniformAndStorageBuffer8BitAccess = false,
138 .storagePushConstant8 = false,
139 .shaderBufferInt64Atomics = false,
140 .shaderSharedInt64Atomics = false,
141 .shaderFloat16 = false,
142 .shaderInt8 = false,
143
144 .descriptorIndexing = false,
145 .shaderInputAttachmentArrayDynamicIndexing = false,
146 .shaderUniformTexelBufferArrayDynamicIndexing = false,
147 .shaderStorageTexelBufferArrayDynamicIndexing = false,
148 .shaderUniformBufferArrayNonUniformIndexing = false,
149 .shaderSampledImageArrayNonUniformIndexing = false,
150 .shaderStorageBufferArrayNonUniformIndexing = false,
151 .shaderStorageImageArrayNonUniformIndexing = false,
152 .shaderInputAttachmentArrayNonUniformIndexing = false,
153 .shaderUniformTexelBufferArrayNonUniformIndexing = false,
154 .shaderStorageTexelBufferArrayNonUniformIndexing = false,
155 .descriptorBindingUniformBufferUpdateAfterBind = false,
156 .descriptorBindingSampledImageUpdateAfterBind = false,
157 .descriptorBindingStorageImageUpdateAfterBind = false,
158 .descriptorBindingStorageBufferUpdateAfterBind = false,
159 .descriptorBindingUniformTexelBufferUpdateAfterBind = false,
160 .descriptorBindingStorageTexelBufferUpdateAfterBind = false,
161 .descriptorBindingUpdateUnusedWhilePending = false,
162 .descriptorBindingPartiallyBound = false,
163 .descriptorBindingVariableDescriptorCount = false,
164 .runtimeDescriptorArray = false,
165
166 .samplerFilterMinmax = false,
167 .scalarBlockLayout = false,
168 .imagelessFramebuffer = false,
169 .uniformBufferStandardLayout = false,
170 .shaderSubgroupExtendedTypes = false,
171 .separateDepthStencilLayouts = false,
172 .hostQueryReset = false,
173 .timelineSemaphore = false,
174 .bufferDeviceAddress = true,
175 .bufferDeviceAddressCaptureReplay = false,
176 .bufferDeviceAddressMultiDevice = false,
177 .vulkanMemoryModel = false,
178 .vulkanMemoryModelDeviceScope = false,
179 .vulkanMemoryModelAvailabilityVisibilityChains = false,
180 .shaderOutputViewportIndex = false,
181 .shaderOutputLayer = false,
182 .subgroupBroadcastDynamicId = false,
183
184 /* Vulkan 1.3 */
185 .robustImageAccess = false,
186 .inlineUniformBlock = false,
187 .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
188 .pipelineCreationCacheControl = true,
189 .privateData = true,
190 .shaderDemoteToHelperInvocation = false,
191 .shaderTerminateInvocation = false,
192 .subgroupSizeControl = false,
193 .computeFullSubgroups = false,
194 .synchronization2 = true,
195 .textureCompressionASTC_HDR = false,
196 .shaderZeroInitializeWorkgroupMemory = false,
197 .dynamicRendering = false,
198 .shaderIntegerDotProduct = false,
199 .maintenance4 = false,
200
201 /* VK_EXT_graphics_pipeline_library */
202 .graphicsPipelineLibrary = true,
203
204 /* VK_EXT_index_type_uint8 */
205 .indexTypeUint8 = true,
206
207 /* VK_EXT_vertex_attribute_divisor */
208 .vertexAttributeInstanceRateDivisor = true,
209 .vertexAttributeInstanceRateZeroDivisor = true,
210
211 /* VK_EXT_depth_clip_enable */
212 .depthClipEnable = true,
213
214 /* VK_EXT_4444_formats */
215 .formatA4R4G4B4 = true,
216 .formatA4B4G4R4 = true,
217
218 /* VK_EXT_custom_border_color */
219 .customBorderColors = true,
220
221 /* v7 doesn't support AFBC(BGR). We need to tweak the texture swizzle to
222 * make it work, which forces us to apply the same swizzle on the border
223 * color, meaning we need to know the format when preparing the border
224 * color.
225 */
226 .customBorderColorWithoutFormat = arch != 7,
227
228 /* VK_KHR_pipeline_executable_properties */
229 .pipelineExecutableInfo = true,
230
231 /* VK_KHR_shader_expect_assume */
232 .shaderExpectAssume = true,
233
234 /* VK_EXT_shader_module_identifier */
235 .shaderModuleIdentifier = true,
236 };
237 }
238
239 static void
get_device_properties(const struct panvk_instance * instance,const struct panvk_physical_device * device,struct vk_properties * properties)240 get_device_properties(const struct panvk_instance *instance,
241 const struct panvk_physical_device *device,
242 struct vk_properties *properties)
243 {
244 /* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
245 VkSampleCountFlags sample_counts =
246 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
247
248 uint64_t os_page_size = 4096;
249 os_get_page_size(&os_page_size);
250
251 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
252
253 /* Ensure that the max threads count per workgroup is valid for Bifrost */
254 assert(arch > 8 || device->kmod.props.max_threads_per_wg <= 1024);
255
256 *properties = (struct vk_properties){
257 .apiVersion = panvk_get_vk_version(),
258 .driverVersion = vk_get_driver_version(),
259 .vendorID = ARM_VENDOR_ID,
260
261 /* Collect arch_major, arch_minor, arch_rev and product_major,
262 * as done by the Arm driver.
263 */
264 .deviceID = device->kmod.props.gpu_prod_id << 16,
265 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
266
267 /* Vulkan 1.0 limits */
268 /* Maximum texture dimension is 2^16. */
269 .maxImageDimension1D = (1 << 16),
270 .maxImageDimension2D = (1 << 16),
271 .maxImageDimension3D = (1 << 16),
272 .maxImageDimensionCube = (1 << 16),
273 .maxImageArrayLayers = (1 << 16),
274 /* Currently limited by the 1D texture size, which is 2^16.
275 * TODO: If we expose buffer views as 2D textures, we can increase the
276 * limit.
277 */
278 .maxTexelBufferElements = (1 << 16),
279 /* Each uniform entry is 16-byte and the number of entries is encoded in a
280 * 12-bit field, with the minus(1) modifier, which gives 2^20.
281 */
282 .maxUniformBufferRange = 1 << 20,
283 /* Storage buffer access is lowered to globals, so there's no limit here,
284 * except for the SW-descriptor we use to encode storage buffer
285 * descriptors, where the size is a 32-bit field.
286 */
287 .maxStorageBufferRange = UINT32_MAX,
288 /* 128 bytes of push constants, so we're aligned with the minimum Vulkan
289 * requirements.
290 */
291 .maxPushConstantsSize = 128,
292 /* There's no HW limit here. Should we advertize something smaller? */
293 .maxMemoryAllocationCount = UINT32_MAX,
294 /* Again, no hardware limit, but most drivers seem to advertive 64k. */
295 .maxSamplerAllocationCount = 64 * 1024,
296 /* A cache line. */
297 .bufferImageGranularity = 64,
298 /* Sparse binding not supported yet. */
299 .sparseAddressSpaceSize = 0,
300 /* On Bifrost, this is a software limit. We pick the minimum required by
301 * Vulkan, because Bifrost GPUs don't have unified descriptor tables,
302 * which forces us to agregatte all descriptors from all sets and dispatch
303 * them to per-type descriptor tables emitted at draw/dispatch time. The
304 * more sets we support the more copies we are likely to have to do at
305 * draw time.
306 *
307 * Valhall has native support for descriptor sets, and allows a maximum
308 * of 16 sets, but we reserve one for our internal use, so we have 15
309 * left.
310 */
311 .maxBoundDescriptorSets = arch <= 7 ? 4 : 15,
312 /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
313 .maxDescriptorSetSamplers = UINT16_MAX,
314 /* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
315 * for our internal UBOs.
316 */
317 .maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
318 .maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
319 /* SSBOs are limited by the size of a uniform buffer which contains our
320 * panvk_ssbo_desc objects.
321 * panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
322 * 16-byte too. The number of entries is encoded in a 12-bit field, with
323 * a minus(1) modifier, which gives a maximum of 2^12 SSBO
324 * descriptors.
325 */
326 .maxDescriptorSetStorageBuffers = 1 << 12,
327 /* MALI_RENDERER_STATE::sampler_count is 16-bit. */
328 .maxDescriptorSetSampledImages = UINT16_MAX,
329 /* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
330 * MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
331 */
332 .maxDescriptorSetStorageImages = 1 << 8,
333 /* A maximum of 8 color render targets, and one depth-stencil render
334 * target.
335 */
336 .maxDescriptorSetInputAttachments = 9,
337
338 /* We could theoretically use the maxDescriptor values here (except for
339 * UBOs where we're really limited to 256 on the shader side), but on
340 * Bifrost we have to copy some tables around, which comes at an extra
341 * memory/processing cost, so let's pick something smaller.
342 */
343 .maxPerStageDescriptorInputAttachments = 9,
344 .maxPerStageDescriptorSampledImages = 256,
345 .maxPerStageDescriptorSamplers = 128,
346 .maxPerStageDescriptorStorageBuffers = 64,
347 .maxPerStageDescriptorStorageImages = 32,
348 .maxPerStageDescriptorUniformBuffers = 64,
349 .maxPerStageResources = 9 + 256 + 128 + 64 + 32 + 64,
350
351 /* Software limits to keep VkCommandBuffer tracking sane. */
352 .maxDescriptorSetUniformBuffersDynamic = 16,
353 .maxDescriptorSetStorageBuffersDynamic = 8,
354 /* Software limit to keep VkCommandBuffer tracking sane. The HW supports
355 * up to 2^9 vertex attributes.
356 */
357 .maxVertexInputAttributes = 16,
358 .maxVertexInputBindings = 16,
359 /* MALI_ATTRIBUTE::offset is 32-bit. */
360 .maxVertexInputAttributeOffset = UINT32_MAX,
361 /* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
362 .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
363 /* 32 vec4 varyings. */
364 .maxVertexOutputComponents = 128,
365 /* Tesselation shaders not supported. */
366 .maxTessellationGenerationLevel = 0,
367 .maxTessellationPatchSize = 0,
368 .maxTessellationControlPerVertexInputComponents = 0,
369 .maxTessellationControlPerVertexOutputComponents = 0,
370 .maxTessellationControlPerPatchOutputComponents = 0,
371 .maxTessellationControlTotalOutputComponents = 0,
372 .maxTessellationEvaluationInputComponents = 0,
373 .maxTessellationEvaluationOutputComponents = 0,
374 /* Geometry shaders not supported. */
375 .maxGeometryShaderInvocations = 0,
376 .maxGeometryInputComponents = 0,
377 .maxGeometryOutputComponents = 0,
378 .maxGeometryOutputVertices = 0,
379 .maxGeometryTotalOutputComponents = 0,
380 /* 32 vec4 varyings. */
381 .maxFragmentInputComponents = 128,
382 /* 8 render targets. */
383 .maxFragmentOutputAttachments = 8,
384 /* We don't support dual source blending yet. */
385 .maxFragmentDualSrcAttachments = 0,
386 /* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
387 * above).
388 */
389 .maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
390 /* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
391 * (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
392 * really make sense to expose this amount of memory, especially since
393 * it's backed by global memory anyway.
394 */
395 .maxComputeSharedMemorySize = 32768,
396 /* Software limit to meet Vulkan 1.0 requirements. We split the
397 * dispatch in several jobs if it's too big.
398 */
399 .maxComputeWorkGroupCount = {65535, 65535, 65535},
400
401 /* We could also split into serveral jobs but this has many limitations.
402 * As such we limit to the max threads per workgroup supported by the GPU.
403 */
404 .maxComputeWorkGroupInvocations = device->kmod.props.max_threads_per_wg,
405 .maxComputeWorkGroupSize = {device->kmod.props.max_threads_per_wg,
406 device->kmod.props.max_threads_per_wg,
407 device->kmod.props.max_threads_per_wg},
408 /* 8-bit subpixel precision. */
409 .subPixelPrecisionBits = 8,
410 .subTexelPrecisionBits = 8,
411 .mipmapPrecisionBits = 8,
412 /* Software limit. */
413 .maxDrawIndexedIndexValue = UINT32_MAX,
414 /* Make it one for now. */
415 .maxDrawIndirectCount = 1,
416 .maxSamplerLodBias = 255,
417 .maxSamplerAnisotropy = 16,
418 .maxViewports = 1,
419 /* Same as the framebuffer limit. */
420 .maxViewportDimensions = {(1 << 14), (1 << 14)},
421 /* Encoded in a 16-bit signed integer. */
422 .viewportBoundsRange = {INT16_MIN, INT16_MAX},
423 .viewportSubPixelBits = 0,
424 /* Align on a page. */
425 .minMemoryMapAlignment = os_page_size,
426 /* Some compressed texture formats require 128-byte alignment. */
427 .minTexelBufferOffsetAlignment = 64,
428 /* Always aligned on a uniform slot (vec4). */
429 .minUniformBufferOffsetAlignment = 16,
430 /* Lowered to global accesses, which happen at the 32-bit granularity. */
431 .minStorageBufferOffsetAlignment = 4,
432 /* Signed 4-bit value. */
433 .minTexelOffset = -8,
434 .maxTexelOffset = 7,
435 .minTexelGatherOffset = -8,
436 .maxTexelGatherOffset = 7,
437 .minInterpolationOffset = -0.5,
438 .maxInterpolationOffset = 0.5,
439 .subPixelInterpolationOffsetBits = 8,
440 .maxFramebufferWidth = (1 << 14),
441 .maxFramebufferHeight = (1 << 14),
442 .maxFramebufferLayers = 256,
443 .framebufferColorSampleCounts = sample_counts,
444 .framebufferDepthSampleCounts = sample_counts,
445 .framebufferStencilSampleCounts = sample_counts,
446 .framebufferNoAttachmentsSampleCounts = sample_counts,
447 .maxColorAttachments = 8,
448 .sampledImageColorSampleCounts = sample_counts,
449 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
450 .sampledImageDepthSampleCounts = sample_counts,
451 .sampledImageStencilSampleCounts = sample_counts,
452 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
453 .maxSampleMaskWords = 1,
454 .timestampComputeAndGraphics = false,
455 .timestampPeriod = 0,
456 .maxClipDistances = 0,
457 .maxCullDistances = 0,
458 .maxCombinedClipAndCullDistances = 0,
459 .discreteQueuePriorities = 2,
460 .pointSizeRange = {0.125, 4095.9375},
461 .lineWidthRange = {0.0, 7.9921875},
462 .pointSizeGranularity = (1.0 / 16.0),
463 .lineWidthGranularity = (1.0 / 128.0),
464 .strictLines = false,
465 .standardSampleLocations = true,
466 .optimalBufferCopyOffsetAlignment = 64,
467 .optimalBufferCopyRowPitchAlignment = 64,
468 .nonCoherentAtomSize = 64,
469
470 /* Vulkan 1.0 sparse properties */
471 .sparseResidencyNonResidentStrict = false,
472 .sparseResidencyAlignedMipSize = false,
473 .sparseResidencyStandard2DBlockShape = false,
474 .sparseResidencyStandard2DMultisampleBlockShape = false,
475 .sparseResidencyStandard3DBlockShape = false,
476
477 /* Vulkan 1.1 properties */
478 /* XXX: 1.1 support */
479 .subgroupSize = 8,
480 .subgroupSupportedStages = VK_SHADER_STAGE_ALL,
481 .subgroupSupportedOperations =
482 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
483 VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
484 VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
485 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
486 VK_SUBGROUP_FEATURE_VOTE_BIT,
487 .subgroupQuadOperationsInAllStages = false,
488 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
489 .maxMultiviewViewCount = 0,
490 .maxMultiviewInstanceIndex = 0,
491 .protectedNoFault = false,
492 .maxPerSetDescriptors = UINT16_MAX,
493 /* Our buffer size fields allow only this much */
494 .maxMemoryAllocationSize = UINT32_MAX,
495
496 /* Vulkan 1.2 properties */
497 /* XXX: 1.2 support */
498 /* XXX: VK_KHR_depth_stencil_resolve */
499 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
500 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
501 .independentResolveNone = true,
502 .independentResolve = true,
503 /* VK_KHR_driver_properties */
504 .driverID = VK_DRIVER_ID_MESA_PANVK,
505 .conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
506 /* XXX: VK_KHR_shader_float_controls */
507 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
508 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
509 .shaderSignedZeroInfNanPreserveFloat16 = true,
510 .shaderSignedZeroInfNanPreserveFloat32 = true,
511 .shaderSignedZeroInfNanPreserveFloat64 = false,
512 .shaderDenormPreserveFloat16 = true,
513 .shaderDenormPreserveFloat32 = true,
514 .shaderDenormPreserveFloat64 = false,
515 .shaderDenormFlushToZeroFloat16 = true,
516 .shaderDenormFlushToZeroFloat32 = true,
517 .shaderDenormFlushToZeroFloat64 = false,
518 .shaderRoundingModeRTEFloat16 = true,
519 .shaderRoundingModeRTEFloat32 = true,
520 .shaderRoundingModeRTEFloat64 = false,
521 .shaderRoundingModeRTZFloat16 = true,
522 .shaderRoundingModeRTZFloat32 = true,
523 .shaderRoundingModeRTZFloat64 = false,
524 /* XXX: VK_EXT_descriptor_indexing */
525 .maxUpdateAfterBindDescriptorsInAllPools = 0,
526 .shaderUniformBufferArrayNonUniformIndexingNative = false,
527 .shaderSampledImageArrayNonUniformIndexingNative = false,
528 .shaderStorageBufferArrayNonUniformIndexingNative = false,
529 .shaderStorageImageArrayNonUniformIndexingNative = false,
530 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
531 .robustBufferAccessUpdateAfterBind = false,
532 .quadDivergentImplicitLod = false,
533 .maxPerStageDescriptorUpdateAfterBindSamplers = 0,
534 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
535 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
536 .maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
537 .maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
538 .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
539 .maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
540 .maxPerStageUpdateAfterBindResources = 0,
541 .maxDescriptorSetUpdateAfterBindSamplers = 0,
542 .maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
543 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
544 .maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
545 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
546 .maxDescriptorSetUpdateAfterBindSampledImages = 0,
547 .maxDescriptorSetUpdateAfterBindStorageImages = 0,
548 .maxDescriptorSetUpdateAfterBindInputAttachments = 0,
549 /* XXX: VK_EXT_sampler_filter_minmax */
550 .filterMinmaxSingleComponentFormats = false,
551 .filterMinmaxImageComponentMapping = false,
552 /* XXX: VK_KHR_timeline_semaphore */
553 .maxTimelineSemaphoreValueDifference = INT64_MAX,
554 .framebufferIntegerColorSampleCounts = sample_counts,
555
556 /* Vulkan 1.3 properties */
557 /* XXX: 1.3 support */
558 /* XXX: VK_EXT_subgroup_size_control */
559 .minSubgroupSize = 8,
560 .maxSubgroupSize = 8,
561 .maxComputeWorkgroupSubgroups = 48,
562 .requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL,
563 /* XXX: VK_EXT_inline_uniform_block */
564 .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
565 .maxPerStageDescriptorInlineUniformBlocks =
566 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
567 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
568 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
569 .maxDescriptorSetInlineUniformBlocks =
570 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
571 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
572 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
573 .maxInlineUniformTotalSize =
574 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
575 /* XXX: VK_KHR_shader_integer_dot_product */
576 .integerDotProduct8BitUnsignedAccelerated = true,
577 .integerDotProduct8BitSignedAccelerated = true,
578 .integerDotProduct4x8BitPackedUnsignedAccelerated = true,
579 .integerDotProduct4x8BitPackedSignedAccelerated = true,
580 /* XXX: VK_EXT_texel_buffer_alignment */
581 .storageTexelBufferOffsetAlignmentBytes = 64,
582 .storageTexelBufferOffsetSingleTexelAlignment = false,
583 .uniformTexelBufferOffsetAlignmentBytes = 4,
584 .uniformTexelBufferOffsetSingleTexelAlignment = true,
585 /* XXX: VK_KHR_maintenance4 */
586 .maxBufferSize = 1 << 30,
587
588 /* VK_EXT_custom_border_color */
589 .maxCustomBorderColorSamplers = 32768,
590
591 /* VK_EXT_graphics_pipeline_library */
592 .graphicsPipelineLibraryFastLinking = true,
593 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
594
595 /* VK_KHR_vertex_attribute_divisor */
596 /* We will have to restrict this a bit for multiview */
597 .maxVertexAttribDivisor = UINT32_MAX,
598 .supportsNonZeroFirstInstance = false,
599
600 /* VK_KHR_push_descriptor */
601 .maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
602 };
603
604 snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
605 device->name);
606
607 memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
608
609 const struct {
610 uint16_t vendor_id;
611 uint32_t device_id;
612 uint8_t pad[8];
613 } dev_uuid = {
614 .vendor_id = ARM_VENDOR_ID,
615 .device_id = device->model->gpu_id,
616 };
617
618 STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
619 memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
620 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
621 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
622
623 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
624 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
625 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
626
627 /* VK_EXT_shader_module_identifier */
628 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
629 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
630 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
631 vk_shaderModuleIdentifierAlgorithmUUID,
632 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
633 }
634
635 void
panvk_physical_device_finish(struct panvk_physical_device * device)636 panvk_physical_device_finish(struct panvk_physical_device *device)
637 {
638 panvk_wsi_finish(device);
639
640 pan_kmod_dev_destroy(device->kmod.dev);
641 if (device->master_fd != -1)
642 close(device->master_fd);
643
644 vk_physical_device_finish(&device->vk);
645 }
646
647 VkResult
panvk_physical_device_init(struct panvk_physical_device * device,struct panvk_instance * instance,drmDevicePtr drm_device)648 panvk_physical_device_init(struct panvk_physical_device *device,
649 struct panvk_instance *instance,
650 drmDevicePtr drm_device)
651 {
652 const char *path = drm_device->nodes[DRM_NODE_RENDER];
653 VkResult result = VK_SUCCESS;
654 drmVersionPtr version;
655 int fd;
656 int master_fd = -1;
657
658 fd = open(path, O_RDWR | O_CLOEXEC);
659 if (fd < 0) {
660 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
661 "failed to open device %s", path);
662 }
663
664 version = drmGetVersion(fd);
665 if (!version) {
666 close(fd);
667 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
668 "failed to query kernel driver version for device %s",
669 path);
670 }
671
672 if (strcmp(version->name, "panfrost") && strcmp(version->name, "panthor")) {
673 drmFreeVersion(version);
674 close(fd);
675 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
676 "device %s does not use the panfrost kernel driver",
677 path);
678 }
679
680 drmFreeVersion(version);
681
682 if (!getenv("PAN_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
683 close(fd);
684 return vk_errorf(
685 instance, VK_ERROR_INCOMPATIBLE_DRIVER,
686 "WARNING: panvk is not a conformant vulkan implementation, "
687 "pass PAN_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know what you're doing.");
688 }
689
690 if (instance->debug_flags & PANVK_DEBUG_STARTUP)
691 vk_logi(VK_LOG_NO_OBJS(instance), "Found compatible device '%s'.", path);
692
693 device->kmod.dev = pan_kmod_dev_create(fd, PAN_KMOD_DEV_FLAG_OWNS_FD,
694 &instance->kmod.allocator);
695
696 if (!device->kmod.dev) {
697 result = vk_errorf(instance, panvk_errno_to_vk_error(), "cannot create device");
698 goto fail;
699 }
700
701 pan_kmod_dev_query_props(device->kmod.dev, &device->kmod.props);
702
703 device->model = panfrost_get_model(device->kmod.props.gpu_prod_id,
704 device->kmod.props.gpu_variant);
705
706 unsigned arch = pan_arch(device->kmod.props.gpu_prod_id);
707
708 switch (arch) {
709 case 6:
710 case 7:
711 case 10:
712 break;
713
714 default:
715 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
716 "%s not supported", device->model->name);
717 goto fail;
718 }
719
720 if (instance->vk.enabled_extensions.KHR_display) {
721 master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
722 if (master_fd >= 0) {
723 /* TODO: free master_fd is accel is not working? */
724 }
725 }
726
727 device->master_fd = master_fd;
728
729 device->formats.all = panfrost_format_table(arch);
730 device->formats.blendable = panfrost_blendable_format_table(arch);
731
732 memset(device->name, 0, sizeof(device->name));
733 sprintf(device->name, "%s", device->model->name);
734
735 if (get_cache_uuid(device->kmod.props.gpu_prod_id, device->cache_uuid)) {
736 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
737 "cannot generate UUID");
738 goto fail;
739 }
740
741 vk_warn_non_conformant_implementation("panvk");
742
743 device->drm_syncobj_type = vk_drm_syncobj_get_type(device->kmod.dev->fd);
744 /* We don't support timelines in the uAPI yet and we don't want it getting
745 * suddenly turned on by vk_drm_syncobj_get_type() without us adding panvk
746 * code for it first.
747 */
748 device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
749
750 struct vk_device_extension_table supported_extensions;
751 get_device_extensions(device, &supported_extensions);
752
753 struct vk_features supported_features;
754 get_features(device, &supported_features);
755
756 struct vk_properties properties;
757 get_device_properties(instance, device, &properties);
758
759 struct vk_physical_device_dispatch_table dispatch_table;
760 vk_physical_device_dispatch_table_from_entrypoints(
761 &dispatch_table, &panvk_physical_device_entrypoints, true);
762 vk_physical_device_dispatch_table_from_entrypoints(
763 &dispatch_table, &wsi_physical_device_entrypoints, false);
764
765 result = vk_physical_device_init(&device->vk, &instance->vk,
766 &supported_extensions, &supported_features,
767 &properties, &dispatch_table);
768
769 if (result != VK_SUCCESS) {
770 vk_error(instance, result);
771 goto fail;
772 }
773
774 device->sync_types[0] = &device->drm_syncobj_type;
775 device->sync_types[1] = NULL;
776 device->vk.supported_sync_types = device->sync_types;
777
778 result = panvk_wsi_init(device);
779 if (result != VK_SUCCESS) {
780 vk_error(instance, result);
781 goto fail;
782 }
783
784 return VK_SUCCESS;
785
786 fail:
787 if (device->vk.instance)
788 vk_physical_device_finish(&device->vk);
789
790 if (device->kmod.dev)
791 pan_kmod_dev_destroy(device->kmod.dev);
792
793 if (fd != -1)
794 close(fd);
795 if (master_fd != -1)
796 close(master_fd);
797 return result;
798 }
799
800 static const VkQueueFamilyProperties panvk_queue_family_properties = {
801 .queueFlags =
802 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
803 .queueCount = 1,
804 .timestampValidBits = 0,
805 .minImageTransferGranularity = {1, 1, 1},
806 };
807
808 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)809 panvk_GetPhysicalDeviceQueueFamilyProperties2(
810 VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
811 VkQueueFamilyProperties2 *pQueueFamilyProperties)
812 {
813 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
814 pQueueFamilyPropertyCount);
815
816 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
817 {
818 p->queueFamilyProperties = panvk_queue_family_properties;
819 }
820 }
821
/*
 * Return the number of bytes of system RAM the driver advertises as its
 * memory heap: half of the total RAM when the system has 4GiB or less,
 * three quarters of it otherwise.
 *
 * Returns 0 when sysinfo() fails, instead of deriving a size from an
 * uninitialized struct.
 */
static uint64_t
get_system_heap_size(void)
{
   struct sysinfo info;

   if (sysinfo(&info) != 0)
      return 0;

   uint64_t total_ram = (uint64_t)info.totalram * info.mem_unit;

   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   if (total_ram <= 4ull * 1024 * 1024 * 1024)
      return total_ram / 2;

   return total_ram * 3 / 4;
}
841
842 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)843 panvk_GetPhysicalDeviceMemoryProperties2(
844 VkPhysicalDevice physicalDevice,
845 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
846 {
847 pMemoryProperties->memoryProperties = (VkPhysicalDeviceMemoryProperties){
848 .memoryHeapCount = 1,
849 .memoryHeaps[0].size = get_system_heap_size(),
850 .memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
851 .memoryTypeCount = 1,
852 .memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
853 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
854 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
855 .memoryTypes[0].heapIndex = 0,
856 };
857 }
858
859 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalSemaphoreProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalSemaphoreInfo * pExternalSemaphoreInfo,VkExternalSemaphoreProperties * pExternalSemaphoreProperties)860 panvk_GetPhysicalDeviceExternalSemaphoreProperties(
861 VkPhysicalDevice physicalDevice,
862 const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
863 VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
864 {
865 if ((pExternalSemaphoreInfo->handleType ==
866 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
867 pExternalSemaphoreInfo->handleType ==
868 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
869 pExternalSemaphoreProperties->exportFromImportedHandleTypes =
870 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
871 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
872 pExternalSemaphoreProperties->compatibleHandleTypes =
873 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
874 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
875 pExternalSemaphoreProperties->externalSemaphoreFeatures =
876 VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
877 VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
878 } else {
879 pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
880 pExternalSemaphoreProperties->compatibleHandleTypes = 0;
881 pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
882 }
883 }
884
885 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalFenceProperties(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceExternalFenceInfo * pExternalFenceInfo,VkExternalFenceProperties * pExternalFenceProperties)886 panvk_GetPhysicalDeviceExternalFenceProperties(
887 VkPhysicalDevice physicalDevice,
888 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
889 VkExternalFenceProperties *pExternalFenceProperties)
890 {
891 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
892 pExternalFenceProperties->compatibleHandleTypes = 0;
893 pExternalFenceProperties->externalFenceFeatures = 0;
894 }
895
/* Declare the per-architecture device entry points
 * panvk_v<ver>_create_device() and panvk_v<ver>_destroy_device() for a given
 * architecture version. The trailing semicolon is supplied at the
 * instantiation site.
 */
#define DEVICE_PER_ARCH_FUNCS(_ver)                                            \
   VkResult panvk_v##_ver##_create_device(                                     \
      struct panvk_physical_device *physical_device,                           \
      const VkDeviceCreateInfo *pCreateInfo,                                   \
      const VkAllocationCallbacks *pAllocator, VkDevice *pDevice);             \
                                                                               \
   void panvk_v##_ver##_destroy_device(                                        \
      struct panvk_device *device, const VkAllocationCallbacks *pAllocator)

/* Architecture versions with per-arch device entry points declared here. */
DEVICE_PER_ARCH_FUNCS(6);
DEVICE_PER_ARCH_FUNCS(7);
DEVICE_PER_ARCH_FUNCS(10);
908
909 VKAPI_ATTR VkResult VKAPI_CALL
panvk_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)910 panvk_CreateDevice(VkPhysicalDevice physicalDevice,
911 const VkDeviceCreateInfo *pCreateInfo,
912 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
913 {
914 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
915 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
916 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
917
918 panvk_arch_dispatch_ret(arch, create_device, result, physical_device,
919 pCreateInfo, pAllocator, pDevice);
920
921 return result;
922 }
923
924 VKAPI_ATTR void VKAPI_CALL
panvk_DestroyDevice(VkDevice _device,const VkAllocationCallbacks * pAllocator)925 panvk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
926 {
927 VK_FROM_HANDLE(panvk_device, device, _device);
928 struct panvk_physical_device *physical_device =
929 to_panvk_physical_device(device->vk.physical);
930 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
931
932 panvk_arch_dispatch(arch, destroy_device, device, pAllocator);
933 }
934
935 static bool
format_is_supported(struct panvk_physical_device * physical_device,const struct panfrost_format fmt)936 format_is_supported(struct panvk_physical_device *physical_device,
937 const struct panfrost_format fmt)
938 {
939 /* If the format ID is zero, it's not supported. */
940 if (!fmt.hw)
941 return false;
942
943 /* Compressed formats (ID < 32) are optional. We need to check against
944 * the supported formats reported by the GPU. */
945 unsigned idx = MALI_EXTRACT_INDEX(fmt.hw);
946 if (MALI_EXTRACT_TYPE(idx) == MALI_FORMAT_COMPRESSED) {
947 uint32_t supported_compr_fmts =
948 panfrost_query_compressed_formats(&physical_device->kmod.props);
949
950 assert(idx < 32);
951
952 if (!(BITFIELD_BIT(idx) & supported_compr_fmts))
953 return false;
954 }
955
956 return true;
957 }
958
959 static void
get_format_properties(struct panvk_physical_device * physical_device,VkFormat format,VkFormatProperties * out_properties)960 get_format_properties(struct panvk_physical_device *physical_device,
961 VkFormat format, VkFormatProperties *out_properties)
962 {
963 VkFormatFeatureFlags tex = 0, buffer = 0;
964 enum pipe_format pfmt = vk_format_to_pipe_format(format);
965 unsigned arch = pan_arch(physical_device->kmod.props.gpu_prod_id);
966
967 /* FIXME: Valhall doesn't support interleaved D32_S8X24. Implement it as
968 * a multi-plane format, and we probably want to switch Bifrost to this
969 * layout too, since:
970 * - it's more cache-friendly (you load more samples on a cache-line if you don't
971 * have those 24 dummy bits)
972 * - it takes less memory (you don't lose those 24bits per texel)
973 * - we can use AFBC
974 */
975 if (arch >= 9 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
976 goto end;
977
978 if (pfmt == PIPE_FORMAT_NONE)
979 goto end;
980
981 const struct panfrost_format fmt = physical_device->formats.all[pfmt];
982
983 if (!format_is_supported(physical_device, fmt))
984 goto end;
985
986 /* 3byte formats are not supported by the buffer <-> image copy helpers. */
987 if (util_format_get_blocksize(pfmt) == 3)
988 goto end;
989
990 /* Reject sRGB formats (see
991 * https://github.com/KhronosGroup/Vulkan-Docs/issues/2214).
992 */
993 if ((fmt.bind & PAN_BIND_VERTEX_BUFFER) && !util_format_is_srgb(pfmt))
994 buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
995
996 if (fmt.bind & PAN_BIND_SAMPLER_VIEW) {
997 tex |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
998 VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
999 VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
1000 VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
1001 VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
1002
1003 /* Integer formats only support nearest filtering */
1004 if (!util_format_is_scaled(pfmt) && !util_format_is_pure_integer(pfmt))
1005 tex |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
1006
1007 if (!util_format_is_depth_or_stencil(pfmt))
1008 buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
1009
1010 tex |= VK_FORMAT_FEATURE_BLIT_SRC_BIT;
1011 }
1012
1013 if (fmt.bind & PAN_BIND_RENDER_TARGET) {
1014 tex |= VK_FORMAT_FEATURE_BLIT_DST_BIT;
1015 tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
1016
1017 /* SNORM rendering isn't working yet (nir_lower_blend bugs), disable for
1018 * now.
1019 *
1020 * XXX: Enable once fixed.
1021 */
1022 if (!util_format_is_snorm(pfmt)) {
1023 tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
1024 tex |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
1025 }
1026
1027 if (!util_format_is_depth_and_stencil(pfmt))
1028 buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
1029 }
1030
1031 if (pfmt == PIPE_FORMAT_R32_UINT || pfmt == PIPE_FORMAT_R32_SINT) {
1032 buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
1033 tex |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
1034 }
1035
1036 if (fmt.bind & PAN_BIND_DEPTH_STENCIL)
1037 tex |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
1038
1039 end:
1040 out_properties->linearTilingFeatures = tex;
1041 out_properties->optimalTilingFeatures = tex;
1042 out_properties->bufferFeatures = buffer;
1043 }
1044
1045 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,VkFormat format,VkFormatProperties * pFormatProperties)1046 panvk_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice,
1047 VkFormat format,
1048 VkFormatProperties *pFormatProperties)
1049 {
1050 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1051
1052 get_format_properties(physical_device, format, pFormatProperties);
1053 }
1054
1055 VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,VkFormat format,VkFormatProperties2 * pFormatProperties)1056 panvk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
1057 VkFormat format,
1058 VkFormatProperties2 *pFormatProperties)
1059 {
1060 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1061
1062 get_format_properties(physical_device, format,
1063 &pFormatProperties->formatProperties);
1064
1065 VkDrmFormatModifierPropertiesListEXT *list = vk_find_struct(
1066 pFormatProperties->pNext, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
1067 if (list) {
1068 VK_OUTARRAY_MAKE_TYPED(VkDrmFormatModifierPropertiesEXT, out,
1069 list->pDrmFormatModifierProperties,
1070 &list->drmFormatModifierCount);
1071
1072 vk_outarray_append_typed(VkDrmFormatModifierProperties2EXT, &out,
1073 mod_props)
1074 {
1075 mod_props->drmFormatModifier = DRM_FORMAT_MOD_LINEAR;
1076 mod_props->drmFormatModifierPlaneCount = 1;
1077 }
1078 }
1079 }
1080
1081 static VkResult
get_image_format_properties(struct panvk_physical_device * physical_device,const VkPhysicalDeviceImageFormatInfo2 * info,VkImageFormatProperties * pImageFormatProperties,VkFormatFeatureFlags * p_feature_flags)1082 get_image_format_properties(struct panvk_physical_device *physical_device,
1083 const VkPhysicalDeviceImageFormatInfo2 *info,
1084 VkImageFormatProperties *pImageFormatProperties,
1085 VkFormatFeatureFlags *p_feature_flags)
1086 {
1087 VkFormatProperties format_props;
1088 VkFormatFeatureFlags format_feature_flags;
1089 VkExtent3D maxExtent;
1090 uint32_t maxMipLevels;
1091 uint32_t maxArraySize;
1092 VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
1093 enum pipe_format format = vk_format_to_pipe_format(info->format);
1094
1095 get_format_properties(physical_device, info->format, &format_props);
1096
1097 switch (info->tiling) {
1098 case VK_IMAGE_TILING_LINEAR:
1099 format_feature_flags = format_props.linearTilingFeatures;
1100 break;
1101
1102 case VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT:
1103 /* The only difference between optimal and linear is currently whether
1104 * depth/stencil attachments are allowed on depth/stencil formats.
1105 * There's no reason to allow importing depth/stencil textures, so just
1106 * disallow it and then this annoying edge case goes away.
1107 *
1108 * TODO: If anyone cares, we could enable this by looking at the
1109 * modifier and checking if it's LINEAR or not.
1110 */
1111 if (util_format_is_depth_or_stencil(format))
1112 goto unsupported;
1113
1114 assert(format_props.optimalTilingFeatures ==
1115 format_props.linearTilingFeatures);
1116 FALLTHROUGH;
1117 case VK_IMAGE_TILING_OPTIMAL:
1118 format_feature_flags = format_props.optimalTilingFeatures;
1119 break;
1120 default:
1121 unreachable("bad VkPhysicalDeviceImageFormatInfo2");
1122 }
1123
1124 if (format_feature_flags == 0)
1125 goto unsupported;
1126
1127 switch (info->type) {
1128 default:
1129 unreachable("bad vkimage type");
1130 case VK_IMAGE_TYPE_1D:
1131 maxExtent.width = 16384;
1132 maxExtent.height = 1;
1133 maxExtent.depth = 1;
1134 maxMipLevels = 15; /* log2(maxWidth) + 1 */
1135 maxArraySize = 2048;
1136 break;
1137 case VK_IMAGE_TYPE_2D:
1138 maxExtent.width = 16384;
1139 maxExtent.height = 16384;
1140 maxExtent.depth = 1;
1141 maxMipLevels = 15; /* log2(maxWidth) + 1 */
1142 maxArraySize = 2048;
1143 break;
1144 case VK_IMAGE_TYPE_3D:
1145 maxExtent.width = 2048;
1146 maxExtent.height = 2048;
1147 maxExtent.depth = 2048;
1148 maxMipLevels = 12; /* log2(maxWidth) + 1 */
1149 maxArraySize = 1;
1150 break;
1151 }
1152
1153 if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
1154 info->type == VK_IMAGE_TYPE_2D &&
1155 (format_feature_flags &
1156 (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
1157 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
1158 !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
1159 !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
1160 sampleCounts |= VK_SAMPLE_COUNT_4_BIT;
1161 }
1162
1163 if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
1164 if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
1165 goto unsupported;
1166 }
1167 }
1168
1169 if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
1170 if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
1171 goto unsupported;
1172 }
1173 }
1174
1175 if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
1176 if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
1177 goto unsupported;
1178 }
1179 }
1180
1181 if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
1182 if (!(format_feature_flags &
1183 VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
1184 goto unsupported;
1185 }
1186 }
1187
1188 *pImageFormatProperties = (VkImageFormatProperties){
1189 .maxExtent = maxExtent,
1190 .maxMipLevels = maxMipLevels,
1191 .maxArrayLayers = maxArraySize,
1192 .sampleCounts = sampleCounts,
1193
1194 /* FINISHME: Accurately calculate
1195 * VkImageFormatProperties::maxResourceSize.
1196 */
1197 .maxResourceSize = UINT32_MAX,
1198 };
1199
1200 if (p_feature_flags)
1201 *p_feature_flags = format_feature_flags;
1202
1203 return VK_SUCCESS;
1204 unsupported:
1205 *pImageFormatProperties = (VkImageFormatProperties){
1206 .maxExtent = {0, 0, 0},
1207 .maxMipLevels = 0,
1208 .maxArrayLayers = 0,
1209 .sampleCounts = 0,
1210 .maxResourceSize = 0,
1211 };
1212
1213 return VK_ERROR_FORMAT_NOT_SUPPORTED;
1214 }
1215
1216 VKAPI_ATTR VkResult VKAPI_CALL
panvk_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice,VkFormat format,VkImageType type,VkImageTiling tiling,VkImageUsageFlags usage,VkImageCreateFlags createFlags,VkImageFormatProperties * pImageFormatProperties)1217 panvk_GetPhysicalDeviceImageFormatProperties(
1218 VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
1219 VkImageTiling tiling, VkImageUsageFlags usage,
1220 VkImageCreateFlags createFlags,
1221 VkImageFormatProperties *pImageFormatProperties)
1222 {
1223 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1224
1225 const VkPhysicalDeviceImageFormatInfo2 info = {
1226 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
1227 .pNext = NULL,
1228 .format = format,
1229 .type = type,
1230 .tiling = tiling,
1231 .usage = usage,
1232 .flags = createFlags,
1233 };
1234
1235 return get_image_format_properties(physical_device, &info,
1236 pImageFormatProperties, NULL);
1237 }
1238
1239 static VkResult
panvk_get_external_image_format_properties(const struct panvk_physical_device * physical_device,const VkPhysicalDeviceImageFormatInfo2 * pImageFormatInfo,VkExternalMemoryHandleTypeFlagBits handleType,VkExternalMemoryProperties * external_properties)1240 panvk_get_external_image_format_properties(
1241 const struct panvk_physical_device *physical_device,
1242 const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
1243 VkExternalMemoryHandleTypeFlagBits handleType,
1244 VkExternalMemoryProperties *external_properties)
1245 {
1246 VkExternalMemoryFeatureFlagBits flags = 0;
1247 VkExternalMemoryHandleTypeFlags export_flags = 0;
1248 VkExternalMemoryHandleTypeFlags compat_flags = 0;
1249
1250 /* From the Vulkan 1.1.98 spec:
1251 *
1252 * If handleType is not compatible with the format, type, tiling,
1253 * usage, and flags specified in VkPhysicalDeviceImageFormatInfo2,
1254 * then vkGetPhysicalDeviceImageFormatProperties2 returns
1255 * VK_ERROR_FORMAT_NOT_SUPPORTED.
1256 */
1257 switch (handleType) {
1258 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
1259 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
1260 switch (pImageFormatInfo->type) {
1261 case VK_IMAGE_TYPE_2D:
1262 flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT |
1263 VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
1264 VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1265 compat_flags = export_flags =
1266 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
1267 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
1268 break;
1269 default:
1270 return vk_errorf(
1271 physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1272 "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)",
1273 handleType, pImageFormatInfo->type);
1274 }
1275 break;
1276 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
1277 flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
1278 compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
1279 break;
1280 default:
1281 return vk_errorf(physical_device, VK_ERROR_FORMAT_NOT_SUPPORTED,
1282 "VkExternalMemoryTypeFlagBits(0x%x) unsupported",
1283 handleType);
1284 }
1285
1286 *external_properties = (VkExternalMemoryProperties){
1287 .externalMemoryFeatures = flags,
1288 .exportFromImportedHandleTypes = export_flags,
1289 .compatibleHandleTypes = compat_flags,
1290 };
1291
1292 return VK_SUCCESS;
1293 }
1294
1295 VKAPI_ATTR VkResult VKAPI_CALL
panvk_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,const VkPhysicalDeviceImageFormatInfo2 * base_info,VkImageFormatProperties2 * base_props)1296 panvk_GetPhysicalDeviceImageFormatProperties2(
1297 VkPhysicalDevice physicalDevice,
1298 const VkPhysicalDeviceImageFormatInfo2 *base_info,
1299 VkImageFormatProperties2 *base_props)
1300 {
1301 VK_FROM_HANDLE(panvk_physical_device, physical_device, physicalDevice);
1302 const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
1303 const VkPhysicalDeviceImageViewImageFormatInfoEXT *image_view_info = NULL;
1304 VkExternalImageFormatProperties *external_props = NULL;
1305 VkFilterCubicImageViewImageFormatPropertiesEXT *cubic_props = NULL;
1306 VkFormatFeatureFlags format_feature_flags;
1307 VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
1308 VkResult result;
1309
1310 result = get_image_format_properties(physical_device, base_info,
1311 &base_props->imageFormatProperties,
1312 &format_feature_flags);
1313 if (result != VK_SUCCESS)
1314 return result;
1315
1316 /* Extract input structs */
1317 vk_foreach_struct_const(s, base_info->pNext) {
1318 switch (s->sType) {
1319 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
1320 external_info = (const void *)s;
1321 break;
1322 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT:
1323 image_view_info = (const void *)s;
1324 break;
1325 default:
1326 break;
1327 }
1328 }
1329
1330 /* Extract output structs */
1331 vk_foreach_struct(s, base_props->pNext) {
1332 switch (s->sType) {
1333 case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
1334 external_props = (void *)s;
1335 break;
1336 case VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT:
1337 cubic_props = (void *)s;
1338 break;
1339 case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
1340 ycbcr_props = (void *)s;
1341 break;
1342 default:
1343 break;
1344 }
1345 }
1346
1347 /* From the Vulkan 1.0.42 spec:
1348 *
1349 * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
1350 * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
1351 * present and VkExternalImageFormatProperties will be ignored.
1352 */
1353 if (external_info && external_info->handleType != 0) {
1354 VkExternalImageFormatProperties fallback_external_props;
1355
1356 if (!external_props) {
1357 memset(&fallback_external_props, 0, sizeof(fallback_external_props));
1358 external_props = &fallback_external_props;
1359 }
1360
1361 result = panvk_get_external_image_format_properties(
1362 physical_device, base_info, external_info->handleType,
1363 &external_props->externalMemoryProperties);
1364 if (result != VK_SUCCESS)
1365 goto fail;
1366 }
1367
1368 if (cubic_props) {
1369 /* note: blob only allows cubic filtering for 2D and 2D array views
1370 * its likely we can enable it for 1D and CUBE, needs testing however
1371 */
1372 if ((image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D ||
1373 image_view_info->imageViewType == VK_IMAGE_VIEW_TYPE_2D_ARRAY) &&
1374 (format_feature_flags &
1375 VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT)) {
1376 cubic_props->filterCubic = true;
1377 cubic_props->filterCubicMinmax = true;
1378 } else {
1379 cubic_props->filterCubic = false;
1380 cubic_props->filterCubicMinmax = false;
1381 }
1382 }
1383
1384 if (ycbcr_props)
1385 ycbcr_props->combinedImageSamplerDescriptorCount = 1;
1386
1387 return VK_SUCCESS;
1388
1389 fail:
1390 if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
1391 /* From the Vulkan 1.0.42 spec:
1392 *
1393 * If the combination of parameters to
1394 * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
1395 * the implementation for use in vkCreateImage, then all members of
1396 * imageFormatProperties will be filled with zero.
1397 */
1398 base_props->imageFormatProperties = (VkImageFormatProperties){};
1399 }
1400
1401 return result;
1402 }
1403
/* Report sparse image format properties. The property count is always set to
 * zero and pProperties is never written.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties(
   VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type,
   VkSampleCountFlagBits samples, VkImageUsageFlags usage, VkImageTiling tiling,
   uint32_t *pNumProperties, VkSparseImageFormatProperties *pProperties)
{
   /* Sparse images are not yet supported. */
   *pNumProperties = 0;
}
1413
/* Report sparse image format properties (version 2 entry point). The
 * property count is always set to zero and pProperties is never written.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceSparseImageFormatProperties2(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
   uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
{
   /* Sparse images are not yet supported. */
   *pPropertyCount = 0;
}
1423
/* Query external buffer capabilities. Not implemented: the body only invokes
 * panvk_stub() and never writes pExternalBufferProperties.
 *
 * NOTE(review): callers get back whatever they passed in; presumably this
 * should at least clear the external memory properties. Confirm before
 * advertising external buffer support.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceExternalBufferProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
   VkExternalBufferProperties *pExternalBufferProperties)
{
   panvk_stub();
}
1432