/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices per instance are created,
 * devices get a smaller initial global free list size, as this use-case
 * usually implies smaller amounts of work spread out across devices. The
 * free list can still grow as required.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
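
/* Hypothetical illustration of how these tunables combine: the driver's
 * actual free-list growth handling lives outside this file, so the helper
 * below is only a sketch of the intended policy, not code the driver calls.
 */
static inline bool pvr_free_list_needs_grow_sketch(uint64_t size,
                                                   uint64_t space_remaining)
{
   /* Grow once free space drops below ~12.5% (rounded up to 13%) of the
    * current free list size.
    */
   return space_remaining * 100U <
          (uint64_t)PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD * size;
}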

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
#   define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

#if PVR_USE_WSI_PLATFORM_DISPLAY
#   define PVR_USE_WSI_PLATFORM true
#else
#   define PVR_USE_WSI_PLATFORM false
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
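
/* VK_MAKE_VERSION packs this as (major << 22) | (minor << 12) | patch, so
 * the driver advertises Vulkan 1.0 with the current header's patch level.
 */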

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default size in bytes used by pvr_CreateDevice() for setting up the
 * suballoc_general, suballoc_pds and suballoc_usc suballocators.
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      const char *name;
      size_t len;
   } render, display;
};

#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
};

#undef DEF_CONFIG
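
/* For reference, DEF_CONFIG("ti,am62-gpu", "ti,am625-dss") expands to
 *
 *    {
 *       .render = { .name = "ti,am62-gpu", .len = 11 },
 *       .display = { .name = "ti,am625-dss", .len = 12 },
 *    }
 *
 * where sizeof(literal) - 1 computes the string length at compile time.
 */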

static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: Enable this extension when the conformance tests get updated
       * to version 1.3.6.0. The current version does not include the
       * Imagination driver ID, which makes a dEQP test fail.
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid-range Rogue XE cores. */
      CS2048, /* Low-end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   static const struct pvr_descriptor_limits descriptor_limits[] = {
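      /* Columns match the pvr_descriptor_limits field order: resources,
       * samplers, uniform buffers, storage buffers, sampled images,
       * storage images, input attachments (all per-stage maximums).
       */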
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1526) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}
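
/* Worked example: a core whose usable common store size comes back as 2048
 * registers or more selects the CS2048 row above, i.e. 584 per-stage
 * resources, 128 samplers, 96 uniform buffers, and so on. Note that, as the
 * thresholds are written, this selection never picks the CS4096 or CS2560
 * rows (an observation about the code above, not documented behaviour).
 */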

static bool pvr_physical_device_get_properties(
   const struct pvr_physical_device *const pdevice,
   struct vk_properties *const properties)
{
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
   const struct pvr_device_runtime_info *const dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

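   /* E.g. max_multisample = 4 gives (4 << 1) - 1 = 0x7, i.e. the 1x, 2x and
    * 4x VkSampleCountFlagBits set.
    */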
   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier: each slot has a fixed number of invocations, so the whole
    * workgroup may need to span multiple slots. As each slot will WAIT at the
    * barrier until the last invocation completes, all have to be schedulable
    * at the same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

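   /* E.g. the fallback values above give 14 slots * 32 instances = 448
    * schedulable invocations, which is below 512, so such cores report the
    * reduced 384 limit.
    */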
   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = pdevice->ws->page_size,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "Imagination PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
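   /* E.g. 8 GiB of system RAM yields a 6 GiB heap; 4 GiB yields 2 GiB. */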
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(pdevice, &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Set up the available memory heaps and types. */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
   int ret = drmGetDevices2(0, devices, max_devices);
   if (ret < 0) {
      return vk_errorf(obj,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to enumerate drm devices (errno %d: %s)",
                       -ret,
                       strerror(-ret));
   }

   if (num_devices_out)
      *num_devices_out = ret;

   return VK_SUCCESS;
}

static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compatible = drm_dev->deviceinfo.platform->compatible;

   for (char **compat = compatible; *compat; compat++) {
      if (strncmp(*compat, info->name, info->len) == 0)
         return true;
   }

   return false;
}

static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
      if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
         return &pvr_drm_configs[i];
   }

   return NULL;
}

static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   drmVersionPtr version_display, version_render;
   struct pvr_device_dump_info info;

   version_display = drmGetVersion(pdevice->ws->display_fd);
   if (!version_display)
      return;

   version_render = drmGetVersion(pdevice->ws->render_fd);
   if (!version_render) {
      drmFreeVersion(version_display);
      return;
   }

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;
   info.drm_display.patchlevel = version_display->version_patchlevel;
   info.drm_display.major = version_display->version_major;
   info.drm_display.minor = version_display->version_minor;
   info.drm_display.name = version_display->name;
   info.drm_display.date = version_display->date;
   info.drm_display.comp = comp_display;
   info.drm_render.patchlevel = version_render->version_patchlevel;
   info.drm_render.major = version_render->version_major;
   info.drm_render.minor = version_render->version_minor;
   info.drm_render.name = version_render->name;
   info.drm_render.date = version_render->date;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(version_display);
   drmFreeVersion(version_render);
}

static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return result;
   }

   pvr_process_debug_variable();

   instance->active_device_count = 0;

   instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
   instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t min_cluster_per_phantom;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);
   min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);

   if (min_cluster_per_phantom >= 4)
      return 1;
   else if (min_cluster_per_phantom == 2)
      return 2;
   else
      return 4;
}

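/* Note on the dual-purpose contract below: with fs_common_size == UINT32_MAX
 * the function returns the largest usable common store size; with
 * fs_common_size == 0 it returns the maximum number of tiles in flight;
 * otherwise it returns the achievable tiles in flight for the given size.
 */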
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   }

   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if MESA_DEBUG
   /* Validate the above result. */

   assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
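   /* E.g. with 1 GiB of free system memory the reported budget is capped at
    * heap_used plus ~922 MiB (90% of 1 GiB), never exceeding the heap size.
    */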
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;

         pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         pMemoryBudget->heapUsage[0] = pdevice->heap_used;

         for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            pMemoryBudget->heapBudget[i] = 0u;
            pMemoryBudget->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   /* FIXME: Fix the inconsistency below: code size is in bytes whereas data
    * size is in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_compute_shader(program,
                                        data_buffer,
                                        PDS_GENERATE_DATA_SEGMENT,
                                        dev_info);

   pvr_pds_compute_shader(program,
                          code_buffer,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program->data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_buffer,
                               program->code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger, so we handle it first (if needed) and then realloc() down to
    * the smaller size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   struct util_dynarray usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   util_dynarray_init(&usc_program, NULL);
   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program.data,
                               usc_program.size,
                               16,
                               &device->idfwdf_state.usc);
   util_dynarray_fini(&usc_program);

   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to set up the shareds data instead of
    * assuming there are always 12 dwords laid out like this?
    */

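   /* Shareds layout (12 dwords), matching the writes below:
    *   [0..1]   store buffer device address (high then low 32 bits)
    *   [2..3]   padding so the state words are 128-bit aligned
    *   [4..7]   image state words
    *   [8..11]  sampler state words
    */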
   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_suballoc_free(device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.usc);
}

/* FIXME: We should be calculating the size when we upload the code in
 * pvr_srv_setup_static_pixel_event_program().
 */
static void pvr_device_get_pixel_event_pds_program_data_size(
   const struct pvr_device_info *dev_info,
   uint32_t *const data_size_in_dwords_out)
{
   struct pvr_pds_event_program program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   *data_size_in_dwords_out = program.data_size;
}

static VkResult pvr_device_init_nop_program(struct pvr_device *device)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_kickusc_program program = { 0 };
   struct util_dynarray nop_usc_bin;
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

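   /* Generate a minimal USC binary that performs no work; the PDS program
    * built below does nothing but kick it.
    */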
   pvr_uscgen_nop(&nop_usc_bin);

   result = pvr_gpu_upload_usc(device,
                               util_dynarray_begin(&nop_usc_bin),
                               nop_usc_bin.size,
                               cache_line_size,
                               &device->nop_program.usc);
   util_dynarray_fini(&nop_usc_bin);
   if (result != VK_SUCCESS)
      return result;

   /* Set up a PDS program that kicks the static USC program. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       device->nop_program.usc->dev_addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_shader(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_suballoc_free(device->nop_program.usc);

   return result;
}

static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
{
   simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);

   for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
      device->tile_buffer_state.buffers[i] = NULL;

   device->tile_buffer_state.buffer_count = 0;
}

static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
{
   /* Destroy the mutex first to trigger asserts if it's still locked, so that
    * we don't reach an inconsistent state by freeing buffers that might still
    * be in use, or by freeing buffers while new ones are being allocated.
    */
   simple_mtx_destroy(&device->tile_buffer_state.mtx);

   for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
      pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
}

/**
 * \brief Ensures that a certain number of tile buffers are allocated.
 *
 * Makes sure that at least \p capacity tile buffers are allocated. If fewer
 * are present, new tile buffers of \p size_in_bytes each are appended to
 * reach the quota.
 */
VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes)
{
   struct pvr_device_tile_buffer_state *tile_buffer_state =
      &device->tile_buffer_state;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   simple_mtx_lock(&tile_buffer_state->mtx);

   /* Clamping in release and asserting in debug. */
   assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
   capacity = CLAMP(capacity,
                    tile_buffer_state->buffer_count,
                    ARRAY_SIZE(tile_buffer_state->buffers));

   /* TODO: Implement bo multialloc to reduce the number of syscalls and
    * allocations?
    */
   for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
      result = pvr_bo_alloc(device,
                            device->heaps.general_heap,
                            size_in_bytes,
                            cache_line_size,
                            0,
                            &tile_buffer_state->buffers[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
            pvr_bo_free(device, tile_buffer_state->buffers[j]);

         goto err_release_lock;
      }
   }

   tile_buffer_state->buffer_count = capacity;

   simple_mtx_unlock(&tile_buffer_state->mtx);

   return VK_SUCCESS;

err_release_lock:
   simple_mtx_unlock(&tile_buffer_state->mtx);

   return result;
}
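
/* Illustrative usage sketch (not taken from this driver): a render job that
 * needs three 64 KiB tile buffers could call
 *
 *    pvr_device_tile_buffer_ensure_cap(device, 3, 64 * 1024);
 *
 * before submission; buffers that already exist are kept, and only the
 * missing ones are allocated.
 */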

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.non_normalized_coords = true;
   }
}

VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   struct pvr_winsys *ws;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   result = pvr_winsys_create(pdevice->render_path,
                              pdevice->display_path,
                              pAllocator ? pAllocator : &instance->vk.alloc,
                              &ws);
   if (result != VK_SUCCESS)
      goto err_out;

   device = vk_alloc2(&instance->vk.alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_pvr_winsys_destroy;
   }

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->instance = instance;
   device->pdevice = pdevice;
   device->ws = ws;

   vk_device_set_drm_fd(&device->vk, ws->render_fd);

   if (ws->features.supports_threaded_submit) {
      /* Queue submission can be blocked if the kernel CCBs become full,
       * so enable threaded submit to not block the submitter.
       */
      vk_device_enable_threaded_submit(&device->vk);
   }

   ws->ops->get_heaps_info(ws, &device->heaps);

   result = pvr_bo_store_create(device);
   if (result != VK_SUCCESS)
      goto err_vk_device_finish;

   pvr_bo_suballocator_init(&device->suballoc_general,
                            device->heaps.general_heap,
                            device,
                            PVR_SUBALLOCATOR_GENERAL_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_pds,
                            device->heaps.pds_heap,
                            device,
                            PVR_SUBALLOCATOR_PDS_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_transfer,
                            device->heaps.transfer_frag_heap,
                            device,
                            PVR_SUBALLOCATOR_TRANSFER_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_usc,
                            device->heaps.usc_heap,
                            device,
                            PVR_SUBALLOCATOR_USC_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_vis_test,
                            device->heaps.vis_test_heap,
                            device,
                            PVR_SUBALLOCATOR_VIS_TEST_SIZE);

   if (p_atomic_inc_return(&instance->active_device_count) >
       PVR_SECONDARY_DEVICE_THRESHOLD) {
      initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
   }

   result = pvr_free_list_create(device,
                                 initial_free_list_size,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_dec_device_count;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_empty_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_device_create_compute_query_programs(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_empty;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_destroy_compute_query_programs;

   result = pvr_device_init_graphics_static_clear_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   result = pvr_device_init_spm_load_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_graphics_static_clear_state;

   pvr_device_init_tile_buffer_state(device);

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_tile_buffer_state;

   pvr_device_init_default_sampler_state(device);

   pvr_spm_init_scratch_buffer_store(device);

   result = pvr_init_robustness_buffer(device);
   if (result != VK_SUCCESS)
      goto err_pvr_spm_finish_scratch_buffer_store;

   result = pvr_border_color_table_init(&device->border_color_table, device);
   if (result != VK_SUCCESS)
      goto err_pvr_robustness_buffer_finish;

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_cmd_buffer_submit_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

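/* Error paths below unwind in the reverse order of the initialization above. */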
err_pvr_robustness_buffer_finish:
   pvr_robustness_buffer_finish(device);

err_pvr_spm_finish_scratch_buffer_store:
   pvr_spm_finish_scratch_buffer_store(device);

   pvr_queues_destroy(device);

err_pvr_finish_tile_buffer_state:
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);

err_pvr_finish_graphics_static_clear_state:
   pvr_device_finish_graphics_static_clear_state(device);

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_destroy_compute_query_programs:
   pvr_device_destroy_compute_query_programs(device);

err_pvr_free_compute_empty:
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);

err_pvr_free_compute_fence:
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_dec_device_count:
   p_atomic_dec(&device->instance->active_device_count);

   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);

   pvr_bo_store_destroy(device);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_out:
   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!device)
      return;

   pvr_border_color_table_finish(&device->border_color_table, device);
   pvr_robustness_buffer_finish(device);
   pvr_spm_finish_scratch_buffer_store(device);
   pvr_queues_destroy(device);
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);
   pvr_device_finish_graphics_static_clear_state(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_device_destroy_compute_query_programs(device);
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);
   pvr_bo_store_destroy(device);
   pvr_winsys_destroy(device->ws);
   p_atomic_dec(&device->instance->active_device_count);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

static void free_memory(struct pvr_device *device,
                        struct pvr_device_memory *mem,
                        const VkAllocationCallbacks *pAllocator)
{
   if (!mem)
      return;

   /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
    *   If a memory object is mapped at the time it is freed, it is implicitly
    *   unmapped.
    */
   if (mem->bo->map)
      device->ws->ops->buffer_unmap(mem->bo);

   p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   uint64_t heap_used;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         if (device->ws->display_fd >= 0)
            type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            aligned_alloc_size,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
   if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
      free_memory(device, mem, pAllocator);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types having
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
       * dma-buf should be imported using cacheable memory types,
       * given exporter's mmap will always map it as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   free_memory(device, mem, pAllocator);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   VkResult result;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */

   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped */
   if (mem->bo->map) {
      *ppData = (uint8_t *)mem->bo->map + offset;
      return VK_SUCCESS;
   }

   /* Map it all at once */
   result = device->ws->ops->buffer_map(mem->bo);
   if (result != VK_SUCCESS)
      return result;

   *ppData = (uint8_t *)mem->bo->map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
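   /* Mappings are page granular, so if offset falls within a page, extend the
    * virtual range by the sub-page remainder to keep the whole binding
    * covered.
    */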
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;
   VkResult result;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   result = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                        virt_size,
                                        alignment,
                                        &vma);
   if (result != VK_SUCCESS)
      goto err_out;

   result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
   if (result != VK_SUCCESS)
      goto err_free_vma;

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;

err_free_vma:
   device->ws->ops->heap_free(vma);

err_out:
   return result;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   struct pvr_event *event = vk_object_alloc(&device->vk,
                                             pAllocator,
                                             sizeof(*event),
                                             VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->sync = NULL;
   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   *pEvent = pvr_event_to_handle(event);

   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (!event)
      return;

   if (event->sync)
      vk_sync_destroy(&device->vk, event->sync);

   vk_object_free(&device->vk, pAllocator, event);
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult result;

   switch (event->state) {
   case PVR_EVENT_STATE_SET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
      break;

   case PVR_EVENT_STATE_RESET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_SET_BY_HOST:
      result = VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_RESET_BY_HOST:
      result = VK_EVENT_RESET;
      break;

   default:
      unreachable("Event object in unknown state");
   }

   return result;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

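   /* event->sync starts out NULL in pvr_CreateEvent(); if the event has only
    * ever been touched from the host there may be no sync object yet, in
    * which case updating the host-side state below is sufficient.
    */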
   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_SET_BY_HOST;

   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_reset(&device->vk, event->sync);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   struct pvr_suballocator *allocator;
   VkResult result;
   void *map;

   assert(size > 0);

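   /* Route the upload through the suballocator that owns the target heap. */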
   if (heap == device->heaps.general_heap)
      allocator = &device->suballoc_general;
   else if (heap == device->heaps.pds_heap)
      allocator = &device->suballoc_pds;
   else if (heap == device->heaps.transfer_frag_heap)
      allocator = &device->suballoc_transfer;
   else if (heap == device->heaps.usc_heap)
      allocator = &device->suballoc_usc;
   else
      unreachable("Unknown heap type");

   result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, data, size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   VkResult result;
   void *map;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over-allocate by one
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_suballoc(&device->suballoc_usc,
                            code_size + ROGUE_MAX_INSTR_BYTES,
                            code_alignment,
                            false,
                            &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, code, code_size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device            Logical device pointer.
 * \param[in] data              Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords  Size of PDS data segment in dwords.
 * \param[in] data_alignment    Required alignment of the PDS data segment in
 *                              bytes. Must be a power of two.
 * \param[in] code              Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords  Size of PDS code segment in dwords.
 * \param[in] code_alignment    Required alignment of the PDS code segment in
 *                              bytes. Must be a power of two.
 * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
 *                              program in bytes.
 * \param[out] pds_upload_out   On success will be initialized based on the
 *                              uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
   const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
   const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   VkResult result;
   void *map;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

2753    result = pvr_bo_suballoc(&device->suballoc_pds,
2754                             bo_size,
2755                             bo_alignment,
2756                             true,
2757                             &pds_upload_out->pvr_bo);
2758    if (result != VK_SUCCESS)
2759       return result;
2760 
2761    map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);
2762 
2763    if (data) {
2764       memcpy(map, data, data_size);
2765 
2766       pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
2767                                     device->heaps.pds_heap->base_addr.addr;
2768 
2769       /* Store data size in dwords. */
2770       assert(data_aligned_size % 4 == 0);
2771       pds_upload_out->data_size = data_aligned_size / 4;
2772    } else {
2773       pds_upload_out->data_offset = 0;
2774       pds_upload_out->data_size = 0;
2775    }
2776 
2777    if (code) {
2778       memcpy((uint8_t *)map + code_offset, code, code_size);
2779 
2780       pds_upload_out->code_offset =
2781          (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
2782          device->heaps.pds_heap->base_addr.addr;
2783 
2784       /* Store code size in dwords. */
2785       assert(code_aligned_size % 4 == 0);
2786       pds_upload_out->code_size = code_aligned_size / 4;
2787    } else {
2788       pds_upload_out->code_offset = 0;
2789       pds_upload_out->code_size = 0;
2790    }
2791 
2792    return VK_SUCCESS;
2793 }
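
/* Example layout (illustrative arithmetic only): uploading 7 dwords of data
 * (28 bytes) at 16-byte alignment alongside 4 dwords of code (16 bytes) at
 * 8-byte alignment gives data_aligned_size = 32, code_offset =
 * ALIGN_POT(32, 8) = 32 and bo_size = 32 + 16 = 48 bytes; pds_upload_out
 * then reports data_size = 8 and code_size = 4 (both in dwords).
 */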

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}
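
/* Illustrative example (the block size is hypothetical, not taken from the
 * hardware headers): if the terminate clip blocks were 32 pixels on a side,
 * a 1920x1080 framebuffer would pack
 * clip_right = DIV_ROUND_UP(1920, 32) - 1 = 59 and
 * clip_bottom = DIV_ROUND_UP(1080, 32) - 1 = 33.
 */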

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   uint64_t scratch_buffer_size;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);
   vk_multialloc_add(&ma,
                     &spm_eot_state_per_render,
                     __typeof__(*spm_eot_state_per_render),
                     pass->hw_setup->render_count);
   vk_multialloc_add(&ma,
                     &spm_bgobj_state_per_render,
                     __typeof__(*spm_bgobj_state_per_render),
                     pass->hw_setup->render_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   scratch_buffer_size =
      pvr_spm_scratch_buffer_calc_required_size(pass,
                                                framebuffer->width,
                                                framebuffer->height);

   result = pvr_spm_scratch_buffer_get_buffer(device,
                                              scratch_buffer_size,
                                              &framebuffer->scratch_buffer);
   if (result != VK_SUCCESS)
      goto err_finish_render_targets;

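   /* For each hardware render in the pass, set up the SPM end-of-tile and
    * background object state. On failure part-way through, the in-loop
    * error labels below unwind the current iteration's partial state and
    * every previously completed iteration before taking the common error
    * path out of the function.
    */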
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      uint32_t emit_count;

      result = pvr_spm_init_eot_state(device,
                                      &spm_eot_state_per_render[i],
                                      framebuffer,
                                      &pass->hw_setup->renders[i],
                                      &emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_eot_state;

      result = pvr_spm_init_bgobj_state(device,
                                        &spm_bgobj_state_per_render[i],
                                        framebuffer,
                                        &pass->hw_setup->renders[i],
                                        emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_bgobj_state;

      continue;

err_finish_bgobj_state:
      pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);

err_finish_eot_state:
      /* Every previously completed iteration initialized both the EOT and
       * background object state, so unwind both here.
       */
      for (uint32_t j = 0; j < i; j++) {
         pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
         pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
      }

      goto err_finish_render_targets;
   }

   framebuffer->render_count = pass->hw_setup->render_count;
   framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
   framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_finish_render_targets:
   pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);

err_free_ppp_state_bo:
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!framebuffer)
      return;

   for (uint32_t i = 0; i < framebuffer->render_count; i++) {
      pvr_spm_finish_bgobj_state(device,
                                 &framebuffer->spm_bgobj_state_per_render[i]);

      pvr_spm_finish_eot_state(device,
                               &framebuffer->spm_eot_state_per_render[i]);
   }

   pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return PVRX(TEXSTATE_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return PVRX(TEXSTATE_FILTER_LINEAR);
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_REPEAT);
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_FLIP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t border_color_table_index;
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   VkResult result;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler =
      vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
   if (!sampler) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_out;
   }

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   result =
      pvr_border_color_table_get_or_create_entry(&device->border_color_table,
                                                 sampler,
                                                 &border_color_table_index);
   if (result != VK_SUCCESS)
      goto err_free_sampler;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here: the GPU decides which
       * of the two filters to use based on the clamped LOD value, where LOD
       * <= 0 implies magnification and LOD > 0 implies minification.
       *
       * As a workaround, we override magFilter with minFilter if we know
       * that the magnification filter will never be used due to clamping
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }
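
   /* Illustrative example: with minLod = 1.0 the clamped LOD is always
    * >= 1.0 > 0, so only the minification filter can ever be selected;
    * substituting minFilter for magFilter is therefore observationally
    * equivalent and sidesteps the quirk.
    */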

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
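
      /* Worked example (the constants are purely hypothetical, for
       * illustration only): with DADJUST_FRACTIONAL_BITS == 5 and
       * DADJUST_ZERO_UINT == 448, a mipLodBias of 1.5 would encode roughly
       * as 448 + 1.5 * (1 << 5) = 496; the CLAMP above keeps the bias within
       * the representable fixed-point range before conversion.
       */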

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }
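
      /* Worked example of the 0.5 bias compensation: with minLod = 1.0,
       * maxLod = 3.0 and an input LOD of 0.2, Vulkan expects
       * trunc(clamp(0.2, 1.0, 3.0) + 0.5) = 1. The hardware instead computes
       * trunc(clamp(0.2 + 0.5, minlod, maxlod)); with the biased bounds 1.5
       * and 3.5 this gives trunc(clamp(0.7, 1.5, 3.5)) = 1, matching the
       * spec. Shifting both bounds by the same 0.5 keeps bias-then-clamp and
       * clamp-then-bias equivalent.
       */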

      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = border_color_table_index;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;

err_free_sampler:
   vk_object_free(&device->vk, pAllocator, sampler);

err_out:
   return result;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint64_t size;

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;

   size = buffer->vk.size;

   if (size % device->ws->page_size == 0 ||
       size % device->ws->page_size >
          device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
      /* TODO: We can save memory by having one extra virtual page mapped
       * in and having the first and last virtual page mapped to the first
       * physical address.
       */
      size += PVR_BUFFER_MEMORY_PADDING_SIZE;
   }
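
   /* Illustrative example (a 4 KiB page size is assumed for the arithmetic
    * only): a buffer of exactly 8192 bytes, or one whose size ends within
    * PVR_BUFFER_MEMORY_PADDING_SIZE bytes of a page boundary, is reported
    * PVR_BUFFER_MEMORY_PADDING_SIZE bytes larger, which presumably keeps
    * accesses that run slightly past the end of the buffer inside mapped
    * memory.
    */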

   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is
    * done in GetImageMemoryRequirements()), but this should be known at
    * image creation time (pCreateInfo->arrayLayers > 1). This is confirmed
    * in ImageCreate()/ImageGetMipMapOffsetInBytes(), where the size is
    * aligned to 4096 if pCreateInfo->arrayLayers > 1. So is the alignment
    * here actually necessary? If not, what should it be when
    * pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      align64(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}
3311