/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_image.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_common_entrypoints.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_macros.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "panvk_queue.h"

#include "genxml/decode.h"
#include "genxml/gen_macros.h"

#include "kmod/pan_kmod.h"
#include "pan_props.h"
#include "pan_samples.h"

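/* Zeroing allocator callback for pan_kmod, backed by the Vulkan host
 * allocator. Transient allocations get command scope, long-lived ones
 * device scope. */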
static void *
panvk_kmod_zalloc(const struct pan_kmod_allocator *allocator, size_t size,
                  bool transient)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   void *obj = vk_zalloc(vkalloc, size, 8,
                         transient ? VK_SYSTEM_ALLOCATION_SCOPE_COMMAND
                                   : VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   /* We force errno to -ENOMEM on host allocation failures so we can properly
    * report it back as VK_ERROR_OUT_OF_HOST_MEMORY. */
   errno = obj ? 0 : -ENOMEM;

   return obj;
}

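/* Free callback matching panvk_kmod_zalloc(). */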
static void
panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   return vk_free(vkalloc, data);
}

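/* Set up the device-wide memory pools: one cached and one uncached RW pool
 * for internal data, plus an executable pool for shader binaries. */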
static void
panvk_device_init_mempools(struct panvk_device *dev)
{
   struct panvk_pool_properties rw_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "Device RW cached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props);

   struct panvk_pool_properties rw_nc_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_GPU_UNCACHED,
      .slab_size = 16 * 1024,
      .label = "Device RW uncached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, &rw_nc_pool_props);

   struct panvk_pool_properties exec_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "Device executable memory pool (shaders)",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props);
}

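/* Release the pools created by panvk_device_init_mempools(). */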
static void
panvk_device_cleanup_mempools(struct panvk_device *dev)
{
   panvk_pool_cleanup(&dev->mempools.rw);
   panvk_pool_cleanup(&dev->mempools.rw_nc);
   panvk_pool_cleanup(&dev->mempools.exec);
}

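/* vk_meta callback that backs a meta buffer with device memory sub-allocated
 * from the command buffer's desc pool and returns a CPU mapping of it. */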
static VkResult
panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd,
                               struct vk_meta_device *meta, VkBuffer buf,
                               void **map_out)
{
   VK_FROM_HANDLE(panvk_buffer, buffer, buf);
   struct panvk_cmd_buffer *cmdbuf =
      container_of(cmd, struct panvk_cmd_buffer, vk);
   struct panfrost_ptr mem =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, buffer->vk.size, 64);

   if (!mem.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer->dev_addr = mem.gpu;
   *map_out = mem.cpu;
   return VK_SUCCESS;
}

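/* Initialize the vk_meta device used for internal copy/blit/clear paths and
 * pick workgroup sizes for its buffer-access shaders. */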
static VkResult
panvk_meta_init(struct panvk_device *device)
{
   const struct vk_physical_device *pdev = device->vk.physical;

   VkResult result = vk_meta_device_init(&device->vk, &device->meta);
   if (result != VK_SUCCESS)
      return result;

   device->meta.use_stencil_export = true;
   device->meta.max_bind_map_buffer_size_B = 64 * 1024;
   device->meta.cmd_bind_map_buffer = panvk_meta_cmd_bind_map_buffer;

   /* Assume a maximum of 1024 bytes per workgroup and choose the workgroup
    * size accordingly. */
   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta.buffer_access.optimal_wg_size); i++) {
      device->meta.buffer_access.optimal_wg_size[i] =
         MIN2(1024 >> i, pdev->properties.maxComputeWorkGroupSize[0]);
   }

#if PAN_ARCH <= 7
   panvk_per_arch(meta_desc_copy_init)(device);
#endif

   return VK_SUCCESS;
}

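/* Tear down the vk_meta state created by panvk_meta_init(). */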
static void
panvk_meta_cleanup(struct panvk_device *device)
{
#if PAN_ARCH <= 7
   panvk_per_arch(meta_desc_copy_cleanup)(device);
#endif

   vk_meta_device_finish(&device->vk, &device->meta);
}

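/* Create the binary/descriptor pools and shader caches backing the preload
 * blitter. */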
static void
panvk_preload_blitter_init(struct panvk_device *device)
{
   const struct panvk_physical_device *physical_device =
      to_panvk_physical_device(device->vk.physical);

   struct panvk_pool_properties bin_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "panvk_meta blitter binary pool",
      .owns_bos = true,
      .needs_locking = false,
      .prealloc = false,
   };
   panvk_pool_init(&device->blitter.bin_pool, device, NULL, &bin_pool_props);

   struct panvk_pool_properties desc_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "panvk_meta blitter descriptor pool",
      .owns_bos = true,
      .needs_locking = false,
      .prealloc = false,
   };
   panvk_pool_init(&device->blitter.desc_pool, device, NULL, &desc_pool_props);

   pan_blend_shader_cache_init(&device->blitter.blend_shader_cache,
                               physical_device->kmod.props.gpu_prod_id);
   GENX(pan_blitter_cache_init)
   (&device->blitter.cache, physical_device->kmod.props.gpu_prod_id,
    &device->blitter.blend_shader_cache, &device->blitter.bin_pool.base,
    &device->blitter.desc_pool.base);
}

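/* Destroy the blitter caches and pools in reverse creation order. */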
static void
panvk_preload_blitter_cleanup(struct panvk_device *device)
{
   GENX(pan_blitter_cache_cleanup)(&device->blitter.cache);
   pan_blend_shader_cache_cleanup(&device->blitter.blend_shader_cache);
   panvk_pool_cleanup(&device->blitter.desc_pool);
   panvk_pool_cleanup(&device->blitter.bin_pool);
}

/* Always reserve the lower 32MB. */
#define PANVK_VA_RESERVE_BOTTOM 0x2000000ull

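/* Device creation: set up the dispatch tables, open a per-device kmod handle,
 * carve out the user VA range, create the VM and memory pools, upload static
 * data (sample positions, plus a tiler heap on v9 and earlier), initialize
 * the blitter and meta paths, and finally bring up the queues. Each failure
 * path unwinds everything initialized before it. */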
VkResult
panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
                              const VkDeviceCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkDevice *pDevice)
{
   struct panvk_instance *instance =
      to_panvk_instance(physical_device->vk.instance);
   VkResult result;
   struct panvk_device *device;

   device = vk_zalloc2(&instance->vk.alloc, pAllocator, sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;

   /* For secondary command buffer support, overwrite any command entrypoints
    * in the main device-level dispatch table with
    * vk_cmd_enqueue_unless_primary_Cmd*.
    */
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints, true);

   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &panvk_per_arch(device_entrypoints), false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);

   /* Populate our primary cmd_dispatch table. */
   vk_device_dispatch_table_from_entrypoints(
      &device->cmd_dispatch, &panvk_per_arch(device_entrypoints), true);
   vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(
      &device->cmd_dispatch, &vk_common_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
                           pCreateInfo, pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_dev;

   /* Must be done after vk_device_init(), which memsets the whole struct to
    * zero.
    */
   device->vk.command_dispatch_table = &device->cmd_dispatch;
   device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
   device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
   device->vk.submit_mode = VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;

   device->kmod.allocator = (struct pan_kmod_allocator){
      .zalloc = panvk_kmod_zalloc,
      .free = panvk_kmod_free,
      .priv = &device->vk.alloc,
   };
   device->kmod.dev =
      pan_kmod_dev_create(dup(physical_device->kmod.dev->fd),
                          PAN_KMOD_DEV_FLAG_OWNS_FD, &device->kmod.allocator);

   if (!device->kmod.dev) {
      result = vk_errorf(instance, panvk_errno_to_vk_error(),
                         "cannot create device");
      goto err_finish_dev;
   }

   if (instance->debug_flags &
       (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC | PANVK_DEBUG_DUMP))
      device->debug.decode_ctx = pandecode_create_context(false);

   /* 32-bit address space, with the lower 32MB reserved. We clamp
    * things so it matches kmod VA range limitations.
    */
   uint64_t user_va_start = panfrost_clamp_to_usable_va_range(
      device->kmod.dev, PANVK_VA_RESERVE_BOTTOM);
   uint64_t user_va_end =
      panfrost_clamp_to_usable_va_range(device->kmod.dev, 1ull << 32);
   uint32_t vm_flags = PAN_ARCH <= 7 ? PAN_KMOD_VM_FLAG_AUTO_VA : 0;

   util_vma_heap_init(&device->as.heap, user_va_start,
                      user_va_end - user_va_start);

   device->kmod.vm =
      pan_kmod_vm_create(device->kmod.dev, vm_flags,
                         user_va_start, user_va_end - user_va_start);

   if (!device->kmod.vm) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_kdev;
   }

   panvk_device_init_mempools(device);

#if PAN_ARCH <= 9
   device->tiler_heap = panvk_priv_bo_create(
      device, 128 * 1024 * 1024,
      PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!device->tiler_heap) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_priv_bos;
   }
#endif

   device->sample_positions =
      panvk_priv_bo_create(device, panfrost_sample_positions_buffer_size(), 0,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!device->sample_positions) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_priv_bos;
   }

   panfrost_upload_sample_positions(device->sample_positions->addr.host);

   vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);

   result = panvk_per_arch(blend_shader_cache_init)(device);

   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

   panvk_preload_blitter_init(device);

   result = panvk_meta_init(device);
   if (result != VK_SUCCESS)
      goto err_cleanup_blitter;

   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];
      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] =
         vk_alloc(&device->vk.alloc,
                  queue_create->queueCount * sizeof(struct panvk_queue), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_finish_queues;
      }

      memset(device->queues[qfi], 0,
             queue_create->queueCount * sizeof(struct panvk_queue));

      device->queue_count[qfi] = queue_create->queueCount;

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = panvk_per_arch(queue_init)(device, &device->queues[qfi][q], q,
                                             queue_create);
         if (result != VK_SUCCESS)
            goto err_finish_queues;
      }
   }

   *pDevice = panvk_device_to_handle(device);
   return VK_SUCCESS;

err_finish_queues:
   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_object_free(&device->vk, NULL, device->queues[i]);
   }

   panvk_meta_cleanup(device);

err_cleanup_blitter:
   panvk_preload_blitter_cleanup(device);
   panvk_per_arch(blend_shader_cache_cleanup)(device);

err_free_priv_bos:
   panvk_priv_bo_unref(device->sample_positions);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);

err_destroy_kdev:
   pan_kmod_dev_destroy(device->kmod.dev);

err_finish_dev:
   vk_device_finish(&device->vk);

err_free_dev:
   vk_free(&device->vk.alloc, device);
   return result;
}

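/* Mirror of create_device(): release resources in reverse creation order.
 * A NULL device is a no-op. */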
void
panvk_per_arch(destroy_device)(struct panvk_device *device,
                               const VkAllocationCallbacks *pAllocator)
{
   if (!device)
      return;

   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_object_free(&device->vk, NULL, device->queues[i]);
   }

   panvk_meta_cleanup(device);
   panvk_preload_blitter_cleanup(device);
   panvk_per_arch(blend_shader_cache_cleanup)(device);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);

   if (device->debug.decode_ctx)
      pandecode_destroy_context(device->debug.decode_ctx);

   pan_kmod_dev_destroy(device->kmod.dev);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

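/* Report a fixed 32x32 render-area granularity, independent of the render
 * pass. */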
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderAreaGranularity)(VkDevice device,
                                         VkRenderPass renderPass,
                                         VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}

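/* Same fixed 32x32 granularity for the dynamic-rendering query. */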
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderingAreaGranularityKHR)(
   VkDevice _device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
   VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}
442