/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_image.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "vk_cmd_enqueue_entrypoints.h"
#include "vk_common_entrypoints.h"

#include "panvk_buffer.h"
#include "panvk_cmd_alloc.h"
#include "panvk_cmd_buffer.h"
#include "panvk_device.h"
#include "panvk_entrypoints.h"
#include "panvk_instance.h"
#include "panvk_macros.h"
#include "panvk_physical_device.h"
#include "panvk_priv_bo.h"
#include "panvk_queue.h"

#include "genxml/decode.h"
#include "genxml/gen_macros.h"

#include "kmod/pan_kmod.h"
#include "pan_props.h"
#include "pan_samples.h"

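/* Route host allocations done by the pan_kmod library through the device's
 * VkAllocationCallbacks (stored in allocator->priv), so kmod memory is
 * accounted like any other driver allocation.
 */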
static void *
panvk_kmod_zalloc(const struct pan_kmod_allocator *allocator, size_t size,
                  bool transient)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   void *obj = vk_zalloc(vkalloc, size, 8,
                         transient ? VK_SYSTEM_ALLOCATION_SCOPE_COMMAND
                                   : VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   /* We force errno to -ENOMEM on host allocation failures so we can properly
    * report it back as VK_ERROR_OUT_OF_HOST_MEMORY. */
   errno = obj ? 0 : -ENOMEM;

   return obj;
}

static void
panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data)
{
   const VkAllocationCallbacks *vkalloc = allocator->priv;

   return vk_free(vkalloc, data);
}

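/* Device-level memory pools: a GPU-cached RW pool, a GPU-uncached RW pool,
 * and an executable pool for shader binaries. These pools are shared across
 * command buffers, hence needs_locking = true.
 */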
static void
panvk_device_init_mempools(struct panvk_device *dev)
{
   struct panvk_pool_properties rw_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "Device RW cached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props);

   struct panvk_pool_properties rw_nc_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_GPU_UNCACHED,
      .slab_size = 16 * 1024,
      .label = "Device RW uncached memory pool",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, &rw_nc_pool_props);

   struct panvk_pool_properties exec_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "Device executable memory pool (shaders)",
      .owns_bos = false,
      .needs_locking = true,
      .prealloc = false,
   };

   panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props);
}

static void
panvk_device_cleanup_mempools(struct panvk_device *dev)
{
   panvk_pool_cleanup(&dev->mempools.rw);
   panvk_pool_cleanup(&dev->mempools.rw_nc);
   panvk_pool_cleanup(&dev->mempools.exec);
}

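/* vk_meta hook used to back temporary meta buffers: the storage is carved out
 * of the command buffer's descriptor memory pool, so it follows the command
 * buffer's lifetime, and the CPU mapping is handed back to vk_meta for
 * filling.
 */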
static VkResult
panvk_meta_cmd_bind_map_buffer(struct vk_command_buffer *cmd,
                               struct vk_meta_device *meta, VkBuffer buf,
                               void **map_out)
{
   VK_FROM_HANDLE(panvk_buffer, buffer, buf);
   struct panvk_cmd_buffer *cmdbuf =
      container_of(cmd, struct panvk_cmd_buffer, vk);
   struct panfrost_ptr mem =
      panvk_cmd_alloc_dev_mem(cmdbuf, desc, buffer->vk.size, 64);

   if (!mem.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer->dev_addr = mem.gpu;
   *map_out = mem.cpu;
   return VK_SUCCESS;
}

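/* Set up the common vk_meta framework, which implements meta operations
 * (copies, blits, clears, ...) with regular Vulkan commands dispatched back
 * through the driver.
 */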
static VkResult
panvk_meta_init(struct panvk_device *device)
{
   const struct vk_physical_device *pdev = device->vk.physical;

   VkResult result = vk_meta_device_init(&device->vk, &device->meta);
   if (result != VK_SUCCESS)
      return result;

   device->meta.use_stencil_export = true;
   device->meta.max_bind_map_buffer_size_B = 64 * 1024;
   device->meta.cmd_bind_map_buffer = panvk_meta_cmd_bind_map_buffer;

   /* Assume a maximum of 1024 bytes per workgroup and choose the workgroup
    * size accordingly. */
   for (uint32_t i = 0;
        i < ARRAY_SIZE(device->meta.buffer_access.optimal_wg_size); i++) {
      device->meta.buffer_access.optimal_wg_size[i] =
         MIN2(1024 >> i, pdev->properties.maxComputeWorkGroupSize[0]);
   }

#if PAN_ARCH <= 7
   panvk_per_arch(meta_desc_copy_init)(device);
#endif

   return VK_SUCCESS;
}

static void
panvk_meta_cleanup(struct panvk_device *device)
{
#if PAN_ARCH <= 7
   panvk_per_arch(meta_desc_copy_cleanup)(device);
#endif

   vk_meta_device_finish(&device->vk, &device->meta);
}

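/* The preload blitter is used to reload existing framebuffer contents into
 * the tile buffer at the start of a render pass. It gets its own executable
 * and descriptor pools plus a blend shader cache, since its shaders and
 * descriptors outlive any single command buffer.
 */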
static void
panvk_preload_blitter_init(struct panvk_device *device)
{
   const struct panvk_physical_device *physical_device =
      to_panvk_physical_device(device->vk.physical);

   struct panvk_pool_properties bin_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "panvk_meta blitter binary pool",
      .owns_bos = true,
      .needs_locking = false,
      .prealloc = false,
   };
   panvk_pool_init(&device->blitter.bin_pool, device, NULL, &bin_pool_props);

   struct panvk_pool_properties desc_pool_props = {
      .create_flags = 0,
      .slab_size = 16 * 1024,
      .label = "panvk_meta blitter descriptor pool",
      .owns_bos = true,
      .needs_locking = false,
      .prealloc = false,
   };
   panvk_pool_init(&device->blitter.desc_pool, device, NULL, &desc_pool_props);

   pan_blend_shader_cache_init(&device->blitter.blend_shader_cache,
                               physical_device->kmod.props.gpu_prod_id);
   GENX(pan_blitter_cache_init)
   (&device->blitter.cache, physical_device->kmod.props.gpu_prod_id,
    &device->blitter.blend_shader_cache, &device->blitter.bin_pool.base,
    &device->blitter.desc_pool.base);
}

static void
panvk_preload_blitter_cleanup(struct panvk_device *device)
{
   GENX(pan_blitter_cache_cleanup)(&device->blitter.cache);
   pan_blend_shader_cache_cleanup(&device->blitter.blend_shader_cache);
   panvk_pool_cleanup(&device->blitter.desc_pool);
   panvk_pool_cleanup(&device->blitter.bin_pool);
}

/* Always reserve the lower 32MB. */
#define PANVK_VA_RESERVE_BOTTOM 0x2000000ull

VkResult
panvk_per_arch(create_device)(struct panvk_physical_device *physical_device,
                              const VkDeviceCreateInfo *pCreateInfo,
                              const VkAllocationCallbacks *pAllocator,
                              VkDevice *pDevice)
{
   struct panvk_instance *instance =
      to_panvk_instance(physical_device->vk.instance);
   VkResult result;
   struct panvk_device *device;

   device = vk_zalloc2(&instance->vk.alloc, pAllocator, sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;

   /* For secondary command buffer support, overwrite any command entrypoints
    * in the main device-level dispatch table with
    * vk_cmd_enqueue_unless_primary_Cmd*.
    */
   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &vk_cmd_enqueue_unless_primary_device_entrypoints, true);

   vk_device_dispatch_table_from_entrypoints(
      &dispatch_table, &panvk_per_arch(device_entrypoints), false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints, false);

   /* Populate our primary cmd_dispatch table. */
   vk_device_dispatch_table_from_entrypoints(
      &device->cmd_dispatch, &panvk_per_arch(device_entrypoints), true);
   vk_device_dispatch_table_from_entrypoints(&device->cmd_dispatch,
                                             &panvk_device_entrypoints, false);
   vk_device_dispatch_table_from_entrypoints(
      &device->cmd_dispatch, &vk_common_device_entrypoints, false);

   result = vk_device_init(&device->vk, &physical_device->vk, &dispatch_table,
                           pCreateInfo, pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_dev;

   /* Must be done after vk_device_init() because vk_device_init() memsets
    * the whole struct to zero.
    */
   device->vk.command_dispatch_table = &device->cmd_dispatch;
   device->vk.command_buffer_ops = &panvk_per_arch(cmd_buffer_ops);
   device->vk.shader_ops = &panvk_per_arch(device_shader_ops);
   device->vk.submit_mode = VK_QUEUE_SUBMIT_MODE_THREADED_ON_DEMAND;

   device->kmod.allocator = (struct pan_kmod_allocator){
      .zalloc = panvk_kmod_zalloc,
      .free = panvk_kmod_free,
      .priv = &device->vk.alloc,
   };
   device->kmod.dev =
      pan_kmod_dev_create(dup(physical_device->kmod.dev->fd),
                          PAN_KMOD_DEV_FLAG_OWNS_FD, &device->kmod.allocator);

   if (!device->kmod.dev) {
      result = vk_errorf(instance, panvk_errno_to_vk_error(),
                         "cannot create device");
      goto err_finish_dev;
   }

   if (instance->debug_flags &
       (PANVK_DEBUG_TRACE | PANVK_DEBUG_SYNC | PANVK_DEBUG_DUMP))
      device->debug.decode_ctx = pandecode_create_context(false);

   /* 32bit address space, with the lower 32MB reserved. We clamp
    * things so it matches kmod VA range limitations.
    */
   uint64_t user_va_start = panfrost_clamp_to_usable_va_range(
      device->kmod.dev, PANVK_VA_RESERVE_BOTTOM);
   uint64_t user_va_end =
      panfrost_clamp_to_usable_va_range(device->kmod.dev, 1ull << 32);
   uint32_t vm_flags = PAN_ARCH <= 7 ? PAN_KMOD_VM_FLAG_AUTO_VA : 0;

   util_vma_heap_init(&device->as.heap, user_va_start,
                      user_va_end - user_va_start);

   device->kmod.vm =
      pan_kmod_vm_create(device->kmod.dev, vm_flags, user_va_start,
                         user_va_end - user_va_start);

   if (!device->kmod.vm) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_destroy_kdev;
   }

   panvk_device_init_mempools(device);

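   /* Pre-CSF GPUs (v9 and earlier) use a device-wide tiler heap BO. NO_MMAP +
    * ALLOC_ON_FAULT means it is never CPU-mapped and physical pages are only
    * committed when the GPU actually touches them.
    */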
#if PAN_ARCH <= 9
   device->tiler_heap = panvk_priv_bo_create(
      device, 128 * 1024 * 1024,
      PAN_KMOD_BO_FLAG_NO_MMAP | PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!device->tiler_heap) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_priv_bos;
   }
#endif

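   /* Upload the static sample-position tables once at device creation so GPU
    * jobs can reference them without per-command-buffer copies.
    */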
   device->sample_positions =
      panvk_priv_bo_create(device, panfrost_sample_positions_buffer_size(), 0,
                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!device->sample_positions) {
      result = vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_priv_bos;
   }

   panfrost_upload_sample_positions(device->sample_positions->addr.host);

   vk_device_set_drm_fd(&device->vk, device->kmod.dev->fd);

   result = panvk_per_arch(blend_shader_cache_init)(device);

   if (result != VK_SUCCESS)
      goto err_free_priv_bos;

   panvk_preload_blitter_init(device);

   result = panvk_meta_init(device);
   if (result != VK_SUCCESS)
      goto err_cleanup_blitter;

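   /* Create the queues requested by the application, one array per queue
    * family.
    */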
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create =
         &pCreateInfo->pQueueCreateInfos[i];
      uint32_t qfi = queue_create->queueFamilyIndex;
      device->queues[qfi] =
         vk_alloc(&device->vk.alloc,
                  queue_create->queueCount * sizeof(struct panvk_queue), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_finish_queues;
      }

      memset(device->queues[qfi], 0,
             queue_create->queueCount * sizeof(struct panvk_queue));

      device->queue_count[qfi] = queue_create->queueCount;

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = panvk_per_arch(queue_init)(device, &device->queues[qfi][q],
                                             q, queue_create);
         if (result != VK_SUCCESS)
            goto err_finish_queues;
      }
   }

   *pDevice = panvk_device_to_handle(device);
   return VK_SUCCESS;

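/* The error labels intentionally fall through to undo earlier initialization
 * steps in reverse order.
 */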
err_finish_queues:
   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_object_free(&device->vk, NULL, device->queues[i]);
   }

   panvk_meta_cleanup(device);

err_cleanup_blitter:
   panvk_preload_blitter_cleanup(device);
   panvk_per_arch(blend_shader_cache_cleanup)(device);

err_free_priv_bos:
   panvk_priv_bo_unref(device->sample_positions);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);

err_destroy_kdev:
   pan_kmod_dev_destroy(device->kmod.dev);

err_finish_dev:
   vk_device_finish(&device->vk);

err_free_dev:
   vk_free(&device->vk.alloc, device);
   return result;
}

void
panvk_per_arch(destroy_device)(struct panvk_device *device,
                               const VkAllocationCallbacks *pAllocator)
{
   if (!device)
      return;

   for (unsigned i = 0; i < PANVK_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         panvk_per_arch(queue_finish)(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_object_free(&device->vk, NULL, device->queues[i]);
   }

   panvk_meta_cleanup(device);
   panvk_preload_blitter_cleanup(device);
   panvk_per_arch(blend_shader_cache_cleanup)(device);
   panvk_priv_bo_unref(device->tiler_heap);
   panvk_priv_bo_unref(device->sample_positions);
   panvk_device_cleanup_mempools(device);
   pan_kmod_vm_destroy(device->kmod.vm);
   util_vma_heap_finish(&device->as.heap);

   if (device->debug.decode_ctx)
      pandecode_destroy_context(device->debug.decode_ctx);

   pan_kmod_dev_destroy(device->kmod.dev);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

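/* Both render-area granularity queries report a fixed 32x32 block, presumably
 * matching the GPU's tile alignment.
 */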
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderAreaGranularity)(VkDevice device,
                                         VkRenderPass renderPass,
                                         VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}

VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(GetRenderingAreaGranularityKHR)(
   VkDevice _device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
   VkExtent2D *pGranularity)
{
   *pGranularity = (VkExtent2D){32, 32};
}