// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */

#include <linux/iosys-map.h>
#include <linux/rwsem.h>

#include <drm/panthor_drm.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"

/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure it is
 * required to be GPU cache line aligned.
 */
#define HEAP_CONTEXT_SIZE 32

/**
 * struct panthor_heap_chunk_header - Heap chunk header
 */
struct panthor_heap_chunk_header {
        /**
         * @next: Next heap chunk in the list.
         *
         * This is a GPU VA.
         */
        u64 next;

        /** @unknown: MBZ. */
        u32 unknown[14];
};

/**
 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
 */
struct panthor_heap_chunk {
        /** @node: Used to insert the heap chunk in panthor_heap::chunks. */
        struct list_head node;

        /** @bo: Buffer object backing the heap chunk. */
        struct panthor_kernel_bo *bo;
};

/**
 * struct panthor_heap - Structure used to manage tiler heap contexts.
 */
struct panthor_heap {
        /** @chunks: List containing all heap chunks allocated so far. */
        struct list_head chunks;

        /** @lock: Lock protecting insertion in the chunks list. */
        struct mutex lock;

        /** @chunk_size: Size of each chunk. */
        u32 chunk_size;

        /** @max_chunks: Maximum number of chunks. */
        u32 max_chunks;
        /**
         * @target_in_flight: Number of in-flight render passes after which
         * we'd let the FW wait for fragment jobs to finish instead of allocating new chunks.
         */
        u32 target_in_flight;

        /** @chunk_count: Number of heap chunks currently allocated. */
        u32 chunk_count;
};

#define MAX_HEAPS_PER_POOL 128

/**
 * struct panthor_heap_pool - Pool of heap contexts
 *
 * The pool is attached to a panthor_file and can't be shared across processes.
 */
struct panthor_heap_pool {
        /** @refcount: Reference count. */
        struct kref refcount;

        /** @ptdev: Device. */
        struct panthor_device *ptdev;

        /** @vm: VM this pool is bound to. */
        struct panthor_vm *vm;

        /** @lock: Lock protecting access to @xa. */
        struct rw_semaphore lock;

        /** @xa: Array storing panthor_heap objects. */
        struct xarray xa;

        /** @gpu_contexts: Buffer object containing the GPU heap contexts. */
        struct panthor_kernel_bo *gpu_contexts;

        /** @size: Size of all chunks across all heaps in the pool. */
        atomic_t size;
};

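/*
 * A heap context must be GPU cache line aligned (see the HEAP_CONTEXT_SIZE
 * comment above), so the per-context stride is the context size rounded up
 * to the L2 cache line size reported by the GPU.
 */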
static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
{
        u32 l2_features = ptdev->gpu_info.l2_features;
        u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);

        return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
}

static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
{
        return panthor_heap_ctx_stride(pool->ptdev) * id;
}

static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
{
        return pool->gpu_contexts->kmap +
               panthor_get_heap_ctx_offset(pool, id);
}

static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
                                    struct panthor_heap *heap,
                                    struct panthor_heap_chunk *chunk)
{
        mutex_lock(&heap->lock);
        list_del(&chunk->node);
        heap->chunk_count--;
        mutex_unlock(&heap->lock);

        atomic_sub(heap->chunk_size, &pool->size);

        panthor_kernel_bo_destroy(chunk->bo);
        kfree(chunk);
}

static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
                                    struct panthor_heap *heap,
                                    bool initial_chunk)
{
        struct panthor_heap_chunk *chunk;
        struct panthor_heap_chunk_header *hdr;
        int ret;

        chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
        if (!chunk)
                return -ENOMEM;

        chunk->bo = panthor_kernel_bo_create(pool->ptdev, pool->vm, heap->chunk_size,
                                             DRM_PANTHOR_BO_NO_MMAP,
                                             DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                             PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(chunk->bo)) {
                ret = PTR_ERR(chunk->bo);
                goto err_free_chunk;
        }

        ret = panthor_kernel_bo_vmap(chunk->bo);
        if (ret)
                goto err_destroy_bo;

        hdr = chunk->bo->kmap;
        memset(hdr, 0, sizeof(*hdr));

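        /*
         * When allocating the initial set of chunks, each new chunk is linked
         * to the previously allocated one through its header: the 4k-aligned
         * GPU VA of the previous chunk goes in the upper bits of @next, and
         * the chunk size (in 4KiB units) in the low 12 bits. Chunks allocated
         * later through panthor_heap_grow() are linked by the FW instead,
         * which is why this step is skipped for !initial_chunk.
         */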
        if (initial_chunk && !list_empty(&heap->chunks)) {
                struct panthor_heap_chunk *prev_chunk;
                u64 prev_gpuva;

                prev_chunk = list_first_entry(&heap->chunks,
                                              struct panthor_heap_chunk,
                                              node);

                prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
                hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        }

        panthor_kernel_bo_vunmap(chunk->bo);

        mutex_lock(&heap->lock);
        list_add(&chunk->node, &heap->chunks);
        heap->chunk_count++;
        mutex_unlock(&heap->lock);

        atomic_add(heap->chunk_size, &pool->size);

        return 0;

err_destroy_bo:
        panthor_kernel_bo_destroy(chunk->bo);

err_free_chunk:
        kfree(chunk);

        return ret;
}

static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
                                     struct panthor_heap *heap)
{
        struct panthor_heap_chunk *chunk, *tmp;

        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
                panthor_free_heap_chunk(pool, heap, chunk);
}

static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
                                     struct panthor_heap *heap,
                                     u32 chunk_count)
{
        int ret;
        u32 i;

        for (i = 0; i < chunk_count; i++) {
                ret = panthor_alloc_heap_chunk(pool, heap, true);
                if (ret)
                        return ret;
        }

        return 0;
}

static int
panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
{
        struct panthor_heap *heap;

        heap = xa_erase(&pool->xa, handle);
        if (!heap)
                return -EINVAL;

        panthor_free_heap_chunks(pool, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);
        return 0;
}

/**
 * panthor_heap_destroy() - Destroy a heap context
 * @pool: Pool this context belongs to.
 * @handle: Handle returned by panthor_heap_create().
 *
 * Return: 0 on success, a negative error code if the handle is invalid.
 */
int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
{
        int ret;

        down_write(&pool->lock);
        ret = panthor_heap_destroy_locked(pool, handle);
        up_write(&pool->lock);

        return ret;
}

/**
 * panthor_heap_create() - Create a heap context
 * @pool: Pool to instantiate the heap context from.
 * @initial_chunk_count: Number of chunks allocated at initialization time.
 * Must be at least 1.
 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
 * [128k:8M] range.
 * @max_chunks: Maximum number of chunks that can be allocated.
 * @target_in_flight: Maximum number of in-flight render passes.
 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
 * context.
 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
 * assigned to the heap context.
 *
 * Return: a non-negative handle on success, a negative error code otherwise.
 */
int panthor_heap_create(struct panthor_heap_pool *pool,
                        u32 initial_chunk_count,
                        u32 chunk_size,
                        u32 max_chunks,
                        u32 target_in_flight,
                        u64 *heap_ctx_gpu_va,
                        u64 *first_chunk_gpu_va)
{
        struct panthor_heap *heap;
        struct panthor_heap_chunk *first_chunk;
        struct panthor_vm *vm;
        int ret = 0;
        u32 id;

        if (initial_chunk_count == 0)
                return -EINVAL;

        if (initial_chunk_count > max_chunks)
                return -EINVAL;

        if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
            chunk_size < SZ_128K || chunk_size > SZ_8M)
                return -EINVAL;

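        /*
         * Grab a VM reference under the pool lock so the VM is guaranteed to
         * stay alive while chunks are allocated below without the pool lock
         * held, even if the pool gets destroyed (and pool->vm cleared)
         * concurrently.
         */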
        down_read(&pool->lock);
        vm = panthor_vm_get(pool->vm);
        up_read(&pool->lock);

        /* The pool has been destroyed, we can't create a new heap. */
        if (!vm)
                return -EINVAL;

        heap = kzalloc(sizeof(*heap), GFP_KERNEL);
        if (!heap) {
                ret = -ENOMEM;
                goto err_put_vm;
        }

        mutex_init(&heap->lock);
        INIT_LIST_HEAD(&heap->chunks);
        heap->chunk_size = chunk_size;
        heap->max_chunks = max_chunks;
        heap->target_in_flight = target_in_flight;

        ret = panthor_alloc_heap_chunks(pool, heap, initial_chunk_count);
        if (ret)
                goto err_free_heap;

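        /*
         * Chunks are inserted at the head of the list, so the first entry is
         * the most recently allocated chunk, i.e. the head of the chunk chain
         * built in panthor_alloc_heap_chunk(). Its GPU VA is reported back
         * through @first_chunk_gpu_va.
         */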
        first_chunk = list_first_entry(&heap->chunks,
                                       struct panthor_heap_chunk,
                                       node);
        *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);

        down_write(&pool->lock);
        /* The pool has been destroyed, we can't create a new heap. */
        if (!pool->vm) {
                ret = -EINVAL;
        } else {
                ret = xa_alloc(&pool->xa, &id, heap,
                               XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
                if (!ret) {
                        void *gpu_ctx = panthor_get_heap_ctx(pool, id);

                        memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
                        *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
                                           panthor_get_heap_ctx_offset(pool, id);
                }
        }
        up_write(&pool->lock);

        if (ret)
                goto err_free_heap;

        panthor_vm_put(vm);
        return id;

err_free_heap:
        panthor_free_heap_chunks(pool, heap);
        mutex_destroy(&heap->lock);
        kfree(heap);

err_put_vm:
        panthor_vm_put(vm);
        return ret;
}

/**
 * panthor_heap_return_chunk() - Return an unused heap chunk
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @chunk_gpu_va: The chunk VA to return.
 *
 * This function is used when a chunk allocated with panthor_heap_grow()
 * couldn't be linked to the heap context through the FW interface because
 * the group requesting the allocation was scheduled out in the meantime.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
                              u64 heap_gpu_va,
                              u64 chunk_gpu_va)
{
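        /*
         * Heap contexts are packed in the gpu_contexts BO at a fixed stride,
         * so the heap ID can be recovered from the heap context GPU VA by
         * taking the offset into that BO and dividing it by the stride.
         */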
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
        struct panthor_heap *heap;
        int ret;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

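        /*
         * The chunk VA uses the packed format produced by panthor_heap_grow()
         * (chunk size in the low 12 bits), so mask the size off before
         * comparing against the chunk BOs' 4k-aligned GPU VAs.
         */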
        chunk_gpu_va &= GENMASK_ULL(63, 12);

        mutex_lock(&heap->lock);
        list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
                if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
                        removed = chunk;
                        list_del(&chunk->node);
                        heap->chunk_count--;
                        atomic_sub(heap->chunk_size, &pool->size);
                        break;
                }
        }
        mutex_unlock(&heap->lock);

        if (removed) {
                panthor_kernel_bo_destroy(removed->bo);
                kfree(removed);
                ret = 0;
        } else {
                ret = -EINVAL;
        }

out_unlock:
        up_read(&pool->lock);
        return ret;
}

/**
 * panthor_heap_grow() - Make a heap context grow.
 * @pool: The pool this heap belongs to.
 * @heap_gpu_va: The GPU address of the heap context.
 * @renderpasses_in_flight: Number of render passes currently in-flight.
 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
 *
 * Return:
 * - 0 if a new heap chunk was allocated
 * - -ENOMEM if the tiler context reached the maximum number of chunks
 *   or if too many render passes are in-flight
 *   or if the allocation failed
 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
 */
int panthor_heap_grow(struct panthor_heap_pool *pool,
                      u64 heap_gpu_va,
                      u32 renderpasses_in_flight,
                      u32 pending_frag_count,
                      u64 *new_chunk_gpu_va)
{
        u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
        u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
        struct panthor_heap_chunk *chunk;
        struct panthor_heap *heap;
        int ret;

        if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
                return -EINVAL;

        down_read(&pool->lock);
        heap = xa_load(&pool->xa, heap_id);
        if (!heap) {
                ret = -EINVAL;
                goto out_unlock;
        }

        /* If we reached the target in-flight render passes, or if we
         * reached the maximum number of chunks, let the FW figure out another
         * way to find some memory (wait for render passes to finish, or call
         * the exception handler provided by the userspace driver, if any).
         */
        if (renderpasses_in_flight > heap->target_in_flight ||
            heap->chunk_count >= heap->max_chunks) {
                ret = -ENOMEM;
                goto out_unlock;
        }

        /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
         * which goes through the blocking allocation path. Ultimately, we
         * want a non-blocking allocation, so we can immediately report to the
         * FW when the system is running out of memory. In that case, the FW
         * can call a user-provided exception handler, which might try to free
         * some tiler memory by issuing an intermediate fragment job. If the
         * exception handler can't do anything, it will flag the queue as
         * faulty so the job that triggered this tiler chunk allocation and all
         * further jobs in this queue fail immediately instead of having to
         * wait for the job timeout.
         */
        ret = panthor_alloc_heap_chunk(pool, heap, false);
        if (ret)
                goto out_unlock;

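        /*
         * The chunk that was just allocated sits at the head of the list.
         * Return its 4k-aligned GPU VA packed with the chunk size in the low
         * 12 bits, the same format used for panthor_heap_chunk_header::next.
         */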
        chunk = list_first_entry(&heap->chunks,
                                 struct panthor_heap_chunk,
                                 node);
        *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
                            (heap->chunk_size >> 12);
        ret = 0;

out_unlock:
        up_read(&pool->lock);
        return ret;
}

static void panthor_heap_pool_release(struct kref *refcount)
{
        struct panthor_heap_pool *pool =
                container_of(refcount, struct panthor_heap_pool, refcount);

        xa_destroy(&pool->xa);
        kfree(pool);
}

/**
 * panthor_heap_pool_put() - Release a heap pool reference
 * @pool: Pool to release the reference on. Can be NULL.
 */
void panthor_heap_pool_put(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_put(&pool->refcount, panthor_heap_pool_release);
}

/**
 * panthor_heap_pool_get() - Get a heap pool reference
 * @pool: Pool to get the reference on. Can be NULL.
 *
 * Return: @pool.
 */
struct panthor_heap_pool *
panthor_heap_pool_get(struct panthor_heap_pool *pool)
{
        if (pool)
                kref_get(&pool->refcount);

        return pool;
}

/**
 * panthor_heap_pool_create() - Create a heap pool
 * @ptdev: Device.
 * @vm: The VM this heap pool will be attached to.
 *
 * Heap pools might contain up to 128 heap contexts, and are per-VM.
 *
 * Return: A valid pointer on success, an ERR_PTR()-encoded error otherwise.
 */
struct panthor_heap_pool *
panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
{
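        /*
         * One context slot per possible heap in the pool, with the total
         * rounded up to a multiple of 4k so the backing BO covers whole pages.
         */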
        size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
                              panthor_heap_ctx_stride(ptdev),
                              4096);
        struct panthor_heap_pool *pool;
        int ret = 0;

        pool = kzalloc(sizeof(*pool), GFP_KERNEL);
        if (!pool)
                return ERR_PTR(-ENOMEM);

        /* We want a weak ref here: the heap pool belongs to the VM, so we're
         * sure that, as long as the heap pool exists, the VM exists too.
         */
        pool->vm = vm;
        pool->ptdev = ptdev;
        init_rwsem(&pool->lock);
        xa_init_flags(&pool->xa, XA_FLAGS_ALLOC);
        kref_init(&pool->refcount);

        pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
                                                      DRM_PANTHOR_BO_NO_MMAP,
                                                      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
                                                      PANTHOR_VM_KERNEL_AUTO_VA);
        if (IS_ERR(pool->gpu_contexts)) {
                ret = PTR_ERR(pool->gpu_contexts);
                goto err_destroy_pool;
        }

        ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
        if (ret)
                goto err_destroy_pool;

        atomic_add(pool->gpu_contexts->obj->size, &pool->size);

        return pool;

err_destroy_pool:
        panthor_heap_pool_destroy(pool);
        return ERR_PTR(ret);
}


/**
 * panthor_heap_pool_destroy() - Destroy a heap pool.
 * @pool: Pool to destroy.
 *
 * This function destroys all heap contexts and their resources, thus
 * preventing any use of the heap contexts or the chunks attached to them
 * after that point.
 *
 * If the GPU still has access to some heap contexts, a fault should be
 * triggered, which should flag the command stream groups using these
 * contexts as faulty.
 *
 * The heap pool object is only released when all references to this pool
 * are released.
 */
void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
{
        struct panthor_heap *heap;
        unsigned long i;

        if (!pool)
                return;

        down_write(&pool->lock);
        xa_for_each(&pool->xa, i, heap)
                drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));

        if (!IS_ERR_OR_NULL(pool->gpu_contexts)) {
                atomic_sub(pool->gpu_contexts->obj->size, &pool->size);
                panthor_kernel_bo_destroy(pool->gpu_contexts);
        }

        /* Reflects the fact the pool has been destroyed. */
        pool->vm = NULL;
        up_write(&pool->lock);

        panthor_heap_pool_put(pool);
}

/**
 * panthor_heap_pool_size() - Get a heap pool's total size
 * @pool: Pool whose total chunk size should be returned.
 *
 * Return: The aggregated size of all chunks for all heaps in the pool.
 */
size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
{
        if (!pool)
                return 0;

        return atomic_read(&pool->size);
}