1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file iris_bufmgr.c
25 *
26 * The Iris buffer manager.
27 *
28 * XXX: write better comments
29 * - BOs
30 * - Explain BO cache
31 * - main interface to GEM in the kernel
32 */
33
34 #include <util/u_atomic.h>
35 #include <fcntl.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 #include <assert.h>
41 #include <sys/ioctl.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <stdbool.h>
46 #include <time.h>
47 #include <unistd.h>
48
49 #include "errno.h"
50 #include "common/intel_aux_map.h"
51 #include "common/intel_mem.h"
52 #include "c99_alloca.h"
53 #include "dev/intel_debug.h"
54 #include "common/intel_common.h"
55 #include "common/intel_gem.h"
56 #include "dev/intel_device_info.h"
57 #include "drm-uapi/dma-buf.h"
58 #include "isl/isl.h"
59 #include "util/os_mman.h"
60 #include "util/u_debug.h"
61 #include "util/macros.h"
62 #include "util/hash_table.h"
63 #include "util/list.h"
64 #include "util/os_file.h"
65 #include "util/u_dynarray.h"
66 #include "util/vma.h"
67 #include "iris_bufmgr.h"
68 #include "iris_context.h"
69 #include "string.h"
70 #include "iris_kmd_backend.h"
71 #include "i915/iris_bufmgr.h"
72 #include "xe/iris_bufmgr.h"
73
74 #include <xf86drm.h>
75
76 #ifdef HAVE_VALGRIND
77 #include <valgrind.h>
78 #include <memcheck.h>
79 #define VG(x) x
80 #else
81 #define VG(x)
82 #endif
83
84 /* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
85 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
86 * leaked. All because it does not call VG(cli_free) from its
87 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
88 * an allocation, we mark it available for use upon mmapping and remove
89 * it upon unmapping.
90 */
91 #define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
92 #define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))
93
94 /* On FreeBSD, PAGE_SIZE is already defined in
95 * /usr/include/machine/param.h, which is indirectly
96 * included here.
97 */
98 #ifndef PAGE_SIZE
99 #define PAGE_SIZE 4096
100 #endif
101
102 #define WARN_ONCE(cond, fmt...) do { \
103 if (unlikely(cond)) { \
104 static bool _warned = false; \
105 if (!_warned) { \
106 fprintf(stderr, "WARNING: "); \
107 fprintf(stderr, fmt); \
108 _warned = true; \
109 } \
110 } \
111 } while (0)
112
113 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
114
115 /**
116 * For debugging purposes, this returns a time in seconds.
117 */
118 static double
119 get_time(void)
120 {
121 struct timespec tp;
122
123 clock_gettime(CLOCK_MONOTONIC, &tp);
124
125 return tp.tv_sec + tp.tv_nsec / 1000000000.0;
126 }
127
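/* Atomically add 'add' to '*v' unless the current value equals 'unless'.
 * Returns true if the value was 'unless' and no addition was performed.
 */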
128 static inline int
129 atomic_add_unless(int *v, int add, int unless)
130 {
131 int c, old;
132 c = p_atomic_read(v);
133 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
134 c = old;
135 return c == unless;
136 }
137
138 static const char *
139 memzone_name(enum iris_memory_zone memzone)
140 {
141 const char *names[] = {
142 [IRIS_MEMZONE_SHADER] = "shader",
143 [IRIS_MEMZONE_BINDER] = "binder",
144 [IRIS_MEMZONE_SCRATCH] = "scratchsurf",
145 [IRIS_MEMZONE_SURFACE] = "surface",
146 [IRIS_MEMZONE_DYNAMIC] = "dynamic",
147 [IRIS_MEMZONE_OTHER] = "other",
148 [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
149 };
150 assert(memzone < ARRAY_SIZE(names));
151 return names[memzone];
152 }
153
154 struct bo_cache_bucket {
155 /** List of cached BOs. */
156 struct list_head head;
157
158 /** Size of this bucket, in bytes. */
159 uint64_t size;
160 };
161
162 struct bo_export {
163 /** File descriptor associated with a handle export. */
164 int drm_fd;
165
166 /** GEM handle in drm_fd */
167 uint32_t gem_handle;
168
169 struct list_head link;
170 };
171
172 struct iris_memregion {
173 struct intel_memory_class_instance *region;
174 uint64_t size;
175 };
176
177 #define NUM_SLAB_ALLOCATORS 3
178
179 struct iris_slab {
180 struct pb_slab base;
181
182 /** The BO representing the entire slab */
183 struct iris_bo *bo;
184
185 /** Array of iris_bo structs representing BOs allocated out of this slab */
186 struct iris_bo *entries;
187 };
188
189 #define BUCKET_ARRAY_SIZE (14 * 4)
190
191 struct iris_bucket_cache {
192 struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
193 int num_buckets;
194 };
195
196 struct iris_bufmgr {
197 /**
198 * Link in the global list of buffer managers (global_bufmgr_list).
199 */
200 struct list_head link;
201
202 uint32_t refcount;
203
204 int fd;
205
206 simple_mtx_t lock;
207 simple_mtx_t bo_deps_lock;
208
209 /** Array of lists of cached gem objects of power-of-two sizes */
210 struct iris_bucket_cache *bucket_cache;
211
212 time_t time;
213
214 struct hash_table *name_table;
215 struct hash_table *handle_table;
216
217 /**
218 * List of BOs which we've effectively freed, but are hanging on to
219 * until they're idle before closing and returning the VMA.
220 */
221 struct list_head zombie_list;
222
223 struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];
224
225 struct iris_memregion vram, sys;
226
227 /* Used only when use_global_vm is true. */
228 uint32_t global_vm_id;
229
230 int next_screen_id;
231
232 struct intel_device_info devinfo;
233 const struct iris_kmd_backend *kmd_backend;
234 struct intel_bind_timeline bind_timeline; /* Xe only */
235 bool bo_reuse:1;
236 bool use_global_vm:1;
237
238 struct intel_aux_map_context *aux_map_ctx;
239
240 struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];
241
242 struct iris_border_color_pool border_color_pool;
243
244 struct iris_bo *dummy_aux_bo;
245 };
246
247 static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
248 static struct list_head global_bufmgr_list = {
249 .next = &global_bufmgr_list,
250 .prev = &global_bufmgr_list,
251 };
252
253 static void bo_free(struct iris_bo *bo);
254
255 static struct iris_bo *
256 find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
257 {
258 struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
259 struct iris_bo *bo = entry ? entry->data : NULL;
260
261 if (bo) {
262 assert(iris_bo_is_external(bo));
263 assert(iris_bo_is_real(bo));
264 assert(!bo->real.reusable);
265
266 /* Being non-reusable, the BO cannot be in the cache lists, but it
267 * may be in the zombie list if it had reached zero references, but
268 * we hadn't yet closed it...and then reimported the same BO. If it
269 * is, then remove it since it's now been resurrected.
270 */
271 if (list_is_linked(&bo->head))
272 list_del(&bo->head);
273
274 iris_bo_reference(bo);
275 }
276
277 return bo;
278 }
279
280 /**
281 * This function finds the bucket that best fits the input size.
282 * It runs in O(1): the bucket index is computed directly from the
283 * requested size rather than by iterating over all the buckets.
284 */
285 static struct bo_cache_bucket *
286 bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
287 enum iris_heap heap, unsigned flags)
288 {
289 if (flags & BO_ALLOC_PROTECTED)
290 return NULL;
291
292 /* TODO: Enable bo cache for compressed bos
293 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11362
294 */
295 if (bufmgr->devinfo.verx10 == 200 && (flags & BO_ALLOC_COMPRESSED))
296 return NULL;
297
298 const struct intel_device_info *devinfo = &bufmgr->devinfo;
299 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
300
301 if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
302 (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
303 return NULL;
304
305 /* Compute the number of pages, rounding the size up to a page boundary. */
306 const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
307
308 /* Row Bucket sizes clz((x-1) | 3) Row Column
309 * in pages stride size
310 * 0: 1 2 3 4 -> 30 30 30 30 4 1
311 * 1: 5 6 7 8 -> 29 29 29 29 4 1
312 * 2: 10 12 14 16 -> 28 28 28 28 8 2
313 * 3: 20 24 28 32 -> 27 27 27 27 16 4
314 */
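/* Worked example: a 24-page request gives clz((24 - 1) | 3) == 27, so
 * row == 3, row_max_pages == 32, prev_row_max_pages == 16, and
 * col == (24 - 16 + 3) >> 2 == 2, yielding index == 3 * 4 + (2 - 1) == 13,
 * i.e. the 24-page (96 KB) bucket in row 3 of the table above.
 */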
315 const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
316 const unsigned row_max_pages = 4 << row;
317
318 /* The '& ~2' is the special case for row 1. In row 1, max pages /
319 * 2 is 2, but the previous row maximum is zero (because there is
320 * no previous row). All row maximum sizes are power of 2, so that
321 * is the only case where that bit will be set.
322 */
323 const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
324 int col_size_log2 = row - 1;
325 col_size_log2 += (col_size_log2 < 0);
326
327 const unsigned col = (pages - prev_row_max_pages +
328 ((1 << col_size_log2) - 1)) >> col_size_log2;
329
330 /* Compute the bucket index from the row and column. */
331 const unsigned index = (row * 4) + (col - 1);
332
333 return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
334 }
335
336 enum iris_memory_zone
337 iris_memzone_for_address(uint64_t address)
338 {
339 STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START);
340 STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_SCRATCH_START);
341 STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
342 STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START);
343 STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START);
344 STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);
345
346 if (address >= IRIS_MEMZONE_OTHER_START)
347 return IRIS_MEMZONE_OTHER;
348
349 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
350 return IRIS_MEMZONE_BORDER_COLOR_POOL;
351
352 if (address > IRIS_MEMZONE_DYNAMIC_START)
353 return IRIS_MEMZONE_DYNAMIC;
354
355 if (address >= IRIS_MEMZONE_SURFACE_START)
356 return IRIS_MEMZONE_SURFACE;
357
358 if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
359 return IRIS_MEMZONE_BINDER;
360
361 if (address >= IRIS_MEMZONE_SCRATCH_START)
362 return IRIS_MEMZONE_SCRATCH;
363
364 return IRIS_MEMZONE_SHADER;
365 }
366
367 /**
368 * Allocate a section of virtual memory for a buffer, assigning an address.
369 *
370 * This uses either the bucket allocator for the given size, or the large
371 * object allocator (util_vma).
372 */
373 static uint64_t
374 vma_alloc(struct iris_bufmgr *bufmgr,
375 enum iris_memory_zone memzone,
376 uint64_t size,
377 uint64_t alignment)
378 {
379 simple_mtx_assert_locked(&bufmgr->lock);
380
381 const unsigned _2mb = 2 * 1024 * 1024;
382
383 /* Force minimum alignment based on device requirements */
384 assert((alignment & (alignment - 1)) == 0);
385 alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);
386
387 /* If the allocation is a multiple of 2MB, ensure the virtual address is
388 * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
389 */
390 if (size % _2mb == 0)
391 alignment = MAX2(alignment, _2mb);
392
393 if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
394 return IRIS_BORDER_COLOR_POOL_ADDRESS;
395
396 uint64_t addr =
397 util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);
398
399 assert((addr >> 48ull) == 0);
400 assert((addr % alignment) == 0);
401
402 return intel_canonical_address(addr);
403 }
404
405 static void
406 vma_free(struct iris_bufmgr *bufmgr,
407 uint64_t address,
408 uint64_t size)
409 {
410 simple_mtx_assert_locked(&bufmgr->lock);
411
412 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
413 return;
414
415 /* Un-canonicalize the address. */
416 address = intel_48b_address(address);
417
418 if (address == 0ull)
419 return;
420
421 enum iris_memory_zone memzone = iris_memzone_for_address(address);
422
423 assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));
424
425 util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
426 }
427
428 /* Exports a BO's implicit synchronization state to a drm_syncobj, returning
429 * its wrapping iris_syncobj. The drm_syncobj is newly created and must be
430 * destroyed by the caller after the execbuf ioctl.
431 */
432 struct iris_syncobj *
433 iris_bo_export_sync_state(struct iris_bo *bo)
434 {
435 struct iris_bufmgr *bufmgr = bo->bufmgr;
436 int drm_fd = iris_bufmgr_get_fd(bufmgr);
437
438 struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);
439
440 struct dma_buf_export_sync_file export_sync_file_ioctl = {
441 .flags = DMA_BUF_SYNC_RW, /* TODO */
442 .fd = -1,
443 };
444 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
445 &export_sync_file_ioctl)) {
446 fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
447 errno);
448 goto error_export;
449 }
450
451 int sync_file_fd = export_sync_file_ioctl.fd;
452 assert(sync_file_fd >= 0);
453
454 struct drm_syncobj_handle syncobj_import_ioctl = {
455 .handle = iris_syncobj->handle,
456 .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
457 .fd = sync_file_fd,
458 };
459 if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
460 &syncobj_import_ioctl)) {
461 fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
462 errno);
463 }
464
465 close(sync_file_fd);
466
467 return iris_syncobj;
468 error_export:
469 iris_syncobj_destroy(bufmgr, iris_syncobj);
470 return NULL;
471 }
472
473 /* Import the state of a sync_file_fd (which we should have gotten from
474 * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
475 * state.
476 */
477 void
478 iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
479 {
480 struct dma_buf_import_sync_file import_sync_file_ioctl = {
481 .flags = DMA_BUF_SYNC_WRITE,
482 .fd = sync_file_fd,
483 };
484 if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
485 &import_sync_file_ioctl))
486 fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
487 errno);
488 }
489
490 /* A timeout of 0 just checks for busyness. */
491 static int
492 iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
493 {
494 int ret = 0;
495 struct iris_bufmgr *bufmgr = bo->bufmgr;
496 const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
497 struct iris_syncobj *external_implicit_syncobj = NULL;
498
499 /* If we know it's idle, don't bother with the kernel round trip.
500 * Can't do that for Xe KMD with external BOs since we have to check the
501 * implicit synchronization information.
502 */
503 if (!is_external && bo->idle)
504 return 0;
505
506 simple_mtx_lock(&bufmgr->bo_deps_lock);
507
508 const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
509 uint32_t *handles = handles_len <= 32 ?
510 (uint32_t *)alloca(handles_len * sizeof(*handles)) :
511 (uint32_t *)malloc(handles_len * sizeof(*handles));
512 int handle_count = 0;
513
514 if (is_external) {
515 external_implicit_syncobj = iris_bo_export_sync_state(bo);
516 if (external_implicit_syncobj)
517 handles[handle_count++] = external_implicit_syncobj->handle;
518 }
519
520 for (int d = 0; d < bo->deps_size; d++) {
521 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
522 struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
523 struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
524 if (r)
525 handles[handle_count++] = r->handle;
526 if (w)
527 handles[handle_count++] = w->handle;
528 }
529 }
530
531 if (handle_count == 0)
532 goto out;
533
534 /* Unlike the gem wait, negative values are not infinite here. */
535 int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
536 if (timeout_abs < 0)
537 timeout_abs = INT64_MAX;
538
539 struct drm_syncobj_wait args = {
540 .handles = (uintptr_t) handles,
541 .timeout_nsec = timeout_abs,
542 .count_handles = handle_count,
543 .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
544 };
545
546 ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
547 if (ret != 0) {
548 ret = -errno;
549 goto out;
550 }
551
552 /* We just waited on everything, so clear all the deps. */
553 for (int d = 0; d < bo->deps_size; d++) {
554 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
555 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
556 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
557 }
558 }
559
560 out:
561 if (handles_len > 32)
562 free(handles);
563 if (external_implicit_syncobj)
564 iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);
565
566 simple_mtx_unlock(&bufmgr->bo_deps_lock);
567 return ret;
568 }
569
570 static bool
571 iris_bo_busy_syncobj(struct iris_bo *bo)
572 {
573 return iris_bo_wait_syncobj(bo, 0) == -ETIME;
574 }
575
576 bool
577 iris_bo_busy(struct iris_bo *bo)
578 {
579 bool busy;
580
581 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
582 case INTEL_KMD_TYPE_I915:
583 if (iris_bo_is_external(bo))
584 busy = iris_i915_bo_busy_gem(bo);
585 else
586 busy = iris_bo_busy_syncobj(bo);
587 break;
588 case INTEL_KMD_TYPE_XE:
589 busy = iris_bo_busy_syncobj(bo);
590 break;
591 default:
592 unreachable("missing");
593 busy = true;
594 }
595
596 bo->idle = !busy;
597
598 return busy;
599 }
600
601 /**
602 * Specify the volatility of the buffer.
603 * \param bo Buffer whose purgeable state is being set
604 * \param state The purgeable status
605 *
606 * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
607 * reclaimed under memory pressure. If you subsequently require the buffer,
608 * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
609 *
610 * Returns true if the buffer was retained, or false if it was discarded
611 * whilst marked as IRIS_MADVICE_DONT_NEED.
612 */
613 static inline bool
614 iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
615 {
616 /* We can't madvise suballocated BOs. */
617 assert(iris_bo_is_real(bo));
618
619 return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
620 }
621
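/* Allocate a zeroed iris_bo wrapper and initialize the pieces common to all
 * real BOs (the export list and the pointer hash). The GEM handle and the
 * remaining fields are filled in by the caller.
 */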
622 static struct iris_bo *
623 bo_calloc(void)
624 {
625 struct iris_bo *bo = calloc(1, sizeof(*bo));
626 if (!bo)
627 return NULL;
628
629 list_inithead(&bo->real.exports);
630
631 bo->hash = _mesa_hash_pointer(bo);
632
633 return bo;
634 }
635
636 static void
637 bo_unmap(struct iris_bo *bo)
638 {
639 assert(iris_bo_is_real(bo));
640
641 VG_NOACCESS(bo->real.map, bo->size);
642 os_munmap(bo->real.map, bo->size);
643 bo->real.map = NULL;
644 }
645
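/* Return the slab allocator whose largest entry size can hold 'size'. */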
646 static struct pb_slabs *
647 get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
648 {
649 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
650 struct pb_slabs *slabs = &bufmgr->bo_slabs[i];
651
652 if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
653 return slabs;
654 }
655
656 unreachable("should have found a valid slab for this size");
657 }
658
659 /* Return the power of two size of a slab entry matching the input size. */
660 static unsigned
661 get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
662 {
663 unsigned entry_size = util_next_power_of_two(size);
664 unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;
665
666 return MAX2(entry_size, min_entry_size);
667 }
668
669 /* Return the slab entry alignment. */
670 static unsigned
671 get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
672 {
673 unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);
674
675 if (size <= entry_size * 3 / 4)
676 return entry_size / 4;
677
678 return entry_size;
679 }
680
681 static bool
682 iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
683 {
684 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
685
686 return !iris_bo_busy(bo);
687 }
688
689 static void
690 iris_slab_free(void *priv, struct pb_slab *pslab)
691 {
692 struct iris_bufmgr *bufmgr = priv;
693 struct iris_slab *slab = (void *) pslab;
694 struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;
695
696 assert(!slab->bo->aux_map_address);
697
698 /* Since we're freeing the whole slab, all buffers allocated out of it
699 * must be reclaimable. We require buffers to be idle to be reclaimed
700 * (see iris_can_reclaim_slab()), so we know all entries must be idle.
701 * Therefore, we can safely unmap their aux table entries.
702 */
703 for (unsigned i = 0; i < pslab->num_entries; i++) {
704 struct iris_bo *bo = &slab->entries[i];
705 if (aux_map_ctx && bo->aux_map_address) {
706 intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
707 bo->aux_map_address = 0;
708 }
709
710 /* Unref read/write dependency syncobjs and free the array. */
711 for (int d = 0; d < bo->deps_size; d++) {
712 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
713 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
714 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
715 }
716 }
717 free(bo->deps);
718 }
719
720 iris_bo_unreference(slab->bo);
721
722 free(slab->entries);
723 free(slab);
724 }
725
726 static struct pb_slab *
727 iris_slab_alloc(void *priv,
728 unsigned heap,
729 unsigned entry_size,
730 unsigned group_index)
731 {
732 struct iris_bufmgr *bufmgr = priv;
733 struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
734 uint32_t flags = BO_ALLOC_NO_SUBALLOC;
735 unsigned slab_size = 0;
736 /* We only support slab allocation for IRIS_MEMZONE_OTHER */
737 enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;
738
739 if (!slab)
740 return NULL;
741
742 struct pb_slabs *slabs = bufmgr->bo_slabs;
743
744 /* Determine the slab buffer size. */
745 for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
746 unsigned max_entry_size =
747 1 << (slabs[i].min_order + slabs[i].num_orders - 1);
748
749 if (entry_size <= max_entry_size) {
750 /* The slab size is twice the size of the largest possible entry. */
751 slab_size = max_entry_size * 2;
752
753 if (!util_is_power_of_two_nonzero(entry_size)) {
754 assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));
755
756 /* If the entry size is 3/4 of a power of two, we would waste
757 * space and not gain anything if we allocated only twice the
758 * power of two for the backing buffer:
759 *
760 * 2 * 3/4 = 1.5 usable with buffer size 2
761 *
762 * Allocating 5 times the entry size leads us to the next power
763 * of two and results in a much better memory utilization:
764 *
765 * 5 * 3/4 = 3.75 usable with buffer size 4
766 */
767 if (entry_size * 5 > slab_size)
768 slab_size = util_next_power_of_two(entry_size * 5);
769 }
770
771 /* The largest slab should have the same size as the PTE fragment
772 * size to get faster address translation.
773 *
774 * TODO: move this to intel_device_info?
775 */
776 const unsigned pte_size = 2 * 1024 * 1024;
777
778 if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
779 slab_size = pte_size;
780
781 break;
782 }
783 }
784 assert(slab_size != 0);
785
786 switch (heap) {
787 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
788 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
789 flags |= BO_ALLOC_COMPRESSED;
790 break;
791 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
792 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
793 flags |= BO_ALLOC_SMEM;
794 break;
795 case IRIS_HEAP_DEVICE_LOCAL:
796 flags |= BO_ALLOC_LMEM;
797 break;
798 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
799 flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
800 break;
801 default:
802 flags |= BO_ALLOC_PLAIN;
803 }
804
805 slab->bo =
806 iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
807 if (!slab->bo)
808 goto fail;
809
810 slab_size = slab->bo->size;
811
812 slab->base.num_entries = slab_size / entry_size;
813 slab->base.num_free = slab->base.num_entries;
814 slab->base.group_index = group_index;
815 slab->base.entry_size = entry_size;
816 slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
817 if (!slab->entries)
818 goto fail_bo;
819
820 list_inithead(&slab->base.free);
821
822 for (unsigned i = 0; i < slab->base.num_entries; i++) {
823 struct iris_bo *bo = &slab->entries[i];
824
825 bo->size = entry_size;
826 bo->bufmgr = bufmgr;
827 bo->hash = _mesa_hash_pointer(bo);
828 bo->gem_handle = 0;
829 bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
830 bo->aux_map_address = 0;
831 bo->index = -1;
832 bo->refcount = 0;
833 bo->idle = true;
834 bo->zeroed = slab->bo->zeroed;
835
836 bo->slab.entry.slab = &slab->base;
837
838 bo->slab.real = iris_get_backing_bo(slab->bo);
839
840 list_addtail(&bo->slab.entry.head, &slab->base.free);
841 }
842
843 return &slab->base;
844
845 fail_bo:
846 iris_bo_unreference(slab->bo);
847 fail:
848 free(slab);
849 return NULL;
850 }
851
852 /**
853 * Selects a heap for the given buffer allocation flags.
854 *
855 * This determines the cacheability, coherency, and mmap mode settings.
856 */
857 static enum iris_heap
858 flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
859 {
860 const struct intel_device_info *devinfo = &bufmgr->devinfo;
861
862 if (bufmgr->vram.size > 0) {
863 if (flags & BO_ALLOC_COMPRESSED)
864 return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;
865
866 /* Discrete GPUs currently always snoop CPU caches. */
867 if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_COHERENT))
868 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
869
870 if ((flags & BO_ALLOC_LMEM) ||
871 ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {
872
873 if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
874 return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;
875
876 return IRIS_HEAP_DEVICE_LOCAL;
877 }
878
879 return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
880 } else if (devinfo->has_llc) {
881 assert(!(flags & BO_ALLOC_LMEM));
882
883 if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
884 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
885
886 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
887 } else {
888 assert(!devinfo->has_llc);
889 assert(!(flags & BO_ALLOC_LMEM));
890
891 if (flags & BO_ALLOC_COMPRESSED)
892 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
893
894 if (flags & BO_ALLOC_COHERENT)
895 return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
896
897 return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
898 }
899 }
900
901 static bool
902 zero_bo(struct iris_bufmgr *bufmgr,
903 unsigned flags,
904 struct iris_bo *bo)
905 {
906 assert(flags & BO_ALLOC_ZEROED);
907
908 if (bo->zeroed)
909 return true;
910
911 if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
912 /* With flat CCS, all allocations in LMEM have memory ranges with
913 * corresponding CCS elements. These elements are only accessible
914 * through GPU commands, but we don't issue GPU commands here.
915 */
916 return false;
917 }
918
919 void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
920 if (!map)
921 return false;
922
923 memset(map, 0, bo->size);
924 bo->zeroed = true;
925 return true;
926 }
927
928 static struct iris_bo *
929 alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
930 const char *name,
931 uint64_t size,
932 uint32_t alignment,
933 unsigned flags)
934 {
935 if (flags & BO_ALLOC_NO_SUBALLOC)
936 return NULL;
937
938 struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
939 unsigned max_slab_entry_size =
940 1 << (last_slab->min_order + last_slab->num_orders - 1);
941
942 if (size > max_slab_entry_size)
943 return NULL;
944
945 struct pb_slab_entry *entry;
946
947 enum iris_heap heap = flags_to_heap(bufmgr, flags);
948
949 unsigned alloc_size = size;
950
951 /* Always use slabs for sizes less than 4 KB because the kernel aligns
952 * everything to 4 KB.
953 */
954 if (size < alignment && alignment <= 4 * 1024)
955 alloc_size = alignment;
956
957 if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
958 /* 3/4-of-a-power-of-two allocations can return too small an alignment.
959 * Try again with a power of two allocation size.
960 */
961 unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);
962
963 if (alignment <= pot_size) {
964 /* This size works but wastes some memory to fulfill the alignment. */
965 alloc_size = pot_size;
966 } else {
967 /* can't fulfill alignment requirements */
968 return NULL;
969 }
970 }
971
972 struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
973 entry = pb_slab_alloc(slabs, alloc_size, heap);
974 if (!entry) {
975 /* Clean up and try again... */
976 pb_slabs_reclaim(slabs);
977
978 entry = pb_slab_alloc(slabs, alloc_size, heap);
979 }
980 if (!entry)
981 return NULL;
982
983 struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);
984
985 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
986 /* This buffer was associated with an aux-buffer range. We only allow
987 * slab allocated buffers to be reclaimed when idle (not in use by an
988 * executing batch). (See iris_can_reclaim_slab().) So we know that
989 * our previous aux mapping is no longer in use, and we can safely
990 * remove it.
991 */
992 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
993 bo->size);
994 bo->aux_map_address = 0;
995 }
996
997 p_atomic_set(&bo->refcount, 1);
998 bo->name = name;
999 bo->size = size;
1000
1001 /* Zero the contents if necessary. If this fails, fall back to
1002 * allocating a fresh BO, which will always be zeroed by the kernel.
1003 */
1004 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1005 pb_slab_free(slabs, &bo->slab.entry);
1006 return NULL;
1007 }
1008
1009 return bo;
1010 }
1011
1012 static struct iris_bo *
1013 alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
1014 struct bo_cache_bucket *bucket,
1015 uint32_t alignment,
1016 enum iris_memory_zone memzone,
1017 enum iris_mmap_mode mmap_mode,
1018 unsigned flags,
1019 bool match_zone)
1020 {
1021 if (!bucket)
1022 return NULL;
1023
1024 struct iris_bo *bo = NULL;
1025
1026 simple_mtx_assert_locked(&bufmgr->lock);
1027
1028 list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
1029 assert(iris_bo_is_real(cur));
1030
1031 /* Find one that's got the right mapping type. We used to swap maps
1032 * around but the kernel doesn't allow this on discrete GPUs.
1033 */
1034 if (mmap_mode != cur->real.mmap_mode)
1035 continue;
1036
1037 /* Try a little harder to find one that's already in the right memzone */
1038 if (match_zone && memzone != iris_memzone_for_address(cur->address))
1039 continue;
1040
1041 if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
1042 continue;
1043
1044 /* If the last BO in the cache is busy, there are no idle BOs. Bail,
1045 * either falling back to a non-matching memzone, or if that fails,
1046 * allocating a fresh buffer.
1047 */
1048 if (iris_bo_busy(cur))
1049 return NULL;
1050
1051 list_del(&cur->head);
1052
1053 /* Tell the kernel we need this BO, and check whether it still exists. */
1054 if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
1055 /* This BO was purged, throw it out and keep looking. */
1056 bo_free(cur);
1057 continue;
1058 }
1059
1060 if (cur->aux_map_address) {
1061 /* This buffer was associated with an aux-buffer range. We make sure
1062 * that buffers are not reused from the cache while the buffer is (busy)
1063 * being used by an executing batch. Since we are here, the buffer is no
1064 * longer being used by a batch and the buffer was deleted (in order to
1065 * end up in the cache). Therefore its old aux-buffer range can be
1066 * removed from the aux-map.
1067 */
1068 if (cur->bufmgr->aux_map_ctx)
1069 intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
1070 cur->size);
1071 cur->aux_map_address = 0;
1072 }
1073
1074 /* If the cached BO isn't in the right memory zone, or the alignment
1075 * isn't sufficient, free the old memory and assign it a new address.
1076 */
1077 if (memzone != iris_memzone_for_address(cur->address) ||
1078 cur->address % alignment != 0) {
1079 if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
1080 DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
1081 bo_free(cur);
1082 continue;
1083 }
1084
1085 vma_free(bufmgr, cur->address, cur->size);
1086 cur->address = 0ull;
1087 }
1088
1089 bo = cur;
1090 break;
1091 }
1092
1093 if (!bo)
1094 return NULL;
1095
1096 /* Zero the contents if necessary. If this fails, fall back to
1097 * allocating a fresh BO, which will always be zeroed by the kernel.
1098 */
1099 assert(bo->zeroed == false);
1100 if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
1101 bo_free(bo);
1102 return NULL;
1103 }
1104
1105 return bo;
1106 }
1107
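/* Allocate a brand new BO from the kernel, bypassing the cache and slab
 * allocators, and pick the memory region(s) matching the requested heap.
 */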
1108 static struct iris_bo *
1109 alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
1110 {
1111 struct iris_bo *bo = bo_calloc();
1112 if (!bo)
1113 return NULL;
1114
1115 /* Try to allocate memory in multiples of 2MB, as this allows us to use
1116 * 64K pages rather than the less-efficient 4K pages. Most BOs smaller
1117 * than 64MB should hit the BO cache or slab allocations anyway, so this
1118 * shouldn't waste too much memory. We do exclude small (< 1MB) sizes to
1119 * be defensive in case any of those bypass the caches and end up here.
1120 */
1121 if (bo_size >= 1024 * 1024)
1122 bo_size = align64(bo_size, 2 * 1024 * 1024);
1123
1124 bo->real.heap = flags_to_heap(bufmgr, flags);
1125
1126 const struct intel_memory_class_instance *regions[2];
1127 uint16_t num_regions = 0;
1128
1129 if (bufmgr->vram.size > 0) {
1130 switch (bo->real.heap) {
1131 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1132 /* For vram allocations, still use system memory as a fallback. */
1133 regions[num_regions++] = bufmgr->vram.region;
1134 regions[num_regions++] = bufmgr->sys.region;
1135 break;
1136 case IRIS_HEAP_DEVICE_LOCAL:
1137 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1138 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1139 regions[num_regions++] = bufmgr->vram.region;
1140 break;
1141 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1142 regions[num_regions++] = bufmgr->sys.region;
1143 break;
1144 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1145 /* not valid: on discrete, compressed BOs are always created with
1146 * IRIS_HEAP_DEVICE_LOCAL_COMPRESSED (see flags_to_heap())
1147 */
1148 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1149 /* not valid; discrete cards always enable snooping */
1150 case IRIS_HEAP_MAX:
1151 unreachable("invalid heap for BO");
1152 }
1153 } else {
1154 regions[num_regions++] = bufmgr->sys.region;
1155 }
1156
1157 bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
1158 num_regions, bo_size,
1159 bo->real.heap, flags);
1160 if (bo->gem_handle == 0) {
1161 free(bo);
1162 return NULL;
1163 }
1164 bo->bufmgr = bufmgr;
1165 bo->size = bo_size;
1166 bo->idle = true;
1167 bo->zeroed = true;
1168 bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;
1169
1170 return bo;
1171 }
1172
1173 const char *
1174 iris_heap_to_string[IRIS_HEAP_MAX] = {
1175 [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
1176 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
1177 [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
1178 [IRIS_HEAP_DEVICE_LOCAL] = "local",
1179 [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
1180 [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
1181 [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
1182 };
1183
1184 static enum iris_mmap_mode
1185 heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
1186 {
1187 const struct intel_device_info *devinfo = &bufmgr->devinfo;
1188
1189 switch (heap) {
1190 case IRIS_HEAP_DEVICE_LOCAL:
1191 return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
1192 case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
1193 case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
1194 return IRIS_MMAP_WC;
1195 case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
1196 return IRIS_MMAP_WB;
1197 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
1198 return IRIS_MMAP_WC;
1199 case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
1200 case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
1201 /* Compressed BOs are not mmapped. */
1202 return IRIS_MMAP_NONE;
1203 default:
1204 unreachable("invalid heap");
1205 }
1206 }
1207
1208 struct iris_bo *
1209 iris_bo_alloc(struct iris_bufmgr *bufmgr,
1210 const char *name,
1211 uint64_t size,
1212 uint32_t alignment,
1213 enum iris_memory_zone memzone,
1214 unsigned flags)
1215 {
1216 struct iris_bo *bo;
1217 unsigned int page_size = getpagesize();
1218 enum iris_heap heap = flags_to_heap(bufmgr, flags);
1219 struct bo_cache_bucket *bucket =
1220 bucket_for_size(bufmgr, size, heap, flags);
1221
1222 if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT))
1223 flags |= BO_ALLOC_NO_SUBALLOC;
1224
1225 /* By default, capture all driver-internal buffers like shader kernels,
1226 * surface states, dynamic states, border colors, and so on.
1227 */
1228 if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1229 flags |= BO_ALLOC_CAPTURE;
1230
1231 bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);
1232
1233 if (bo)
1234 return bo;
1235
1236 /* Round the size up to the bucket size, or if we don't have caching
1237 * at this size, a multiple of the page size.
1238 */
1239 uint64_t bo_size =
1240 bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
1241 enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);
1242
1243 simple_mtx_lock(&bufmgr->lock);
1244
1245 /* Get a buffer out of the cache if available. First, we try to find
1246 * one with a matching memory zone so we can avoid reallocating VMA.
1247 */
1248 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1249 flags, true);
1250
1251 /* If that fails, we try for any cached BO, without matching memzone. */
1252 if (!bo) {
1253 bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
1254 flags, false);
1255 }
1256
1257 simple_mtx_unlock(&bufmgr->lock);
1258
1259 if (!bo) {
1260 bo = alloc_fresh_bo(bufmgr, bo_size, flags);
1261 if (!bo)
1262 return NULL;
1263 }
1264
1265 if (bo->address == 0ull) {
1266 simple_mtx_lock(&bufmgr->lock);
1267 bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
1268 simple_mtx_unlock(&bufmgr->lock);
1269
1270 if (bo->address == 0ull)
1271 goto err_free;
1272
1273 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1274 goto err_vm_alloc;
1275 }
1276
1277 bo->name = name;
1278 p_atomic_set(&bo->refcount, 1);
1279 bo->real.reusable = bucket && bufmgr->bo_reuse;
1280 bo->real.protected = flags & BO_ALLOC_PROTECTED;
1281 bo->index = -1;
1282 bo->real.prime_fd = -1;
1283
1284 assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
1285 bo->real.mmap_mode = mmap_mode;
1286
1287 /* On integrated GPUs, enable snooping to ensure coherency if needed.
1288 * For discrete, we instead use SMEM and avoid WB maps for coherency.
1289 */
1290 if ((flags & BO_ALLOC_COHERENT) &&
1291 !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
1292 if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
1293 goto err_free;
1294 }
1295
1296 DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
1297 bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
1298 (unsigned long long) size);
1299
1300 return bo;
1301
1302 err_vm_alloc:
1303 simple_mtx_lock(&bufmgr->lock);
1304 vma_free(bufmgr, bo->address, bo->size);
1305 simple_mtx_unlock(&bufmgr->lock);
1306 err_free:
1307 simple_mtx_lock(&bufmgr->lock);
1308 bo_free(bo);
1309 simple_mtx_unlock(&bufmgr->lock);
1310 return NULL;
1311 }
1312
1313 static int
1314 iris_bo_close(int fd, uint32_t gem_handle)
1315 {
1316 struct drm_gem_close close = {
1317 .handle = gem_handle,
1318 };
1319 return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
1320 }
1321
1322 struct iris_bo *
1323 iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
1324 void *ptr, size_t size,
1325 enum iris_memory_zone memzone)
1326 {
1327 struct iris_bo *bo;
1328
1329 bo = bo_calloc();
1330 if (!bo)
1331 return NULL;
1332
1333 bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
1334 if (bo->gem_handle == 0)
1335 goto err_free;
1336
1337 bo->name = name;
1338 bo->size = size;
1339 bo->real.map = ptr;
1340 bo->real.userptr = true;
1341
1342 bo->bufmgr = bufmgr;
1343
1344 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1345 bo->real.capture = true;
1346
1347 simple_mtx_lock(&bufmgr->lock);
1348 bo->address = vma_alloc(bufmgr, memzone, size, 1);
1349 simple_mtx_unlock(&bufmgr->lock);
1350
1351 if (bo->address == 0ull)
1352 goto err_close;
1353
1354 p_atomic_set(&bo->refcount, 1);
1355 bo->index = -1;
1356 bo->idle = true;
1357 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1358 bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
1359 bo->real.prime_fd = -1;
1360
1361 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1362 goto err_vma_free;
1363
1364 return bo;
1365
1366 err_vma_free:
1367 simple_mtx_lock(&bufmgr->lock);
1368 vma_free(bufmgr, bo->address, bo->size);
1369 simple_mtx_unlock(&bufmgr->lock);
1370 err_close:
1371 bufmgr->kmd_backend->gem_close(bufmgr, bo);
1372 err_free:
1373 free(bo);
1374 return NULL;
1375 }
1376
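/* Only the Xe KMD needs prime (dma-buf) fds: they are used to exchange
 * implicit synchronization state for external BOs via the DMA_BUF sync-file
 * ioctls (see iris_bo_export_sync_state() and iris_bo_import_sync_state()).
 */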
1377 static bool
1378 needs_prime_fd(struct iris_bufmgr *bufmgr)
1379 {
1380 return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
1381 }
1382
1383 static bool
1384 iris_bo_set_prime_fd(struct iris_bo *bo)
1385 {
1386 struct iris_bufmgr *bufmgr = bo->bufmgr;
1387
1388 if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
1389 if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
1390 DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
1391 fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
1392 bo->name, bo->gem_handle);
1393 return false;
1394 }
1395 }
1396
1397 return true;
1398 }
1399
1400 /**
1401 * Returns an iris_bo wrapping the given buffer object handle.
1402 *
1403 * This can be used when one application needs to pass a buffer object
1404 * to another.
1405 */
1406 struct iris_bo *
1407 iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
1408 const char *name, unsigned int handle)
1409 {
1410 struct iris_bo *bo;
1411
1412 /* At the moment most applications only have a few named BOs.
1413 * For instance, in a DRI client only the render buffers passed
1414 * between X and the client are named. And since X returns the
1415 * alternating names for the front/back buffer, a linear search
1416 * provides a sufficiently fast match.
1417 */
1418 simple_mtx_lock(&bufmgr->lock);
1419 bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1420 if (bo)
1421 goto out;
1422
1423 struct drm_gem_open open_arg = { .name = handle };
1424 int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1425 if (ret != 0) {
1426 DBG("Couldn't reference %s handle 0x%08x: %s\n",
1427 name, handle, strerror(errno));
1428 bo = NULL;
1429 goto out;
1430 }
1431 /* Now see if someone has used a prime handle to get this
1432 * object from the kernel before by looking through the list
1433 * again for a matching gem_handle
1434 */
1435 bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1436 if (bo)
1437 goto out;
1438
1439 bo = bo_calloc();
1440 if (!bo) {
1441 struct iris_bo close_bo = {
1442 .gem_handle = open_arg.handle,
1443 };
1444 bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1445 goto out;
1446 }
1447
1448 p_atomic_set(&bo->refcount, 1);
1449
1450 bo->size = open_arg.size;
1451 bo->bufmgr = bufmgr;
1452 bo->gem_handle = open_arg.handle;
1453 bo->name = name;
1454 bo->index = -1;
1455 bo->real.global_name = handle;
1456 bo->real.prime_fd = -1;
1457 bo->real.reusable = false;
1458 bo->real.imported = true;
1459 /* Xe KMD expects at least 1-way coherency for imports */
1460 bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1461 bo->real.mmap_mode = IRIS_MMAP_NONE;
1462 if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1463 bo->real.capture = true;
1464 bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1465 if (bo->address == 0ull)
1466 goto err_free;
1467
1468 if (!iris_bo_set_prime_fd(bo))
1469 goto err_vm_alloc;
1470
1471 if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1472 goto err_vm_alloc;
1473
1474 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1475 _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1476
1477 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1478
1479 out:
1480 simple_mtx_unlock(&bufmgr->lock);
1481 return bo;
1482
1483 err_vm_alloc:
1484 vma_free(bufmgr, bo->address, bo->size);
1485 err_free:
1486 bo_free(bo);
1487 simple_mtx_unlock(&bufmgr->lock);
1488 return NULL;
1489 }
1490
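/* Actually close a BO: if it is external, drop it from the name/handle tables
 * and close any per-fd exported handles; then unbind and return its VMA,
 * close the GEM handle, release its aux-map range and dependencies, and free
 * the wrapper. Must be called with bufmgr->lock held.
 */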
1491 static void
1492 bo_close(struct iris_bo *bo)
1493 {
1494 struct iris_bufmgr *bufmgr = bo->bufmgr;
1495
1496 simple_mtx_assert_locked(&bufmgr->lock);
1497 assert(iris_bo_is_real(bo));
1498
1499 if (iris_bo_is_external(bo)) {
1500 struct hash_entry *entry;
1501
1502 if (bo->real.global_name) {
1503 entry = _mesa_hash_table_search(bufmgr->name_table,
1504 &bo->real.global_name);
1505 _mesa_hash_table_remove(bufmgr->name_table, entry);
1506 }
1507
1508 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1509 _mesa_hash_table_remove(bufmgr->handle_table, entry);
1510
1511 list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1512 iris_bo_close(export->drm_fd, export->gem_handle);
1513
1514 list_del(&export->link);
1515 free(export);
1516 }
1517 } else {
1518 assert(list_is_empty(&bo->real.exports));
1519 }
1520
1521 /* Unbind and return the VMA for reuse */
1522 if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1523 vma_free(bo->bufmgr, bo->address, bo->size);
1524 else
1525 DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1526
1527 if (bo->real.prime_fd != -1)
1528 close(bo->real.prime_fd);
1529
1530 /* Close this object */
1531 if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1532 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1533 bo->gem_handle, bo->name, strerror(errno));
1534 }
1535
1536 if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1537 intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1538 bo->size);
1539 }
1540
1541 for (int d = 0; d < bo->deps_size; d++) {
1542 for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1543 iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1544 iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1545 }
1546 }
1547 free(bo->deps);
1548
1549 free(bo);
1550 }
1551
1552 static void
1553 bo_free(struct iris_bo *bo)
1554 {
1555 struct iris_bufmgr *bufmgr = bo->bufmgr;
1556
1557 simple_mtx_assert_locked(&bufmgr->lock);
1558 assert(iris_bo_is_real(bo));
1559
1560 if (!bo->real.userptr && bo->real.map)
1561 bo_unmap(bo);
1562
1563 if (bo->idle || !iris_bo_busy(bo)) {
1564 bo_close(bo);
1565 } else {
1566 /* Defer closing the GEM BO and returning the VMA for reuse until the
1567 * BO is idle. Just move it to the dead list for now.
1568 */
1569 list_addtail(&bo->head, &bufmgr->zombie_list);
1570 }
1571 }
1572
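/* Exclusive upper bound on the heaps that exist for this device; used when
 * iterating the per-heap bucket caches.
 */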
1573 static enum iris_heap
1574 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1575 {
1576 if (bufmgr->vram.size) {
1577 return intel_vram_all_mappable(&bufmgr->devinfo) ?
1578 IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1579 }
1580
1581 return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1582 IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1583 }
1584
1585 /** Frees all cached buffers significantly older than @time. */
1586 static void
1587 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1588 {
1589 simple_mtx_assert_locked(&bufmgr->lock);
1590
1591 if (bufmgr->time == time)
1592 return;
1593
1594 for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1595 struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1596
1597 for (int i = 0; i < cache->num_buckets; i++) {
1598 struct bo_cache_bucket *bucket = &cache->bucket[i];
1599
1600 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1601 if (time - bo->real.free_time <= 1)
1602 break;
1603
1604 list_del(&bo->head);
1605
1606 bo_free(bo);
1607 }
1608 }
1609 }
1610
1611 list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1612 /* Stop once we reach a busy BO - all others past this point were
1613 * freed more recently so are likely also busy.
1614 */
1615 if (!bo->idle && iris_bo_busy(bo))
1616 break;
1617
1618 list_del(&bo->head);
1619 bo_close(bo);
1620 }
1621
1622 bufmgr->time = time;
1623 }
1624
1625 static void
1626 bo_unreference_final(struct iris_bo *bo, time_t time)
1627 {
1628 struct iris_bufmgr *bufmgr = bo->bufmgr;
1629
1630 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1631
1632 assert(iris_bo_is_real(bo));
1633
1634 struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1635 bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1636
1637 /* Put the buffer into our internal cache for reuse if we can. */
1638 if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1639 bo->real.free_time = time;
1640 bo->name = NULL;
1641
1642 list_addtail(&bo->head, &bucket->head);
1643 } else {
1644 bo_free(bo);
1645 }
1646 }
1647
1648 void
1649 iris_bo_unreference(struct iris_bo *bo)
1650 {
1651 if (bo == NULL)
1652 return;
1653
1654 assert(p_atomic_read(&bo->refcount) > 0);
1655
1656 if (atomic_add_unless(&bo->refcount, -1, 1)) {
1657 struct iris_bufmgr *bufmgr = bo->bufmgr;
1658 struct timespec time;
1659
1660 clock_gettime(CLOCK_MONOTONIC, &time);
1661
1662 bo->zeroed = false;
1663 if (bo->gem_handle == 0) {
1664 pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1665 } else {
1666 simple_mtx_lock(&bufmgr->lock);
1667
1668 if (p_atomic_dec_zero(&bo->refcount)) {
1669 bo_unreference_final(bo, time.tv_sec);
1670 cleanup_bo_cache(bufmgr, time.tv_sec);
1671 }
1672
1673 simple_mtx_unlock(&bufmgr->lock);
1674 }
1675 }
1676 }
1677
1678 static void
1679 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1680 struct iris_bo *bo,
1681 const char *action)
1682 {
1683 bool busy = dbg && !bo->idle;
1684 double elapsed = unlikely(busy) ? -get_time() : 0.0;
1685
1686 iris_bo_wait_rendering(bo);
1687
1688 if (unlikely(busy)) {
1689 elapsed += get_time();
1690 if (elapsed > 1e-5) /* 0.01ms */ {
1691 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1692 action, bo->name, elapsed * 1000);
1693 }
1694 }
1695 }
1696
1697 static void
1698 print_flags(unsigned flags)
1699 {
1700 if (flags & MAP_READ)
1701 DBG("READ ");
1702 if (flags & MAP_WRITE)
1703 DBG("WRITE ");
1704 if (flags & MAP_ASYNC)
1705 DBG("ASYNC ");
1706 if (flags & MAP_PERSISTENT)
1707 DBG("PERSISTENT ");
1708 if (flags & MAP_COHERENT)
1709 DBG("COHERENT ");
1710 if (flags & MAP_RAW)
1711 DBG("RAW ");
1712 DBG("\n");
1713 }
1714
1715 void *
1716 iris_bo_map(struct util_debug_callback *dbg,
1717 struct iris_bo *bo, unsigned flags)
1718 {
1719 struct iris_bufmgr *bufmgr = bo->bufmgr;
1720 void *map = NULL;
1721
1722 if (bo->gem_handle == 0) {
1723 struct iris_bo *real = iris_get_backing_bo(bo);
1724 uint64_t offset = bo->address - real->address;
1725 map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1726 } else {
1727 assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1728 if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1729 return NULL;
1730
1731 if (!bo->real.map) {
1732 DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1733 map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1734 if (!map) {
1735 return NULL;
1736 }
1737
1738 VG_DEFINED(map, bo->size);
1739
1740 if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1741 VG_NOACCESS(map, bo->size);
1742 os_munmap(map, bo->size);
1743 }
1744 }
1745 assert(bo->real.map);
1746 map = bo->real.map;
1747 }
1748
1749 DBG("iris_bo_map: %d (%s) -> %p\n",
1750 bo->gem_handle, bo->name, bo->real.map);
1751 print_flags(flags);
1752
1753 if (!(flags & MAP_ASYNC)) {
1754 bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1755 }
1756
1757 return map;
1758 }
1759
1760 /**
1761 * Waits on a BO for the given amount of time.
1762 *
1763 * @bo: buffer object to wait for
1764 * @timeout_ns: amount of time to wait in nanoseconds.
1765 * If value is less than 0, an infinite wait will occur.
1766 *
1767 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1768 * object completed within the allotted time. Otherwise, some negative return
1769 * value describes the error. Of particular interest is -ETIME, returned when
1770 * the wait failed to yield the desired result.
1771 *
1772 * Similar to iris_bo_wait_rendering, except a timeout parameter allows
1773 * the operation to give up after a certain amount of time. Another subtle
1774 * difference is the locking semantics: this variant does not hold the lock
1775 * for the duration of the wait, which makes the wait subject to a larger
1776 * userspace race window.
1777 *
1778 * The implementation shall wait until the object is no longer actively
1779 * referenced within a batch buffer at the time of the call. The wait does
1780 * not guard against the buffer being re-issued via another thread or a
1781 * flinked handle. Userspace must make sure this race does not occur if such
1782 * precision is important.
1783 *
1784 * Note that some kernels have broken the promise of an infinite wait for
1785 * negative values; upgrade to the latest stable kernel if this is the case.
1786 */
1787 static inline int
1788 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1789 {
1790 int ret;
1791
1792 switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1793 case INTEL_KMD_TYPE_I915:
1794 if (iris_bo_is_external(bo))
1795 ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1796 else
1797 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1798 break;
1799 case INTEL_KMD_TYPE_XE:
1800 ret = iris_bo_wait_syncobj(bo, timeout_ns);
1801 break;
1802 default:
1803 unreachable("missing");
1804 ret = -1;
1805 }
1806
1807 bo->idle = ret == 0;
1808
1809 return ret;
1810 }

/** Waits for all GPU rendering with the object to have completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   iris_bo_wait(bo, -1);
}
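
/* Illustrative sketch, not part of the driver: how the timeout semantics
 * documented above can be used.  A zero timeout acts as a non-blocking busy
 * check, while a negative timeout (as in iris_bo_wait_rendering()) blocks
 * until the last batch referencing the BO has completed.  The helper name is
 * hypothetical and the UNUSED annotation is assumed from util/macros.h.
 */
static UNUSED bool
example_bo_busy(struct iris_bo *bo)
{
   /* A non-zero (negative) return, typically -ETIME, means the BO is still
    * busy; 0 means it is idle and bo->idle has just been updated.
    */
   return iris_bo_wait(bo, 0) != 0;
}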

static void
iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
{
   switch (bufmgr->devinfo.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      /* Nothing to do in i915 */
      break;
   case INTEL_KMD_TYPE_XE:
      intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
      iris_xe_destroy_global_vm(bufmgr);
      break;
   default:
      unreachable("missing");
   }
}

static void
iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
{
   iris_bo_unreference(bufmgr->dummy_aux_bo);

   iris_destroy_border_color_pool(&bufmgr->border_color_pool);

   /* Free aux-map buffers */
   intel_aux_map_finish(bufmgr->aux_map_ctx);

   /* bufmgr will no longer try to free VMA entries in the aux-map */
   bufmgr->aux_map_ctx = NULL;

   for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (bufmgr->bo_slabs[i].groups)
         pb_slabs_deinit(&bufmgr->bo_slabs[i]);
   }

   simple_mtx_lock(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
      struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];

      for (int i = 0; i < cache->num_buckets; i++) {
         struct bo_cache_bucket *bucket = &cache->bucket[i];

         list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
            list_del(&bo->head);

            bo_free(bo);
         }
      }
   }
   free(bufmgr->bucket_cache);

   /* Close any buffer objects on the dead list. */
   list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
      list_del(&bo->head);
      bo_close(bo);
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
      util_vma_heap_finish(&bufmgr->vma_allocator[z]);

   iris_bufmgr_destroy_global_vm(bufmgr);

   close(bufmgr->fd);

   simple_mtx_unlock(&bufmgr->lock);

   simple_mtx_destroy(&bufmgr->lock);
   simple_mtx_destroy(&bufmgr->bo_deps_lock);

   free(bufmgr);
}

int
iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (!bufmgr->devinfo.has_tiling_uapi) {
      *tiling = 0;
      return 0;
   }

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_get_tiling(bo, tiling);
}

int
iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* If we can't do map_gtt, the set/get_tiling API isn't useful.  And it's
    * actually not supported by the kernel in those cases.
    */
   if (!bufmgr->devinfo.has_tiling_uapi)
      return 0;

   assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
   return iris_i915_bo_set_tiling(bo, surf);
}

struct iris_bo *
iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
                      const uint64_t modifier)
{
   uint32_t handle;
   struct iris_bo *bo;

   simple_mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      simple_mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us.  Just as
    * for named buffers, we must not create two BOs pointing at the same
    * kernel object.
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine the size of the BO.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  On kernel 3.12 or later, we can
    * lseek on the prime fd to get the size.  Older kernels will just fail,
    * in which case we fall back to the provided (estimated or guessed) size.
    */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->name = "prime";
   bo->index = -1;
   bo->real.reusable = false;
   bo->real.imported = true;
   /* Xe KMD expects at least 1-way coherency for imports */
   bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
   bo->real.mmap_mode = IRIS_MMAP_NONE;
   if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
      bo->real.capture = true;
   bo->gem_handle = handle;
   bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;

   uint64_t alignment = 1;

   /* When an aux map will be used, there is an alignment requirement on the
    * main surface from the mapping granularity.  Some planes of the image
    * may have smaller alignment requirements, but this one should work for
    * all.
    */
   if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
      alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}
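
/* Illustrative sketch, not part of the driver: wrapping a dma-buf fd that
 * another process or driver exported.  Only iris_bo_import_dmabuf() and
 * iris_bo_unreference() are real driver entry points here; the helper name
 * is hypothetical and the UNUSED annotation is assumed from util/macros.h.
 * The modifier must match whatever the exporter used so that the aux-map
 * alignment handling above applies correctly.
 */
static UNUSED struct iris_bo *
example_wrap_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
                    uint64_t modifier)
{
   struct iris_bo *bo = iris_bo_import_dmabuf(bufmgr, prime_fd, modifier);

   /* The import returns a referenced BO (or NULL); the caller drops the
    * reference with iris_bo_unreference() when the wrapping resource dies.
    */
   return bo;
}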

static void
iris_bo_mark_exported_locked(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));
   simple_mtx_assert_locked(&bufmgr->lock);

   if (!iris_bo_is_external(bo))
      _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   if (!bo->real.exported) {
      /* If a BO is going to be used externally, it could be sent to the
       * display HW.  So make sure our CPU mappings don't assume cache
       * coherency since display is outside that cache.
       */
      bo->real.exported = true;
      bo->real.reusable = false;
   }
}

void
iris_bo_mark_exported(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (bo->real.exported) {
      assert(!bo->real.reusable);
      return;
   }

   simple_mtx_lock(&bufmgr->lock);
   iris_bo_mark_exported_locked(bo);
   simple_mtx_unlock(&bufmgr->lock);

   iris_bo_set_prime_fd(bo);
}

int
iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
      return -errno;

   iris_bo_mark_exported(bo);

   return 0;
}

static uint32_t
iris_bo_export_gem_handle(struct iris_bo *bo)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   iris_bo_mark_exported(bo);

   return bo->gem_handle;
}

int
iris_bo_flink(struct iris_bo *bo, uint32_t *name)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   if (!bo->real.global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      simple_mtx_lock(&bufmgr->lock);
      if (!bo->real.global_name) {
         iris_bo_mark_exported_locked(bo);
         bo->real.global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
      }
      simple_mtx_unlock(&bufmgr->lock);

      iris_bo_set_prime_fd(bo);
   }

   *name = bo->real.global_name;
   return 0;
}

int
iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                     uint32_t *out_handle)
{
   /* We cannot export suballocated BOs. */
   assert(iris_bo_is_real(bo));

   /* Only add the new GEM handle to the list of exports if it belongs to a
    * different GEM device.  Otherwise we might close the same buffer
    * multiple times.
    */
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   int ret = os_same_file_description(drm_fd, bufmgr->fd);
   WARN_ONCE(ret < 0,
             "Kernel has no file descriptor comparison support: %s\n",
             strerror(errno));
   if (ret == 0) {
      *out_handle = iris_bo_export_gem_handle(bo);
      return 0;
   }

   struct bo_export *export = calloc(1, sizeof(*export));
   if (!export)
      return -ENOMEM;

   export->drm_fd = drm_fd;

   int dmabuf_fd = -1;
   int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
   if (err) {
      free(export);
      return err;
   }

   simple_mtx_lock(&bufmgr->lock);
   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
   close(dmabuf_fd);
   if (err) {
      simple_mtx_unlock(&bufmgr->lock);
      free(export);
      return err;
   }

   bool found = false;
   list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
      if (iter->drm_fd != drm_fd)
         continue;
      /* Here we assume that for a given DRM fd, we'll always get back the
       * same GEM handle for a given buffer.
       */
      assert(iter->gem_handle == export->gem_handle);
      free(export);
      export = iter;
      found = true;
      break;
   }
   if (!found)
      list_addtail(&export->link, &bo->real.exports);

   simple_mtx_unlock(&bufmgr->lock);

   *out_handle = export->gem_handle;

   return 0;
}
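
/* Illustrative sketch, not part of the driver: obtaining a GEM handle that
 * is valid on another DRM file descriptor sharing the same device (e.g. one
 * opened by a media library).  The helper name is hypothetical and the
 * UNUSED annotation is assumed from util/macros.h;
 * iris_bo_export_gem_handle_for_device() deduplicates exports per target fd
 * as described above.
 */
static UNUSED int
example_share_bo_with_fd(struct iris_bo *bo, int other_drm_fd)
{
   uint32_t handle;
   int err = iris_bo_export_gem_handle_for_device(bo, other_drm_fd, &handle);
   if (err)
      return err;

   /* 'handle' can now be used in ioctls issued on other_drm_fd. */
   return 0;
}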

static void
add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
{
   struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
   unsigned int i = cache->num_buckets++;

   list_inithead(&cache->bucket[i].head);
   cache->bucket[i].size = size;

   assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
}

static void
init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
{
   uint64_t size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, PAGE_SIZE, heap);
   add_bucket(bufmgr, PAGE_SIZE * 2, heap);
   add_bucket(bufmgr, PAGE_SIZE * 3, heap);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size, heap);

      add_bucket(bufmgr, size + size * 1 / 4, heap);
      add_bucket(bufmgr, size + size * 2 / 4, heap);
      add_bucket(bufmgr, size + size * 3 / 4, heap);
   }
}
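
/* Illustrative sketch, not part of the driver: dumping the bucket sizes that
 * init_cache_buckets() produces for one heap.  With a 4 KiB page size this
 * prints 4/8/12 KiB, then four buckets per power of two (16/20/24/28 KiB,
 * 32/40/48/56 KiB, ...) up through the 64 MiB base size.  The helper name is
 * hypothetical and the UNUSED annotation is assumed from util/macros.h.
 */
static UNUSED void
example_dump_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
{
   const struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];

   for (int i = 0; i < cache->num_buckets; i++)
      fprintf(stderr, "bucket[%d]: %" PRIu64 " bytes\n",
              i, (uint64_t) cache->bucket[i].size);
}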

static struct intel_buffer *
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
{
   struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
   if (!buf)
      return NULL;

   struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;

   unsigned int page_size = getpagesize();
   size = MAX2(ALIGN(size, page_size), page_size);

   struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
   if (!bo) {
      free(buf);
      return NULL;
   }

   simple_mtx_lock(&bufmgr->lock);

   bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
   if (bo->address == 0ull)
      goto err_free;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vm_alloc;

   simple_mtx_unlock(&bufmgr->lock);

   bo->name = "aux-map";
   p_atomic_set(&bo->refcount, 1);
   bo->index = -1;
   bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
   bo->real.prime_fd = -1;

   buf->driver_bo = bo;
   buf->gpu = bo->address;
   buf->gpu_end = buf->gpu + bo->size;
   buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
   return buf;

err_vm_alloc:
   vma_free(bufmgr, bo->address, bo->size);
err_free:
   free(buf);
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

static void
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
{
   iris_bo_unreference((struct iris_bo *)buffer->driver_bo);
   free(buffer);
}

static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
   .alloc = intel_aux_map_buffer_alloc,
   .free = intel_aux_map_buffer_free,
};

static bool
iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
                        struct intel_device_info *devinfo)
{
   bufmgr->sys.region = &devinfo->mem.sram.mem;
   bufmgr->sys.size = devinfo->mem.sram.mappable.size;

   /* When the resizable BAR feature is disabled, vram.mappable.size is only
    * 256MB; the remainder of the total VRAM size is reported in
    * vram.unmappable.size.
    */
   bufmgr->vram.region = &devinfo->mem.vram.mem;
   bufmgr->vram.size = devinfo->mem.vram.mappable.size +
                       devinfo->mem.vram.unmappable.size;

   return true;
}

static bool
iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
{
   switch (bufmgr->devinfo.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* i915 doesn't require a VM, so return true even if use_global_vm is false */
      return true;
   case INTEL_KMD_TYPE_XE:
      if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
         return false;

      bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
      /* Xe requires a VM */
      return bufmgr->use_global_vm;
   default:
      unreachable("missing");
      return false;
   }
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate,
 * map, and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
static struct iris_bufmgr *
iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
      return NULL;

   struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel.  If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this!  Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = os_dupfd_cloexec(fd);
   if (bufmgr->fd == -1)
      goto error_dup;

   p_atomic_set(&bufmgr->refcount, 1);

   simple_mtx_init(&bufmgr->lock, mtx_plain);
   simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);

   list_inithead(&bufmgr->zombie_list);

   bufmgr->devinfo = *devinfo;
   devinfo = &bufmgr->devinfo;
   bufmgr->bo_reuse = bo_reuse;
   iris_bufmgr_get_meminfo(bufmgr, devinfo);
   bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);

   intel_common_update_device_info(bufmgr->fd, devinfo);

   if (!iris_bufmgr_init_global_vm(bufmgr))
      goto error_init_vm;

   STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
   const uint64_t _4GB = 1ull << 32;
   const uint64_t _2GB = 1ul << 31;

   /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
   const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;

   const struct {
      uint64_t start;
      uint64_t size;
   } vma[IRIS_MEMZONE_COUNT] = {
      [IRIS_MEMZONE_SHADER] = {
         .start = PAGE_SIZE,
         .size = _4GB_minus_1 - PAGE_SIZE
      },
      [IRIS_MEMZONE_BINDER] = {
         .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
         .size = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SCRATCH] = {
         .start = IRIS_MEMZONE_SCRATCH_START,
         .size = IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_SURFACE] = {
         .start = IRIS_MEMZONE_SURFACE_START,
         .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
      },
      [IRIS_MEMZONE_DYNAMIC] = {
         .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,

         /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
          *
          *    "PSDunit is dropping MSB of the blend state pointer from SD
          *     FIFO [...] Limit the Blend State Pointer to < 2G"
          *
          * We restrict the dynamic state pool to 2GB so that we don't ever
          * get a BLEND_STATE pointer with the MSB set.  We aren't likely to
          * need the full 4GB for dynamic state anyway.
          */
         .size = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
                 - IRIS_BORDER_COLOR_POOL_SIZE
      },
      [IRIS_MEMZONE_OTHER] = {
         .start = IRIS_MEMZONE_OTHER_START,

         /* Leave the last 4GB out of the high vma range, so that no state
          * base address + size can overflow 48 bits.
          */
         .size = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
      },
   };

   for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
      util_vma_heap_init(&bufmgr->vma_allocator[i],
                         vma[i].start, vma[i].size);
   }

   if (INTEL_DEBUG(DEBUG_HEAPS)) {
      for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
         fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
                 memzone_name(i), vma[i].start,
                 vma[i].start + vma[i].size - 1);
      }
   }

   bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
                                 sizeof(*bufmgr->bucket_cache));
   if (!bufmgr->bucket_cache)
      goto error_bucket_cache;
   for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
      init_cache_buckets(bufmgr, h);

   unsigned min_slab_order = 8;  /* 256 bytes */
   unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
   unsigned num_slab_orders_per_allocator =
      (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;

   /* Divide the size order range among slab managers. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned min_order = min_slab_order;
      unsigned max_order =
         MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);

      if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
                         iris_get_heap_max(bufmgr), true, bufmgr,
                         iris_can_reclaim_slab,
                         iris_slab_alloc,
                         (void *) iris_slab_free)) {
         goto error_slabs_init;
      }
      min_slab_order = max_order + 1;
   }

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);

   if (devinfo->has_aux_map) {
      bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
                                               devinfo);
      assert(bufmgr->aux_map_ctx);
   }

   iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);

   if (intel_needs_workaround(devinfo, 14019708328)) {
      bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
                                           IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
      if (!bufmgr->dummy_aux_bo)
         goto error_dummy_aux;
   }

   return bufmgr;

error_dummy_aux:
   iris_destroy_border_color_pool(&bufmgr->border_color_pool);
   intel_aux_map_finish(bufmgr->aux_map_ctx);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
error_slabs_init:
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      if (!bufmgr->bo_slabs[i].groups)
         break;

      pb_slabs_deinit(&bufmgr->bo_slabs[i]);
   }
   free(bufmgr->bucket_cache);
error_bucket_cache:
   for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
      util_vma_heap_finish(&bufmgr->vma_allocator[i]);
   iris_bufmgr_destroy_global_vm(bufmgr);
error_init_vm:
   close(bufmgr->fd);
error_dup:
   free(bufmgr);
   return NULL;
}

static struct iris_bufmgr *
iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
{
   p_atomic_inc(&bufmgr->refcount);
   return bufmgr;
}

void
iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
{
   simple_mtx_lock(&global_bufmgr_list_mutex);
   if (p_atomic_dec_zero(&bufmgr->refcount)) {
      list_del(&bufmgr->link);
      iris_bufmgr_destroy(bufmgr);
   }
   simple_mtx_unlock(&global_bufmgr_list_mutex);
}

/** Returns a new unique id, to be used by screens. */
int
iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
{
   return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
}

/**
 * Gets an existing GEM buffer manager for the given DRM device fd, or
 * creates a new one.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct iris_bufmgr *
iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
{
   struct intel_device_info devinfo;
   struct stat st;

   if (fstat(fd, &st))
      return NULL;

   struct iris_bufmgr *bufmgr = NULL;

   simple_mtx_lock(&global_bufmgr_list_mutex);
   list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
      struct stat iter_st;
      if (fstat(iter_bufmgr->fd, &iter_st))
         continue;

      if (st.st_rdev == iter_st.st_rdev) {
         assert(iter_bufmgr->bo_reuse == bo_reuse);
         bufmgr = iris_bufmgr_ref(iter_bufmgr);
         goto unlock;
      }
   }

   /* Bail out via the unlock path so we never return while still holding
    * global_bufmgr_list_mutex.
    */
   if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
      goto unlock;

   if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
      goto unlock;

   bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
   if (bufmgr)
      list_addtail(&bufmgr->link, &global_bufmgr_list);

unlock:
   simple_mtx_unlock(&global_bufmgr_list_mutex);

   return bufmgr;
}
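
/* Illustrative sketch, not part of the driver: how a screen might acquire
 * and later release a shared buffer manager for its DRM fd.  Buffer managers
 * are shared per device node, so every caller must pass the same bo_reuse
 * value (see the assert above).  The helper name is hypothetical and the
 * UNUSED annotation is assumed from util/macros.h.
 */
static UNUSED void
example_bufmgr_lifetime(int drm_fd)
{
   struct iris_bufmgr *bufmgr = iris_bufmgr_get_for_fd(drm_fd, true);
   if (!bufmgr)
      return;

   /* ... create screen state, allocate BOs, submit work ... */

   iris_bufmgr_unref(bufmgr);
}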

int
iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
{
   return bufmgr->fd;
}

void *
iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
{
   return bufmgr->aux_map_ctx;
}

simple_mtx_t *
iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bo_deps_lock;
}

struct iris_border_color_pool *
iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->border_color_pool;
}

uint64_t
iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->vram.size;
}

uint64_t
iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
{
   return bufmgr->sys.size;
}

const struct intel_device_info *
iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->devinfo;
}

const struct iris_kmd_backend *
iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
{
   return bufmgr->kmd_backend;
}

uint32_t
iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->global_vm_id;
}

bool
iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
{
   return bufmgr->use_global_vm;
}

bool
iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
{
   return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
}

/**
 * Return the PAT entry to use for the given BO heap.
 */
const struct intel_device_info_pat_entry *
iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
                       enum iris_heap heap)
{
   switch (heap) {
   case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
      return &devinfo->pat.cached_coherent;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
      return &devinfo->pat.writecombining;
   case IRIS_HEAP_DEVICE_LOCAL:
   case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
   case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
      return &devinfo->pat.writecombining;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
   case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
      return &devinfo->pat.compressed;
   default:
      unreachable("invalid heap for platforms using PAT entries");
   }
}

struct intel_bind_timeline *
iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
{
   return &bufmgr->bind_timeline;
}

uint64_t
iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
{
   return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
}