1 /*
2 * Copyright © 2011 Marek Olšák <[email protected]>
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_drm_cs.h"
8
9 #include "util/u_hash_table.h"
10 #include "util/u_memory.h"
11 #include "util/u_thread.h"
12 #include "util/os_mman.h"
13 #include "util/os_time.h"
14
15 #include "frontend/drm_driver.h"
16
17 #include <sys/ioctl.h>
18 #include <xf86drm.h>
19 #include <errno.h>
20 #include <fcntl.h>
21 #include <stdio.h>
22 #include <inttypes.h>
23
/* Forward declaration: radeon_bo_slab_alloc needs to create the backing
 * buffer for a slab before the full definition appears below. */
static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);
30
/* Downcast a generic pb_buffer_lean pointer to the radeon_bo that embeds it. */
static inline struct radeon_bo *radeon_bo(struct pb_buffer_lean *bo)
{
   struct radeon_bo *rbo = (struct radeon_bo *)bo;
   return rbo;
}
35
/* A free range ("hole") inside a GPU virtual-address heap.  Holes are kept
 * in the per-heap list radeon_vm_heap::holes (which appears to be ordered by
 * decreasing offset — see how radeon_bomgr_free_va walks and inserts). */
struct radeon_bo_va_hole {
   struct list_head list;  /* link into radeon_vm_heap::holes */
   uint64_t offset;        /* start address of the free range */
   uint64_t size;          /* size of the free range in bytes */
};
41
radeon_real_bo_is_busy(struct radeon_bo * bo)42 static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
43 {
44 struct drm_radeon_gem_busy args = {0};
45
46 args.handle = bo->handle;
47 return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
48 &args, sizeof(args)) != 0;
49 }
50
/* Return true if the buffer is busy on the GPU.
 *
 * Real BOs (handle != 0) ask the kernel directly.  A slab entry is busy if
 * any of the fences it holds is busy; fences that have become idle are
 * released and compacted out of the front of the fence array as a side
 * effect, so later queries do less work.
 */
static bool radeon_bo_is_busy(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      /* This fence has signalled — drop our reference to it. */
      radeon_ws_bo_reference(rws, &bo->u.slab.fences[num_idle], NULL);
   }
   /* Shift the still-pending fences to the start of the array. */
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
           (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}
74
radeon_real_bo_wait_idle(struct radeon_bo * bo)75 static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
76 {
77 struct drm_radeon_gem_wait_idle args = {0};
78
79 args.handle = bo->handle;
80 while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
81 &args, sizeof(args)) == -EBUSY);
82 }
83
/* Block until the buffer is idle.
 *
 * For slab entries this waits on each fence in turn.  bo_fence_lock must
 * not be held across the blocking ioctl, so for every fence we take a
 * private reference, drop the lock, wait, retake the lock, and only then
 * pop the fence — re-checking that it is still at the head, because other
 * threads may have changed the array while we slept.
 */
static void radeon_bo_wait_idle(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         /* Private reference keeps the fence alive while unlocked. */
         radeon_ws_bo_reference(rws, &fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         /* Pop the fence only if it is still the head of the array. */
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(rws, &bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                    (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(rws, &fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}
110
/* Wait for the buffer to become idle, up to an absolute timeout.
 *
 * timeout == 0 is a pure non-blocking busy query.  OS_TIMEOUT_INFINITE
 * waits forever.  Any other timeout is emulated with a poll loop, since
 * the kernel only offers "query" and "wait forever".  Also waits for any
 * in-flight CS-submission ioctls referencing this buffer first.
 * Returns true if the buffer is idle.  `usage` is currently unused here.
 */
static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer_lean *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(rws, bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == OS_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(rws, bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(rws, bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}
143
get_valid_domain(enum radeon_bo_domain domain)144 static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
145 {
146 /* Zero domains the driver doesn't understand. */
147 domain &= RADEON_DOMAIN_VRAM_GTT;
148
149 /* If no domain is set, we must set something... */
150 if (!domain)
151 domain = RADEON_DOMAIN_VRAM_GTT;
152
153 return domain;
154 }
155
radeon_bo_get_initial_domain(struct pb_buffer_lean * buf)156 static enum radeon_bo_domain radeon_bo_get_initial_domain(
157 struct pb_buffer_lean *buf)
158 {
159 struct radeon_bo *bo = (struct radeon_bo*)buf;
160 struct drm_radeon_gem_op args;
161
162 memset(&args, 0, sizeof(args));
163 args.handle = bo->handle;
164 args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;
165
166 if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
167 &args, sizeof(args))) {
168 fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
169 bo, bo->handle);
170 /* Default domain as returned by get_valid_domain. */
171 return RADEON_DOMAIN_VRAM_GTT;
172 }
173
174 /* GEM domains and winsys domains are defined the same. */
175 return get_valid_domain(args.value);
176 }
177
/* Allocate a GPU virtual-address range of `size` bytes (aligned to
 * `alignment`) from `heap`.
 *
 * First tries to satisfy the request from the hole list, splitting a hole
 * when it is larger than needed (a leading "waste" fragment caused by
 * alignment becomes its own hole).  Otherwise the range is carved from the
 * untouched area at heap->start.  Returns 0 when the heap is exhausted.
 */
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      /* Bytes lost at the front of the hole to satisfy the alignment. */
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      /* Alignment pushed us past the end of this hole — try the next. */
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      /* Exact fit: consume the entire hole. */
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      /* Hole is bigger than needed: keep the leading waste (if any) as a
       * new hole and shrink this one from the front. */
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      /* Fits exactly after the waste: the hole shrinks to just the waste. */
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   /* No suitable hole — allocate from the top of the heap. */
   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   /* Out of address space. */
   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}
246
/* Allocate a virtual address, preferring the 64-bit heap.
 *
 * The 64-bit heap is used when it exists (start != 0); if it is absent or
 * has no room, fall back to the 32-bit heap.  Returns 0 on failure.
 */
static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va;

   va = ws->vm64.start ?
           radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment) : 0;

   if (va == 0)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);

   return va;
}
262
/* Return the virtual-address range [va, va+size) to `heap`.
 *
 * If the range touches heap->start, the heap top simply moves down (and
 * absorbs an adjacent hole if that exposes one).  Otherwise the range is
 * merged into an adjacent hole where possible, or recorded as a new hole.
 * Note: `hole` may be a sentinel derived from the list head itself via
 * container_of; it must only be dereferenced after the &hole->list !=
 * &heap->holes check.
 */
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      /* Walk to the insertion point: `hole` ends up being the last entry
       * with offset >= va (or the head sentinel if there is none). */
      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}
329
/* Destroy a real BO: remove it from the handle/name tables, unmap its CPU
 * mapping and GPU virtual address, close the GEM handle, and update the
 * winsys memory-accounting counters.
 *
 * Must not be called for slab entries (asserted).  Handles the race where
 * radeon_winsys_bo_from_handle revives the BO between the refcount hitting
 * zero and this function taking bo_handles_mutex.
 */
void radeon_bo_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   /* radeon_winsys_bo_from_handle might have revived the bo */
   if (pipe_is_referenced(&bo->base.reference)) {
      mtx_unlock(&rws->bo_handles_mutex);
      return;
   }
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon: size      : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon: va        : 0x%"PRIx64"\n", bo->va);
         }
      }

      /* Pick the heap the address came from by its position. */
      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   /* If the buffer was still mapped, undo its contribution to the
    * mapped-memory statistics. */
   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}
403
radeon_bo_destroy_or_cache(void * winsys,struct pb_buffer_lean * _buf)404 static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer_lean *_buf)
405 {
406 struct radeon_drm_winsys *rws = (struct radeon_drm_winsys *)winsys;
407 struct radeon_bo *bo = radeon_bo(_buf);
408
409 assert(bo->handle && "must not be called for slab entries");
410
411 if (bo->u.real.use_reusable_pool)
412 pb_cache_add_buffer(&rws->bo_cache, &bo->u.real.cache_entry);
413 else
414 radeon_bo_destroy(NULL, _buf);
415 }
416
/* Map the buffer into CPU address space and return the pointer.
 *
 * User-pointer BOs return their existing pointer.  Slab entries redirect to
 * their backing real BO plus an offset.  The real BO keeps a single shared
 * mapping with a reference count (map_count) protected by map_mutex; the
 * mmap is only performed on the first map.  Returns NULL on failure.
 */
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      /* Slab entry: map the backing real BO at the entry's offset. */
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   /* Track mapped-memory statistics for the winsys. */
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}
481
/* Map a buffer with CS synchronization.
 *
 * Unless PIPE_MAP_UNSYNCHRONIZED is set, any command stream referencing the
 * buffer is flushed first and then the buffer is waited on.  Mapping for
 * read only needs to wait on GPU writers; mapping for write must wait for
 * all GPU access.  With PIPE_MAP_DONTBLOCK, flushes are issued
 * asynchronously and NULL is returned instead of blocking.
 */
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer_lean *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         /* Blocking path: measure the time spent stalled on the GPU. */
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}
564
/* Undo one radeon_bo_map.  The CPU mapping is released only when the last
 * outstanding map goes away (map_count reaches zero).  User-pointer BOs and
 * never-mapped BOs are no-ops.  Slab entries redirect to their real BO. */
static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   /* Last unmap: update mapped-memory statistics. */
   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}
598
/* Allocate a real GEM buffer object.
 *
 * Creates the kernel BO, initializes the winsys wrapper, optionally links it
 * to a reuse-cache heap (heap >= 0), and — when virtual memory is enabled —
 * assigns and maps a GPU virtual address.  If the kernel reports the VA as
 * already mapped (RADEON_VA_RESULT_VA_EXIST), the previously-known BO for
 * that address is returned instead.  Updates allocated_vram/gtt accounting.
 * Returns NULL on failure.
 */
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon:    size      : %u bytes\n", size);
      fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
      fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   /* Unique hash for CS buffer lookups. */
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      /* Leave a gap after each BO when VM debugging is on, to catch
       * out-of-bounds accesses. */
      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon:    size      : %d bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
         fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         /* The kernel already knows this VA — reuse the BO that owns it
          * instead of the one we just created. */
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         radeon_bo_reference(&rws->base, &b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}
717
radeon_bo_can_reclaim(void * winsys,struct pb_buffer_lean * _buf)718 bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer_lean *_buf)
719 {
720 struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);
721
722 if (radeon_bo_is_referenced_by_any_cs(bo))
723 return false;
724
725 return radeon_bo_wait(winsys, (struct pb_buffer_lean*)_buf, 0, RADEON_USAGE_READWRITE);
726 }
727
radeon_bo_can_reclaim_slab(void * priv,struct pb_slab_entry * entry)728 bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
729 {
730 struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);
731
732 return radeon_bo_can_reclaim(priv, &bo->base);
733 }
734
radeon_bo_slab_destroy(void * winsys,struct pb_buffer_lean * _buf)735 static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer_lean *_buf)
736 {
737 struct radeon_bo *bo = radeon_bo(_buf);
738
739 assert(!bo->handle);
740
741 pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
742 }
743
/* pb_slabs callback: create a new slab of `entry_size` sub-buffers.
 *
 * Allocates one 64 KiB real BO as backing storage, then initializes an
 * array of radeon_bo slab entries, each pointing into the backing BO at its
 * own VA offset.  Returns NULL on allocation failure.
 */
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   /* Backing storage: one 64 KiB real BO per slab. */
   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   /* Reserve a contiguous range of hashes, one per entry. */
   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->rws = ws;
      /* Each entry lives at its own offset inside the backing BO's VA. */
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&ws->base, &slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}
801
radeon_bo_slab_free(void * priv,struct pb_slab * pslab)802 void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
803 {
804 struct radeon_winsys *rws = (struct radeon_winsys *)priv;
805 struct radeon_slab *slab = (struct radeon_slab *)pslab;
806
807 for (unsigned i = 0; i < slab->base.num_entries; ++i) {
808 struct radeon_bo *bo = &slab->entries[i];
809 for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
810 radeon_ws_bo_reference(rws, &bo->u.slab.fences[j], NULL);
811 FREE(bo->u.slab.fences);
812 }
813
814 FREE(slab->entries);
815 radeon_ws_bo_reference(rws, &slab->buffer, NULL);
816 FREE(slab);
817 }
818
/* Decode the 3-bit Evergreen TILE_SPLIT register field into a byte count.
 * Values outside 0..6 decode to the default of 1024 bytes (matching the
 * original switch, whose default case shared the "4" branch). */
static unsigned eg_tile_split(unsigned tile_split)
{
   static const unsigned split_bytes[] = {
      64, 128, 256, 512, 1024, 2048, 4096
   };

   if (tile_split >= sizeof(split_bytes) / sizeof(split_bytes[0]))
      return 1024;

   return split_bytes[tile_split];
}
833
/* Encode a tile-split byte count back into the 3-bit register field.
 * Unrecognized sizes encode to 4 (1024 bytes), the inverse of
 * eg_tile_split's default. */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:   return 0;
   case 128:  return 1;
   case 256:  return 2;
   case 512:  return 3;
   case 2048: return 5;
   case 4096: return 6;
   case 1024:
   default:   return 4;
   }
}
847
/* Read tiling metadata from the kernel (GEM_GET_TILING) and decode it.
 *
 * When `surf` is given, only the surface mode and legacy tiling parameters
 * in `surf` are filled.  Otherwise the legacy fields of `md` are filled.
 * Must not be called for slab entries (asserted).
 */
static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      /* Convert the register field encoding to a byte count. */
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      /* Scanout is only tracked via this flag on SI and newer. */
      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   /* Convert the register field encoding to a byte count. */
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}
905
/* Encode tiling metadata and push it to the kernel (GEM_SET_TILING).
 *
 * Parameters are taken from `surf` when given, otherwise from the legacy
 * fields of `md`.  Waits for in-flight CS submissions referencing the
 * buffer before changing tiling.  Must not be called for slab entries.
 */
static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   /* Don't change tiling while a submission ioctl is using the buffer. */
   os_wait_until_zero(&bo->num_active_ioctls, OS_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         /* Convert the byte count back into the register field encoding. */
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         /* Convert the byte count back into the register field encoding. */
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}
976
977 static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys * rws,uint64_t size,unsigned alignment,enum radeon_bo_domain domain,enum radeon_bo_flag flags)978 radeon_winsys_bo_create(struct radeon_winsys *rws,
979 uint64_t size,
980 unsigned alignment,
981 enum radeon_bo_domain domain,
982 enum radeon_bo_flag flags)
983 {
984 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
985 struct radeon_bo *bo;
986
987 radeon_canonicalize_bo_flags(&domain, &flags);
988
989 assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */
990
991 /* Only 32-bit sizes are supported. */
992 if (size > UINT_MAX)
993 return NULL;
994
995 int heap = radeon_get_heap_index(domain, flags);
996
997 /* Sub-allocate small buffers from slabs. */
998 if (heap >= 0 &&
999 size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
1000 ws->info.r600_has_virtual_memory &&
1001 alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
1002 struct pb_slab_entry *entry;
1003
1004 entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1005 if (!entry) {
1006 /* Clear the cache and try again. */
1007 pb_cache_release_all_buffers(&ws->bo_cache);
1008
1009 entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
1010 }
1011 if (!entry)
1012 return NULL;
1013
1014 bo = container_of(entry, struct radeon_bo, u.slab.entry);
1015
1016 pipe_reference_init(&bo->base.reference, 1);
1017
1018 return &bo->base;
1019 }
1020
1021 /* Align size to page size. This is the minimum alignment for normal
1022 * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
1023 * like constant/uniform buffers, can benefit from better and more reuse.
1024 */
1025 size = align(size, ws->info.gart_page_size);
1026 alignment = align(alignment, ws->info.gart_page_size);
1027
1028 bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
1029 !(flags & RADEON_FLAG_DISCARDABLE);
1030
1031 /* Shared resources don't use cached heaps. */
1032 if (use_reusable_pool) {
1033 /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
1034 heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
1035 assert(heap >= 0 && heap < RADEON_NUM_HEAPS);
1036
1037 bo = radeon_bo((struct pb_buffer_lean*)pb_cache_reclaim_buffer(&ws->bo_cache, size,
1038 alignment, 0, heap));
1039 if (bo)
1040 return &bo->base;
1041 }
1042
1043 bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1044 if (!bo) {
1045 /* Clear the cache and try again. */
1046 if (ws->info.r600_has_virtual_memory)
1047 pb_slabs_reclaim(&ws->bo_slabs);
1048 pb_cache_release_all_buffers(&ws->bo_cache);
1049 bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
1050 if (!bo)
1051 return NULL;
1052 }
1053
1054 bo->u.real.use_reusable_pool = use_reusable_pool;
1055
1056 mtx_lock(&ws->bo_handles_mutex);
1057 _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1058 mtx_unlock(&ws->bo_handles_mutex);
1059
1060 return &bo->base;
1061 }
1062
radeon_winsys_bo_destroy(struct radeon_winsys * ws,struct pb_buffer_lean * buf)1063 static void radeon_winsys_bo_destroy(struct radeon_winsys *ws, struct pb_buffer_lean *buf)
1064 {
1065 struct radeon_bo *bo = radeon_bo(buf);
1066
1067 if (bo->handle)
1068 radeon_bo_destroy_or_cache(ws, buf);
1069 else
1070 radeon_bo_slab_destroy(ws, buf);
1071 }
1072
radeon_winsys_bo_from_ptr(struct radeon_winsys * rws,void * pointer,uint64_t size,enum radeon_bo_flag flags)1073 static struct pb_buffer_lean *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
1074 void *pointer, uint64_t size,
1075 enum radeon_bo_flag flags)
1076 {
1077 struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
1078 struct drm_radeon_gem_userptr args;
1079 struct radeon_bo *bo;
1080 int r;
1081
1082 bo = CALLOC_STRUCT(radeon_bo);
1083 if (!bo)
1084 return NULL;
1085
1086 memset(&args, 0, sizeof(args));
1087 args.addr = (uintptr_t)pointer;
1088 args.size = align(size, ws->info.gart_page_size);
1089 args.flags = RADEON_GEM_USERPTR_ANONONLY |
1090 RADEON_GEM_USERPTR_REGISTER |
1091 RADEON_GEM_USERPTR_VALIDATE;
1092
1093 if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
1094 &args, sizeof(args))) {
1095 FREE(bo);
1096 return NULL;
1097 }
1098
1099 assert(args.handle != 0);
1100
1101 mtx_lock(&ws->bo_handles_mutex);
1102
1103 /* Initialize it. */
1104 pipe_reference_init(&bo->base.reference, 1);
1105 bo->handle = args.handle;
1106 bo->base.alignment_log2 = 0;
1107 bo->base.size = size;
1108 bo->rws = ws;
1109 bo->user_ptr = pointer;
1110 bo->va = 0;
1111 bo->initial_domain = RADEON_DOMAIN_GTT;
1112 bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
1113 (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);
1114
1115 _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
1116
1117 mtx_unlock(&ws->bo_handles_mutex);
1118
1119 if (ws->info.r600_has_virtual_memory) {
1120 struct drm_radeon_gem_va va;
1121
1122 bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);
1123
1124 va.handle = bo->handle;
1125 va.operation = RADEON_VA_MAP;
1126 va.vm_id = 0;
1127 va.offset = bo->va;
1128 va.flags = RADEON_VM_PAGE_READABLE |
1129 RADEON_VM_PAGE_WRITEABLE |
1130 RADEON_VM_PAGE_SNOOPED;
1131 va.offset = bo->va;
1132 r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
1133 if (r && va.operation == RADEON_VA_RESULT_ERROR) {
1134 fprintf(stderr, "radeon: Failed to assign virtual address space\n");
1135 radeon_bo_destroy(NULL, &bo->base);
1136 return NULL;
1137 }
1138 mtx_lock(&ws->bo_handles_mutex);
1139 if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
1140 struct pb_buffer_lean *b = &bo->base;
1141 struct radeon_bo *old_bo =
1142 _mesa_hash_table_u64_search(ws->bo_vas, va.offset);
1143
1144 mtx_unlock(&ws->bo_handles_mutex);
1145 radeon_bo_reference(rws, &b, &old_bo->base);
1146 return b;
1147 }
1148
1149 _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
1150 mtx_unlock(&ws->bo_handles_mutex);
1151 }
1152
1153 ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);
1154
1155 return (struct pb_buffer_lean*)bo;
1156 }
1157
/**
 * Import a buffer from a flink name (WINSYS_HANDLE_TYPE_SHARED) or a
 * dma-buf fd (WINSYS_HANDLE_TYPE_FD). Returns NULL on failure.
 *
 * Fixes: removed a duplicated `va.offset = bo->va;` dead store, and the
 * redundant double zero-initialization of `open_arg` (non-portable `= {}`
 * immediately followed by memset) is now a single `= {0}` initializer.
 */
static struct pb_buffer_lean *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                           struct winsys_handle *whandle,
                                                           unsigned vm_alignment,
                                                           bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      p_atomic_inc(&bo->base.reference.count);
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* Open the BO by its flink name; the kernel reports its size. */
      struct drm_gem_open open_arg = {0};
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* Query the dma-buf size by seeking to its end. */
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   /* NOTE(review): truncates to 32 bits; this winsys only supports 32-bit
    * buffer sizes (see radeon_winsys_bo_create). */
   bo->base.size = (unsigned) size;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         /* The kernel already maps this VA to another BO: drop ours and
          * return a reference to the existing one. */
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
            _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}
1296
radeon_winsys_bo_get_handle(struct radeon_winsys * rws,struct pb_buffer_lean * buffer,struct winsys_handle * whandle)1297 static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
1298 struct pb_buffer_lean *buffer,
1299 struct winsys_handle *whandle)
1300 {
1301 struct drm_gem_flink flink;
1302 struct radeon_bo *bo = radeon_bo(buffer);
1303 struct radeon_drm_winsys *ws = bo->rws;
1304
1305 /* Don't allow exports of slab entries. */
1306 if (!bo->handle)
1307 return false;
1308
1309 memset(&flink, 0, sizeof(flink));
1310
1311 bo->u.real.use_reusable_pool = false;
1312
1313 if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
1314 if (!bo->flink_name) {
1315 flink.handle = bo->handle;
1316
1317 if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
1318 return false;
1319 }
1320
1321 bo->flink_name = flink.name;
1322
1323 mtx_lock(&ws->bo_handles_mutex);
1324 _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
1325 mtx_unlock(&ws->bo_handles_mutex);
1326 }
1327 whandle->handle = bo->flink_name;
1328 } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
1329 whandle->handle = bo->handle;
1330 } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
1331 if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
1332 return false;
1333 }
1334
1335 return true;
1336 }
1337
radeon_winsys_bo_is_user_ptr(struct pb_buffer_lean * buf)1338 static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer_lean *buf)
1339 {
1340 return ((struct radeon_bo*)buf)->user_ptr != NULL;
1341 }
1342
radeon_winsys_bo_is_suballocated(struct pb_buffer_lean * buf)1343 static bool radeon_winsys_bo_is_suballocated(struct pb_buffer_lean *buf)
1344 {
1345 return !((struct radeon_bo*)buf)->handle;
1346 }
1347
radeon_winsys_bo_va(struct pb_buffer_lean * buf)1348 static uint64_t radeon_winsys_bo_va(struct pb_buffer_lean *buf)
1349 {
1350 return ((struct radeon_bo*)buf)->va;
1351 }
1352
radeon_winsys_bo_get_reloc_offset(struct pb_buffer_lean * buf)1353 static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer_lean *buf)
1354 {
1355 struct radeon_bo *bo = radeon_bo(buf);
1356
1357 if (bo->handle)
1358 return 0;
1359
1360 return bo->va - bo->u.slab.real->va;
1361 }
1362
radeon_drm_bo_init_functions(struct radeon_drm_winsys * ws)1363 void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
1364 {
1365 ws->base.buffer_set_metadata = radeon_bo_set_metadata;
1366 ws->base.buffer_get_metadata = radeon_bo_get_metadata;
1367 ws->base.buffer_map = radeon_bo_map;
1368 ws->base.buffer_unmap = radeon_bo_unmap;
1369 ws->base.buffer_wait = radeon_bo_wait;
1370 ws->base.buffer_create = radeon_winsys_bo_create;
1371 ws->base.buffer_destroy = radeon_winsys_bo_destroy;
1372 ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
1373 ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
1374 ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
1375 ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
1376 ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
1377 ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
1378 ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
1379 ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
1380 }
1381