/*
 * Copyright © 2011 Marek Olšák <[email protected]>
 *
 * SPDX-License-Identifier: MIT
 */

#include "radeon_drm_cs.h"

#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/u_thread.h"
#include "util/os_mman.h"
#include "util/os_time.h"

#include "frontend/drm_driver.h"

#include <sys/ioctl.h>
#include <xf86drm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags);

static inline struct radeon_bo *radeon_bo(struct pb_buffer_lean *bo)
{
   return (struct radeon_bo *)bo;
}

struct radeon_bo_va_hole {
   struct list_head list;
   uint64_t         offset;
   uint64_t         size;
};

static bool radeon_real_bo_is_busy(struct radeon_bo *bo)
{
   struct drm_radeon_gem_busy args = {0};

   args.handle = bo->handle;
   return drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_BUSY,
                              &args, sizeof(args)) != 0;
}

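/* Check whether a buffer is busy. Real BOs are queried directly through
 * GEM_BUSY; a slab entry is busy if any of the fences (real BOs) it tracks
 * is still busy. Fences found to be idle are dropped from the list.
 */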
static bool radeon_bo_is_busy(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   unsigned num_idle;
   bool busy = false;

   if (bo->handle)
      return radeon_real_bo_is_busy(bo);

   mtx_lock(&bo->rws->bo_fence_lock);
   for (num_idle = 0; num_idle < bo->u.slab.num_fences; ++num_idle) {
      if (radeon_real_bo_is_busy(bo->u.slab.fences[num_idle])) {
         busy = true;
         break;
      }
      radeon_ws_bo_reference(rws, &bo->u.slab.fences[num_idle], NULL);
   }
   memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[num_idle],
         (bo->u.slab.num_fences - num_idle) * sizeof(bo->u.slab.fences[0]));
   bo->u.slab.num_fences -= num_idle;
   mtx_unlock(&bo->rws->bo_fence_lock);

   return busy;
}

static void radeon_real_bo_wait_idle(struct radeon_bo *bo)
{
   struct drm_radeon_gem_wait_idle args = {0};

   args.handle = bo->handle;
   while (drmCommandWrite(bo->rws->fd, DRM_RADEON_GEM_WAIT_IDLE,
                          &args, sizeof(args)) == -EBUSY);
}

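/* Block until the buffer is idle. For slab entries this waits on each
 * tracked fence in turn; the fence lock is dropped while waiting, so the
 * head of the fence list is re-checked after every wait.
 */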
static void radeon_bo_wait_idle(struct radeon_winsys *rws, struct radeon_bo *bo)
{
   if (bo->handle) {
      radeon_real_bo_wait_idle(bo);
   } else {
      mtx_lock(&bo->rws->bo_fence_lock);
      while (bo->u.slab.num_fences) {
         struct radeon_bo *fence = NULL;
         radeon_ws_bo_reference(rws, &fence, bo->u.slab.fences[0]);
         mtx_unlock(&bo->rws->bo_fence_lock);

         /* Wait without holding the fence lock. */
         radeon_real_bo_wait_idle(fence);

         mtx_lock(&bo->rws->bo_fence_lock);
         if (bo->u.slab.num_fences && fence == bo->u.slab.fences[0]) {
            radeon_ws_bo_reference(rws, &bo->u.slab.fences[0], NULL);
            memmove(&bo->u.slab.fences[0], &bo->u.slab.fences[1],
                  (bo->u.slab.num_fences - 1) * sizeof(bo->u.slab.fences[0]));
            bo->u.slab.num_fences--;
         }
         radeon_ws_bo_reference(rws, &fence, NULL);
      }
      mtx_unlock(&bo->rws->bo_fence_lock);
   }
}

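/* Wait for a buffer with a timeout. timeout == 0 is a non-blocking busy
 * query, OS_TIMEOUT_INFINITE waits until idle, and any other timeout is
 * emulated by polling radeon_bo_is_busy.
 */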
static bool radeon_bo_wait(struct radeon_winsys *rws,
                           struct pb_buffer_lean *_buf, uint64_t timeout,
                           unsigned usage)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   int64_t abs_timeout;

   /* No timeout. Just query. */
   if (timeout == 0)
      return !bo->num_active_ioctls && !radeon_bo_is_busy(rws, bo);

   abs_timeout = os_time_get_absolute_timeout(timeout);

   /* Wait if any ioctl is being submitted with this buffer. */
   if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
      return false;

   /* Infinite timeout. */
   if (abs_timeout == OS_TIMEOUT_INFINITE) {
      radeon_bo_wait_idle(rws, bo);
      return true;
   }

   /* Other timeouts need to be emulated with a loop. */
   while (radeon_bo_is_busy(rws, bo)) {
      if (os_time_get_nano() >= abs_timeout)
         return false;
      os_time_sleep(10);
   }

   return true;
}

static enum radeon_bo_domain get_valid_domain(enum radeon_bo_domain domain)
{
   /* Zero domains the driver doesn't understand. */
   domain &= RADEON_DOMAIN_VRAM_GTT;

   /* If no domain is set, we must set something... */
   if (!domain)
      domain = RADEON_DOMAIN_VRAM_GTT;

   return domain;
}

static enum radeon_bo_domain radeon_bo_get_initial_domain(
      struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct drm_radeon_gem_op args;

   memset(&args, 0, sizeof(args));
   args.handle = bo->handle;
   args.op = RADEON_GEM_OP_GET_INITIAL_DOMAIN;

   if (drmCommandWriteRead(bo->rws->fd, DRM_RADEON_GEM_OP,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: failed to get initial domain: %p 0x%08X\n",
              bo, bo->handle);
      /* Default domain as returned by get_valid_domain. */
      return RADEON_DOMAIN_VRAM_GTT;
   }

   /* GEM domains and winsys domains are defined the same. */
   return get_valid_domain(args.value);
}

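/* First-fit allocator for GPU virtual address space. The heap keeps a list
 * of free holes; the first hole that satisfies the aligned request is
 * consumed, split, or shrunk. If no hole fits, the range is taken from the
 * unused area between heap->start and heap->end. Returns 0 on failure.
 */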
static uint64_t radeon_bomgr_find_va(const struct radeon_info *info,
                                     struct radeon_vm_heap *heap,
                                     uint64_t size, uint64_t alignment)
{
   struct radeon_bo_va_hole *hole, *n;
   uint64_t offset = 0, waste = 0;

   /* All VM address space holes will implicitly start aligned to the
    * size alignment, so we don't need to sanitize the alignment here
    */
   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   /* first look for a hole */
   LIST_FOR_EACH_ENTRY_SAFE(hole, n, &heap->holes, list) {
      offset = hole->offset;
      waste = offset % alignment;
      waste = waste ? alignment - waste : 0;
      offset += waste;
      if (offset >= (hole->offset + hole->size)) {
         continue;
      }
      if (!waste && hole->size == size) {
         offset = hole->offset;
         list_del(&hole->list);
         FREE(hole);
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) > size) {
         if (waste) {
            n = CALLOC_STRUCT(radeon_bo_va_hole);
            n->size = waste;
            n->offset = hole->offset;
            list_add(&n->list, &hole->list);
         }
         hole->size -= (size + waste);
         hole->offset += size + waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
      if ((hole->size - waste) == size) {
         hole->size = waste;
         mtx_unlock(&heap->mutex);
         return offset;
      }
   }

   offset = heap->start;
   waste = offset % alignment;
   waste = waste ? alignment - waste : 0;

   if (offset + waste + size > heap->end) {
      mtx_unlock(&heap->mutex);
      return 0;
   }

   if (waste) {
      n = CALLOC_STRUCT(radeon_bo_va_hole);
      n->size = waste;
      n->offset = offset;
      list_add(&n->list, &heap->holes);
   }
   offset += waste;
   heap->start += size + waste;
   mtx_unlock(&heap->mutex);
   return offset;
}

static uint64_t radeon_bomgr_find_va64(struct radeon_drm_winsys *ws,
                                       uint64_t size, uint64_t alignment)
{
   uint64_t va = 0;

   /* Try to allocate from the 64-bit address space first.
    * If it doesn't exist (start = 0) or if it doesn't have enough space,
    * fall back to the 32-bit address space.
    */
   if (ws->vm64.start)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm64, size, alignment);
   if (!va)
      va = radeon_bomgr_find_va(&ws->info, &ws->vm32, size, alignment);
   return va;
}

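/* Return a virtual address range to the heap: either merge it back into
 * heap->start when it is the topmost allocation, grow an adjacent hole, or
 * record it as a new hole.
 */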
static void radeon_bomgr_free_va(const struct radeon_info *info,
                                 struct radeon_vm_heap *heap,
                                 uint64_t va, uint64_t size)
{
   struct radeon_bo_va_hole *hole = NULL;

   size = align(size, info->gart_page_size);

   mtx_lock(&heap->mutex);
   if ((va + size) == heap->start) {
      heap->start = va;
      /* Delete uppermost hole if it reaches the new top */
      if (!list_is_empty(&heap->holes)) {
         hole = container_of(heap->holes.next, struct radeon_bo_va_hole, list);
         if ((hole->offset + hole->size) == va) {
            heap->start = hole->offset;
            list_del(&hole->list);
            FREE(hole);
         }
      }
   } else {
      struct radeon_bo_va_hole *next;

      hole = container_of(&heap->holes, struct radeon_bo_va_hole, list);
      LIST_FOR_EACH_ENTRY(next, &heap->holes, list) {
         if (next->offset < va)
            break;
         hole = next;
      }

      if (&hole->list != &heap->holes) {
         /* Grow upper hole if it's adjacent */
         if (hole->offset == (va + size)) {
            hole->offset = va;
            hole->size += size;
            /* Merge lower hole if it's adjacent */
            if (next != hole && &next->list != &heap->holes &&
                (next->offset + next->size) == va) {
               next->size += hole->size;
               list_del(&hole->list);
               FREE(hole);
            }
            goto out;
         }
      }

      /* Grow lower hole if it's adjacent */
      if (next != hole && &next->list != &heap->holes &&
          (next->offset + next->size) == va) {
         next->size += size;
         goto out;
      }

      /* FIXME on allocation failure we just lose virtual address space
       * maybe print a warning
       */
      next = CALLOC_STRUCT(radeon_bo_va_hole);
      if (next) {
         next->size = size;
         next->offset = va;
         list_add(&next->list, &hole->list);
      }
   }
out:
   mtx_unlock(&heap->mutex);
}

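/* Destroy a real (non-slab) BO: remove it from the handle/name tables,
 * unmap it from the CPU, release its virtual address range, close the GEM
 * handle, and update the allocation/mapping statistics.
 */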
void radeon_bo_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);
   struct radeon_drm_winsys *rws = bo->rws;
   struct drm_gem_close args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   mtx_lock(&rws->bo_handles_mutex);
   /* radeon_winsys_bo_from_handle might have revived the bo */
   if (pipe_is_referenced(&bo->base.reference)) {
      mtx_unlock(&rws->bo_handles_mutex);
      return;
   }
   _mesa_hash_table_remove_key(rws->bo_handles, (void*)(uintptr_t)bo->handle);
   if (bo->flink_name) {
      _mesa_hash_table_remove_key(rws->bo_names,
                                  (void*)(uintptr_t)bo->flink_name);
   }
   mtx_unlock(&rws->bo_handles_mutex);

   if (bo->u.real.ptr)
      os_munmap(bo->u.real.ptr, bo->base.size);

   if (rws->info.r600_has_virtual_memory) {
      if (rws->va_unmap_working) {
         struct drm_radeon_gem_va va;

         va.handle = bo->handle;
         va.vm_id = 0;
         va.operation = RADEON_VA_UNMAP;
         va.flags = RADEON_VM_PAGE_READABLE |
                    RADEON_VM_PAGE_WRITEABLE |
                    RADEON_VM_PAGE_SNOOPED;
         va.offset = bo->va;

         if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va,
                                 sizeof(va)) != 0 &&
             va.operation == RADEON_VA_RESULT_ERROR) {
            fprintf(stderr, "radeon: Failed to deallocate virtual address for buffer:\n");
            fprintf(stderr, "radeon:    size      : %"PRIu64" bytes\n", bo->base.size);
            fprintf(stderr, "radeon:    va        : 0x%"PRIx64"\n", bo->va);
         }
      }

      radeon_bomgr_free_va(&rws->info,
                           bo->va < rws->vm32.end ? &rws->vm32 : &rws->vm64,
                           bo->va, bo->base.size);
   }

   /* Close object. */
   args.handle = bo->handle;
   drmIoctl(rws->fd, DRM_IOCTL_GEM_CLOSE, &args);

   mtx_destroy(&bo->u.real.map_mutex);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      rws->allocated_vram -= align(bo->base.size, rws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      rws->allocated_gtt -= align(bo->base.size, rws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->rws->mapped_vram -= bo->base.size;
      else
         bo->rws->mapped_gtt -= bo->base.size;
      bo->rws->num_mapped_buffers--;
   }

   FREE(bo);
}

static void radeon_bo_destroy_or_cache(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_drm_winsys *rws = (struct radeon_drm_winsys *)winsys;
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(bo->handle && "must not be called for slab entries");

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&rws->bo_cache, &bo->u.real.cache_entry);
   else
      radeon_bo_destroy(NULL, _buf);
}

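/* CPU-map a buffer. User-pointer BOs return the user pointer directly and
 * slab entries are mapped through their backing real BO at the appropriate
 * offset. A real BO is mmapped at most once; map_count tracks nested
 * map/unmap calls.
 */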
void *radeon_bo_do_map(struct radeon_bo *bo)
{
   struct drm_radeon_gem_mmap args = {0};
   void *ptr;
   unsigned offset;

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->handle) {
      offset = 0;
   } else {
      offset = bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   /* Map the buffer. */
   mtx_lock(&bo->u.real.map_mutex);
   /* Return the pointer if it's already mapped. */
   if (bo->u.real.ptr) {
      bo->u.real.map_count++;
      mtx_unlock(&bo->u.real.map_mutex);
      return (uint8_t*)bo->u.real.ptr + offset;
   }
   args.handle = bo->handle;
   args.offset = 0;
   args.size = (uint64_t)bo->base.size;
   if (drmCommandWriteRead(bo->rws->fd,
                           DRM_RADEON_GEM_MMAP,
                           &args,
                           sizeof(args))) {
      mtx_unlock(&bo->u.real.map_mutex);
      fprintf(stderr, "radeon: gem_mmap failed: %p 0x%08X\n",
              bo, bo->handle);
      return NULL;
   }

   ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                 bo->rws->fd, args.addr_ptr);
   if (ptr == MAP_FAILED) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&bo->rws->bo_cache);

      ptr = os_mmap(0, args.size, PROT_READ|PROT_WRITE, MAP_SHARED,
                    bo->rws->fd, args.addr_ptr);
      if (ptr == MAP_FAILED) {
         mtx_unlock(&bo->u.real.map_mutex);
         fprintf(stderr, "radeon: mmap failed, errno: %i\n", errno);
         return NULL;
      }
   }
   bo->u.real.ptr = ptr;
   bo->u.real.map_count = 1;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram += bo->base.size;
   else
      bo->rws->mapped_gtt += bo->base.size;
   bo->rws->num_mapped_buffers++;

   mtx_unlock(&bo->u.real.map_mutex);
   return (uint8_t*)bo->u.real.ptr + offset;
}

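/* Synchronizing map. Unless PIPE_MAP_UNSYNCHRONIZED is set, flush the
 * command stream referencing the buffer and wait for the GPU before
 * returning the pointer; PIPE_MAP_DONTBLOCK turns the wait into a
 * non-blocking check and returns NULL if the buffer is still busy.
 */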
static void *radeon_bo_map(struct radeon_winsys *rws,
                           struct pb_buffer_lean *buf,
                           struct radeon_cmdbuf *rcs,
                           enum pipe_map_flags usage)
{
   struct radeon_bo *bo = (struct radeon_bo*)buf;
   struct radeon_drm_cs *cs = rcs ? radeon_drm_cs(rcs) : NULL;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_MAP_DONTBLOCK) {
         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && radeon_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
               return NULL;
            }

            if (!radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_MAP_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && radeon_bo_is_referenced_by_cs_for_write(cs, bo)) {
               cs->flush_cs(cs->flush_data,
                            RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
            }
            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (radeon_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data,
                               RADEON_FLUSH_START_NEXT_GFX_IB_NOW, NULL);
               } else {
                  /* Try to avoid busy-waiting in radeon_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     radeon_drm_cs_sync_flush(rcs);
               }
            }

            radeon_bo_wait(rws, (struct pb_buffer_lean*)bo, OS_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->rws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   return radeon_bo_do_map(bo);
}

static void radeon_bo_unmap(struct radeon_winsys *rws, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = (struct radeon_bo*)_buf;

   if (bo->user_ptr)
      return;

   if (!bo->handle)
      bo = bo->u.slab.real;

   mtx_lock(&bo->u.real.map_mutex);
   if (!bo->u.real.ptr) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's not been mapped */
   }

   assert(bo->u.real.map_count);
   if (--bo->u.real.map_count) {
      mtx_unlock(&bo->u.real.map_mutex);
      return; /* it's been mapped multiple times */
   }

   os_munmap(bo->u.real.ptr, bo->base.size);
   bo->u.real.ptr = NULL;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->rws->mapped_vram -= bo->base.size;
   else
      bo->rws->mapped_gtt -= bo->base.size;
   bo->rws->num_mapped_buffers--;

   mtx_unlock(&bo->u.real.map_mutex);
}

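/* Allocate a real BO with DRM_RADEON_GEM_CREATE and, if the kernel supports
 * virtual memory, reserve and map a virtual address range for it. If the
 * chosen VA is already mapped to another BO (RADEON_VA_RESULT_VA_EXIST),
 * the existing BO is returned instead.
 */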
static struct radeon_bo *radeon_create_bo(struct radeon_drm_winsys *rws,
                                          unsigned size, unsigned alignment,
                                          unsigned initial_domains,
                                          unsigned flags,
                                          int heap)
{
   struct radeon_bo *bo;
   struct drm_radeon_gem_create args;
   int r;

   memset(&args, 0, sizeof(args));

   assert(initial_domains);
   assert((initial_domains &
           ~(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) == 0);

   args.size = size;
   args.alignment = alignment;
   args.initial_domain = initial_domains;
   args.flags = 0;

   /* If VRAM is just stolen system memory, allow both VRAM and
    * GTT, whichever has free space. If a buffer is evicted from
    * VRAM to GTT, it will stay there.
    */
   if (!rws->info.has_dedicated_vram)
      args.initial_domain |= RADEON_DOMAIN_GTT;

   if (flags & RADEON_FLAG_GTT_WC)
      args.flags |= RADEON_GEM_GTT_WC;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      args.flags |= RADEON_GEM_NO_CPU_ACCESS;

   if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE,
                           &args, sizeof(args))) {
      fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
      fprintf(stderr, "radeon:    size      : %u bytes\n", size);
      fprintf(stderr, "radeon:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radeon:    domains   : %u\n", args.initial_domain);
      fprintf(stderr, "radeon:    flags     : %u\n", args.flags);
      return NULL;
   }

   assert(args.handle != 0);

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = util_logbase2(alignment);
   bo->base.usage = 0;
   bo->base.size = size;
   bo->rws = rws;
   bo->handle = args.handle;
   bo->va = 0;
   bo->initial_domain = initial_domains;
   bo->hash = __sync_fetch_and_add(&rws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (heap >= 0) {
      pb_cache_init_entry(&rws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                          heap);
   }

   if (rws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;
      unsigned va_gap_size;

      va_gap_size = rws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;

      if (flags & RADEON_FLAG_32BIT) {
         bo->va = radeon_bomgr_find_va(&rws->info, &rws->vm32,
                                       size + va_gap_size, alignment);
         assert(bo->va + size < rws->vm32.end);
      } else {
         bo->va = radeon_bomgr_find_va64(rws, size + va_gap_size, alignment);
      }

      va.handle = bo->handle;
      va.vm_id = 0;
      va.operation = RADEON_VA_MAP;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to allocate virtual address for buffer:\n");
         fprintf(stderr, "radeon:    size      : %d bytes\n", size);
         fprintf(stderr, "radeon:    alignment : %d bytes\n", alignment);
         fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
         fprintf(stderr, "radeon:    va        : 0x%016llx\n", (unsigned long long)bo->va);
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&rws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(rws->bo_vas, va.offset);

         mtx_unlock(&rws->bo_handles_mutex);
         radeon_bo_reference(&rws->base, &b, &old_bo->base);
         return radeon_bo(b);
      }

      _mesa_hash_table_u64_insert(rws->bo_vas, bo->va, bo);
      mtx_unlock(&rws->bo_handles_mutex);
   }

   if (initial_domains & RADEON_DOMAIN_VRAM)
      rws->allocated_vram += align(size, rws->info.gart_page_size);
   else if (initial_domains & RADEON_DOMAIN_GTT)
      rws->allocated_gtt += align(size, rws->info.gart_page_size);

   return bo;
}

bool radeon_bo_can_reclaim(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo((struct pb_buffer_lean*)_buf);

   if (radeon_bo_is_referenced_by_any_cs(bo))
      return false;

   return radeon_bo_wait(winsys, (struct pb_buffer_lean*)_buf, 0, RADEON_USAGE_READWRITE);
}

bool radeon_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct radeon_bo *bo = container_of(entry, struct radeon_bo, u.slab.entry);

   return radeon_bo_can_reclaim(priv, &bo->base);
}

static void radeon_bo_slab_destroy(void *winsys, struct pb_buffer_lean *_buf)
{
   struct radeon_bo *bo = radeon_bo(_buf);

   assert(!bo->handle);

   pb_slab_free(&bo->rws->bo_slabs, &bo->u.slab.entry);
}

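/* Allocate the backing storage for a slab: a 64 KB real BO carved into
 * equally sized entries. Each entry is a radeon_bo without a GEM handle
 * that points at the backing buffer through u.slab.real.
 */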
struct pb_slab *radeon_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct radeon_drm_winsys *ws = priv;
   struct radeon_slab *slab = CALLOC_STRUCT(radeon_slab);
   enum radeon_bo_domain domains = radeon_domain_from_heap(heap);
   enum radeon_bo_flag flags = radeon_flags_from_heap(heap);
   unsigned base_hash;

   if (!slab)
      return NULL;

   slab->buffer = radeon_bo(radeon_winsys_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->handle);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   list_inithead(&slab->base.free);

   base_hash = __sync_fetch_and_add(&ws->next_bo_hash, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];

      bo->base.alignment_log2 = util_logbase2(entry_size);
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->rws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->hash = base_hash + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.real = slab->buffer;

      list_addtail(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   radeon_ws_bo_reference(&ws->base, &slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void radeon_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct radeon_winsys *rws = (struct radeon_winsys *)priv;
   struct radeon_slab *slab = (struct radeon_slab *)pslab;

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct radeon_bo *bo = &slab->entries[i];
      for (unsigned j = 0; j < bo->u.slab.num_fences; ++j)
         radeon_ws_bo_reference(rws, &bo->u.slab.fences[j], NULL);
      FREE(bo->u.slab.fences);
   }

   FREE(slab->entries);
   radeon_ws_bo_reference(rws, &slab->buffer, NULL);
   FREE(slab);
}

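/* Convert the EG_TILE_SPLIT tiling-flag field (0..6) to a tile split size
 * in bytes, and back.
 */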
static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}

static void radeon_bo_get_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_GET_TILING,
                       &args,
                       sizeof(args));

   if (surf) {
      if (args.tiling_flags & RADEON_TILING_MACRO)
         md->mode = RADEON_SURF_MODE_2D;
      else if (args.tiling_flags & RADEON_TILING_MICRO)
         md->mode = RADEON_SURF_MODE_1D;
      else
         md->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

      surf->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
      surf->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
      surf->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
      surf->u.legacy.tile_split = eg_tile_split(surf->u.legacy.tile_split);
      surf->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;

      if (bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT))
         surf->flags |= RADEON_SURF_SCANOUT;
      else
         surf->flags &= ~RADEON_SURF_SCANOUT;
      return;
   }

   md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
   md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
   if (args.tiling_flags & RADEON_TILING_MICRO)
      md->u.legacy.microtile = RADEON_LAYOUT_TILED;
   else if (args.tiling_flags & RADEON_TILING_MICRO_SQUARE)
      md->u.legacy.microtile = RADEON_LAYOUT_SQUARETILED;

   if (args.tiling_flags & RADEON_TILING_MACRO)
      md->u.legacy.macrotile = RADEON_LAYOUT_TILED;

   md->u.legacy.bankw = (args.tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
   md->u.legacy.bankh = (args.tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
   md->u.legacy.tile_split = (args.tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
   md->u.legacy.mtilea = (args.tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
   md->u.legacy.tile_split = eg_tile_split(md->u.legacy.tile_split);
   md->u.legacy.scanout = bo->rws->gen >= DRV_SI && !(args.tiling_flags & RADEON_TILING_R600_NO_SCANOUT);
}

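/* Encode the tiling layout from either the surface description or the
 * generic metadata into kernel tiling flags and apply them with
 * DRM_RADEON_GEM_SET_TILING, after waiting for in-flight ioctls that still
 * reference the buffer.
 */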
static void radeon_bo_set_metadata(struct radeon_winsys *rws,
                                   struct pb_buffer_lean *_buf,
                                   struct radeon_bo_metadata *md,
                                   struct radeon_surf *surf)
{
   struct radeon_bo *bo = radeon_bo(_buf);
   struct drm_radeon_gem_set_tiling args;

   assert(bo->handle && "must not be called for slab entries");

   memset(&args, 0, sizeof(args));

   os_wait_until_zero(&bo->num_active_ioctls, OS_TIMEOUT_INFINITE);

   if (surf) {
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         args.tiling_flags |= RADEON_TILING_MICRO;
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (surf->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (surf->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (surf->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(surf->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (surf->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !(surf->flags & RADEON_SURF_SCANOUT))
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = surf->u.legacy.level[0].nblk_x * surf->bpe;
   } else {
      if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MICRO;
      else if (md->u.legacy.microtile == RADEON_LAYOUT_SQUARETILED)
         args.tiling_flags |= RADEON_TILING_MICRO_SQUARE;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         args.tiling_flags |= RADEON_TILING_MACRO;

      args.tiling_flags |= (md->u.legacy.bankw & RADEON_TILING_EG_BANKW_MASK) <<
                           RADEON_TILING_EG_BANKW_SHIFT;
      args.tiling_flags |= (md->u.legacy.bankh & RADEON_TILING_EG_BANKH_MASK) <<
                           RADEON_TILING_EG_BANKH_SHIFT;
      if (md->u.legacy.tile_split) {
         args.tiling_flags |= (eg_tile_split_rev(md->u.legacy.tile_split) &
                               RADEON_TILING_EG_TILE_SPLIT_MASK) <<
                              RADEON_TILING_EG_TILE_SPLIT_SHIFT;
      }
      args.tiling_flags |= (md->u.legacy.mtilea & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK) <<
                           RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT;

      if (bo->rws->gen >= DRV_SI && !md->u.legacy.scanout)
         args.tiling_flags |= RADEON_TILING_R600_NO_SCANOUT;

      args.pitch = md->u.legacy.stride;
   }

   args.handle = bo->handle;

   drmCommandWriteRead(bo->rws->fd,
                       DRM_RADEON_GEM_SET_TILING,
                       &args,
                       sizeof(args));
}

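/* Main buffer allocation entry point: small allocations that fit a slab are
 * sub-allocated, reusable allocations are served from the pb_cache when
 * possible, and everything else goes through radeon_create_bo. On failure
 * the caches are flushed and the allocation is retried once.
 */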
static struct pb_buffer_lean *
radeon_winsys_bo_create(struct radeon_winsys *rws,
                        uint64_t size,
                        unsigned alignment,
                        enum radeon_bo_domain domain,
                        enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;

   radeon_canonicalize_bo_flags(&domain, &flags);

   assert(!(flags & RADEON_FLAG_SPARSE)); /* not supported */

   /* Only 32-bit sizes are supported. */
   if (size > UINT_MAX)
      return NULL;

   int heap = radeon_get_heap_index(domain, flags);

   /* Sub-allocate small buffers from slabs. */
   if (heap >= 0 &&
       size <= (1 << RADEON_SLAB_MAX_SIZE_LOG2) &&
       ws->info.r600_has_virtual_memory &&
       alignment <= MAX2(1 << RADEON_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = container_of(entry, struct radeon_bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   bool use_reusable_pool = flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
                            !(flags & RADEON_FLAG_DISCARDABLE);

   /* Shared resources don't use cached heaps. */
   if (use_reusable_pool) {
      /* RADEON_FLAG_NO_SUBALLOC is irrelevant for the cache. */
      heap = radeon_get_heap_index(domain, flags & ~RADEON_FLAG_NO_SUBALLOC);
      assert(heap >= 0 && heap < RADEON_NUM_HEAPS);

      bo = radeon_bo((struct pb_buffer_lean*)pb_cache_reclaim_buffer(&ws->bo_cache, size,
                                                                alignment, 0, heap));
      if (bo)
         return &bo->base;
   }

   bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
   if (!bo) {
      /* Clear the cache and try again. */
      if (ws->info.r600_has_virtual_memory)
         pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = radeon_create_bo(ws, size, alignment, domain, flags, heap);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = use_reusable_pool;

   mtx_lock(&ws->bo_handles_mutex);
   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);
   mtx_unlock(&ws->bo_handles_mutex);

   return &bo->base;
}

static void radeon_winsys_bo_destroy(struct radeon_winsys *ws, struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      radeon_bo_destroy_or_cache(ws, buf);
   else
      radeon_bo_slab_destroy(ws, buf);
}

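/* Wrap anonymous user memory in a BO via DRM_RADEON_GEM_USERPTR. The
 * resulting BO lives in GTT and, when virtual memory is enabled, receives a
 * VA mapping like any other buffer.
 */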
static struct pb_buffer_lean *radeon_winsys_bo_from_ptr(struct radeon_winsys *rws,
                                                   void *pointer, uint64_t size,
                                                   enum radeon_bo_flag flags)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct drm_radeon_gem_userptr args;
   struct radeon_bo *bo;
   int r;

   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo)
      return NULL;

   memset(&args, 0, sizeof(args));
   args.addr = (uintptr_t)pointer;
   args.size = align(size, ws->info.gart_page_size);
   args.flags = RADEON_GEM_USERPTR_ANONONLY |
                RADEON_GEM_USERPTR_REGISTER |
                RADEON_GEM_USERPTR_VALIDATE;

   if (drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR,
                           &args, sizeof(args))) {
      FREE(bo);
      return NULL;
   }

   assert(args.handle != 0);

   mtx_lock(&ws->bo_handles_mutex);

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->handle = args.handle;
   bo->base.alignment_log2 = 0;
   bo->base.size = size;
   bo->rws = ws;
   bo->user_ptr = pointer;
   bo->va = 0;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, 1 << 20);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;
}

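/* Import a BO from a flink name or a dma-buf fd. A handle that was imported
 * before returns the already existing BO (see the comment on the handle
 * table below); otherwise a new radeon_bo is created, registered, and given
 * a VA mapping when virtual memory is enabled.
 */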
static struct pb_buffer_lean *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
                                                      struct winsys_handle *whandle,
                                                      unsigned vm_alignment,
                                                      bool is_dri_prime_linear_buffer)
{
   struct radeon_drm_winsys *ws = radeon_drm_winsys(rws);
   struct radeon_bo *bo;
   int r;
   unsigned handle;
   uint64_t size = 0;

   /* We must maintain a list of pairs <handle, bo>, so that we always return
    * the same BO for one particular handle. If we didn't do that and created
    * more than one BO for the same handle and then relocated them in a CS,
    * we would hit a deadlock in the kernel.
    *
    * The list of pairs is guarded by a mutex, of course. */
   mtx_lock(&ws->bo_handles_mutex);

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      /* First check if there already is an existing bo for the handle. */
      bo = util_hash_table_get(ws->bo_names, (void*)(uintptr_t)whandle->handle);
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      /* We must first get the GEM handle, as fds are unreliable keys */
      r = drmPrimeFDToHandle(ws->fd, whandle->handle, &handle);
      if (r)
         goto fail;
      bo = util_hash_table_get(ws->bo_handles, (void*)(uintptr_t)handle);
   } else {
      /* Unknown handle type */
      goto fail;
   }

   if (bo) {
      /* Increase the refcount. */
      p_atomic_inc(&bo->base.reference.count);
      goto done;
   }

   /* There isn't, create a new one. */
   bo = CALLOC_STRUCT(radeon_bo);
   if (!bo) {
      goto fail;
   }

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      struct drm_gem_open open_arg = {};
      memset(&open_arg, 0, sizeof(open_arg));
      /* Open the BO. */
      open_arg.name = whandle->handle;
      if (drmIoctl(ws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
         FREE(bo);
         goto fail;
      }
      handle = open_arg.handle;
      size = open_arg.size;
      bo->flink_name = whandle->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      size = lseek(whandle->handle, 0, SEEK_END);
      /*
       * Could check errno to determine whether the kernel is new enough, but
       * it doesn't really matter why this failed, just that it failed.
       */
      if (size == (off_t)-1) {
         FREE(bo);
         goto fail;
      }
      lseek(whandle->handle, 0, SEEK_SET);
   }

   assert(handle != 0);

   bo->handle = handle;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment_log2 = 0;
   bo->base.size = (unsigned) size;
   bo->rws = ws;
   bo->va = 0;
   bo->hash = __sync_fetch_and_add(&ws->next_bo_hash, 1);
   (void) mtx_init(&bo->u.real.map_mutex, mtx_plain);

   if (bo->flink_name)
      _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);

   _mesa_hash_table_insert(ws->bo_handles, (void*)(uintptr_t)bo->handle, bo);

done:
   mtx_unlock(&ws->bo_handles_mutex);

   if (ws->info.r600_has_virtual_memory && !bo->va) {
      struct drm_radeon_gem_va va;

      bo->va = radeon_bomgr_find_va64(ws, bo->base.size, vm_alignment);

      va.handle = bo->handle;
      va.operation = RADEON_VA_MAP;
      va.vm_id = 0;
      va.offset = bo->va;
      va.flags = RADEON_VM_PAGE_READABLE |
                 RADEON_VM_PAGE_WRITEABLE |
                 RADEON_VM_PAGE_SNOOPED;
      va.offset = bo->va;
      r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
      if (r && va.operation == RADEON_VA_RESULT_ERROR) {
         fprintf(stderr, "radeon: Failed to assign virtual address space\n");
         radeon_bo_destroy(NULL, &bo->base);
         return NULL;
      }
      mtx_lock(&ws->bo_handles_mutex);
      if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
         struct pb_buffer_lean *b = &bo->base;
         struct radeon_bo *old_bo =
               _mesa_hash_table_u64_search(ws->bo_vas, va.offset);

         mtx_unlock(&ws->bo_handles_mutex);
         radeon_bo_reference(rws, &b, &old_bo->base);
         return b;
      }

      _mesa_hash_table_u64_insert(ws->bo_vas, bo->va, bo);
      mtx_unlock(&ws->bo_handles_mutex);
   }

   bo->initial_domain = radeon_bo_get_initial_domain((void*)bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align(bo->base.size, ws->info.gart_page_size);

   return (struct pb_buffer_lean*)bo;

fail:
   mtx_unlock(&ws->bo_handles_mutex);
   return NULL;
}

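/* Export a BO as a flink name, a KMS/GEM handle, or a dma-buf fd. Exported
 * buffers are excluded from the reusable pool so they are never recycled
 * while another process may still use them.
 */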
static bool radeon_winsys_bo_get_handle(struct radeon_winsys *rws,
                                        struct pb_buffer_lean *buffer,
                                        struct winsys_handle *whandle)
{
   struct drm_gem_flink flink;
   struct radeon_bo *bo = radeon_bo(buffer);
   struct radeon_drm_winsys *ws = bo->rws;

   /* Don't allow exports of slab entries. */
   if (!bo->handle)
      return false;

   memset(&flink, 0, sizeof(flink));

   bo->u.real.use_reusable_pool = false;

   if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {
      if (!bo->flink_name) {
         flink.handle = bo->handle;

         if (ioctl(ws->fd, DRM_IOCTL_GEM_FLINK, &flink)) {
            return false;
         }

         bo->flink_name = flink.name;

         mtx_lock(&ws->bo_handles_mutex);
         _mesa_hash_table_insert(ws->bo_names, (void*)(uintptr_t)bo->flink_name, bo);
         mtx_unlock(&ws->bo_handles_mutex);
      }
      whandle->handle = bo->flink_name;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {
      whandle->handle = bo->handle;
   } else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {
      if (drmPrimeHandleToFD(ws->fd, bo->handle, DRM_CLOEXEC, (int*)&whandle->handle))
         return false;
   }

   return true;
}

static bool radeon_winsys_bo_is_user_ptr(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->user_ptr != NULL;
}

static bool radeon_winsys_bo_is_suballocated(struct pb_buffer_lean *buf)
{
   return !((struct radeon_bo*)buf)->handle;
}

static uint64_t radeon_winsys_bo_va(struct pb_buffer_lean *buf)
{
   return ((struct radeon_bo*)buf)->va;
}

static unsigned radeon_winsys_bo_get_reloc_offset(struct pb_buffer_lean *buf)
{
   struct radeon_bo *bo = radeon_bo(buf);

   if (bo->handle)
      return 0;

   return bo->va - bo->u.slab.real->va;
}

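/* Plug the buffer functions into the winsys vtable. */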
void radeon_drm_bo_init_functions(struct radeon_drm_winsys *ws)
{
   ws->base.buffer_set_metadata = radeon_bo_set_metadata;
   ws->base.buffer_get_metadata = radeon_bo_get_metadata;
   ws->base.buffer_map = radeon_bo_map;
   ws->base.buffer_unmap = radeon_bo_unmap;
   ws->base.buffer_wait = radeon_bo_wait;
   ws->base.buffer_create = radeon_winsys_bo_create;
   ws->base.buffer_destroy = radeon_winsys_bo_destroy;
   ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
   ws->base.buffer_from_ptr = radeon_winsys_bo_from_ptr;
   ws->base.buffer_is_user_ptr = radeon_winsys_bo_is_user_ptr;
   ws->base.buffer_is_suballocated = radeon_winsys_bo_is_suballocated;
   ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
   ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
   ws->base.buffer_get_reloc_offset = radeon_winsys_bo_get_reloc_offset;
   ws->base.buffer_get_initial_domain = radeon_bo_get_initial_domain;
}