/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <[email protected]>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include <stdio.h>

#include "radv_amdgpu_bo.h"
#include "radv_debug.h"

#include <amdgpu.h>
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include <sys/mman.h>

#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_math.h"
#include "util/u_memory.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);

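/* Helper around amdgpu_bo_va_op_raw(): translates RADV BO flags into VM page
 * flags (or passes internal_flags through when no BO is given), aligns the
 * size to the CPU page size and performs the requested VA operation.
 */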
static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset, uint64_t size, uint64_t addr,
                     uint32_t bo_flags, uint64_t internal_flags, uint32_t ops)
{
   uint64_t flags = internal_flags;
   if (bo) {
      flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;

      if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.gfx_level >= GFX9)
         flags |= AMDGPU_VM_MTYPE_UC;

      if (!(bo_flags & RADEON_FLAG_READ_ONLY))
         flags |= AMDGPU_VM_PAGE_WRITEABLE;
   }

   size = align64(size, getpagesize());

   return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}

static int
bo_comparator(const void *ap, const void *bp)
{
   struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
   struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
   return (a > b) ? 1 : (a < b) ? -1 : 0;
}

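/* Rebuild the deduplicated list of backing BOs of a virtual BO from its
 * current ranges: collect every non-NULL range BO, sort by pointer and drop
 * duplicates. Called after each bind so the submission code knows which real
 * BOs back the virtual one.
 */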
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&bo->lock);

   if (bo->bo_capacity < bo->range_count) {
      uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
      struct radv_amdgpu_winsys_bo **bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!bos) {
         u_rwlock_wrunlock(&bo->lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }
      bo->bos = bos;
      bo->bo_capacity = new_count;
   }

   uint32_t temp_bo_count = 0;
   for (uint32_t i = 0; i < bo->range_count; ++i)
      if (bo->ranges[i].bo)
         bo->bos[temp_bo_count++] = bo->ranges[i].bo;

   qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

   if (!temp_bo_count) {
      bo->bo_count = 0;
   } else {
      uint32_t final_bo_count = 1;
      for (uint32_t i = 1; i < temp_bo_count; ++i)
         if (bo->bos[i] != bo->bos[i - 1])
            bo->bos[final_bo_count++] = bo->bos[i];

      bo->bo_count = final_bo_count;
   }

   u_rwlock_wrunlock(&bo->lock);
   return VK_SUCCESS;
}

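/* Bind (or unbind, when bo is NULL) a range of a real BO into a range of a
 * virtual BO: update the PTEs through the kernel, merge/split the parent's
 * range list so it keeps describing the whole VA space of the virtual BO, and
 * rebuild its backing-BO list.
 */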
static VkResult
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent, uint64_t offset,
                                   uint64_t size, struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
   struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
   int range_count_delta, new_idx;
   int first = 0, last;
   struct radv_amdgpu_map_range new_first, new_last;
   VkResult result;
   int r;

   assert(parent->is_virtual);
   assert(!bo || !bo->is_virtual);

   /* When the BO is NULL, AMDGPU will reset the PTE VA range to the initial state. Otherwise, it
    * will first unmap all existing VA that overlap the requested range and then map.
    */
   if (bo) {
      r = radv_amdgpu_bo_va_op(ws, bo->bo, bo_offset, size, parent->base.va + offset, 0, 0, AMDGPU_VA_OP_REPLACE);
   } else {
      r =
         radv_amdgpu_bo_va_op(ws, NULL, 0, size, parent->base.va + offset, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_REPLACE);
   }

   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to replace a PRT VA region (%d).\n", r);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   /* Do not add the BO to the virtual BO list if it's already in the global list, to avoid dangling
    * BO references: it might have been destroyed without being unbound first. Resetting it to NULL
    * clears the old BO ranges if present.
    *
    * This is going to be clarified in the Vulkan spec:
    * https://gitlab.khronos.org/vulkan/vulkan/-/issues/3125
    *
    * The issue still exists for non-global BOs but it will be addressed later, once we are 100% sure
    * it's RADV's fault (mostly because the solution looks more complicated).
    */
   if (bo && radv_buffer_is_resident(&bo->base)) {
      bo = NULL;
      bo_offset = 0;
   }

   /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
    * contains the newly bound range). */
   if (parent->range_capacity - parent->range_count < 2) {
      uint32_t range_capacity = parent->range_capacity + 2;
      struct radv_amdgpu_map_range *ranges =
         realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
      if (!ranges)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      parent->ranges = ranges;
      parent->range_capacity = range_capacity;
   }

   /*
    * [first, last] is exactly the range of ranges that either overlap the
    * new parent, or are adjacent to it. This corresponds to the bind ranges
    * that may change.
    */
   while (first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
      ++first;

   last = first;
   while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
      ++last;

   /* Whether the first or last range is going to be totally removed or just
    * resized/left alone. Note that in the case of first == last, we will split
    * this into a part before and after the new range. The remove flag then
    * indicates whether the corresponding split part should not be created. */
   bool remove_first = parent->ranges[first].offset == offset;
   bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;

   assert(parent->ranges[first].offset <= offset);
   assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);

   /* Try to merge the new range with the first range. */
   if (parent->ranges[first].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
      size += offset - parent->ranges[first].offset;
      offset = parent->ranges[first].offset;
      bo_offset = parent->ranges[first].bo_offset;
      remove_first = true;
   }

   /* Try to merge the new range with the last range. */
   if (parent->ranges[last].bo == bo &&
       (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
      size = parent->ranges[last].offset + parent->ranges[last].size - offset;
      remove_last = true;
   }

   range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
   new_idx = first + !remove_first;

   /* If the first/last ranges are not left alone, we unmap them and optionally map
    * them again after modifications. Note that this implicitly handles the splitting
    * if first == last. */
   new_first = parent->ranges[first];
   new_last = parent->ranges[last];

   if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
      if (!remove_first) {
         new_first.size = offset - new_first.offset;
      }
   }

   if (parent->ranges[last].offset < offset + size || remove_last) {
      if (!remove_last) {
         new_last.size -= offset + size - new_last.offset;
         new_last.bo_offset += (offset + size - new_last.offset);
         new_last.offset = offset + size;
      }
   }

   /* Moves the range list after last to account for the changed number of ranges. */
   memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
           sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

   if (!remove_first)
      parent->ranges[first] = new_first;

   if (!remove_last)
      parent->ranges[new_idx + 1] = new_last;

   /* Actually set up the new range. */
   parent->ranges[new_idx].offset = offset;
   parent->ranges[new_idx].size = size;
   parent->ranges[new_idx].bo = bo;
   parent->ranges[new_idx].bo_offset = bo_offset;

   parent->range_count += range_count_delta;

   result = radv_amdgpu_winsys_rebuild_bo_list(parent);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

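/* Append a creation/destruction event to the winsys BO log when BO logging is
 * enabled; the log can later be dumped with dump_bo_log (e.g. for hang reports).
 */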
static void
radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
   struct radv_amdgpu_winsys_bo_log *bo_log = NULL;

   if (!ws->debug_log_bos)
      return;

   bo_log = malloc(sizeof(*bo_log));
   if (!bo_log)
      return;

   bo_log->va = bo->base.va;
   bo_log->size = bo->base.size;
   bo_log->timestamp = os_time_get_nano();
   bo_log->is_virtual = bo->is_virtual;
   bo_log->destroyed = destroyed;

   u_rwlock_wrlock(&ws->log_bo_list_lock);
   list_addtail(&bo_log->list, &ws->log_bo_list);
   u_rwlock_wrunlock(&ws->log_bo_list_lock);
}

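/* Add a BO to the global BO list (grown on demand) so it is included in every
 * command submission; used for resident buffers and when the allbos debug
 * option forces all BOs onto the list.
 */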
static int
radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
      unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
      void *data = realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
      if (!data) {
         u_rwlock_wrunlock(&ws->global_bo_list.lock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
      ws->global_bo_list.capacity = capacity;
   }

   ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
   bo->base.use_global_list = true;
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
   return VK_SUCCESS;
}

static void
radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
   u_rwlock_wrlock(&ws->global_bo_list.lock);
   for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
      if (ws->global_bo_list.bos[i] == bo) {
         ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
         --ws->global_bo_list.count;
         bo->base.use_global_list = false;
         break;
      }
   }
   u_rwlock_wrunlock(&ws->global_bo_list.lock);
}

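/* Destroy a winsys BO: clear the PRT mappings of virtual BOs, unmap and free
 * real BOs, update the VRAM/GTT accounting and release the VA range.
 */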
static void
radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   radv_amdgpu_log_bo(ws, bo, true);

   if (bo->is_virtual) {
      int r;

      /* Clear mappings of this PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, bo->base.size, bo->base.va, 0, 0, AMDGPU_VA_OP_CLEAR);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to clear a PRT VA region (%d).\n", r);
      }

      free(bo->bos);
      free(bo->ranges);
      u_rwlock_destroy(&bo->lock);
   } else {
      if (bo->cpu_map)
         munmap(bo->cpu_map, bo->base.size);

      if (ws->debug_all_bos)
         radv_amdgpu_global_bo_list_del(ws, bo);
      radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->base.size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
      amdgpu_bo_free(bo->bo);
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, -align64(bo->base.size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, -align64(bo->base.size, ws->info.gart_page_size));
      }
   }

   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, -align64(bo->base.size, ws->info.gart_page_size));

   amdgpu_va_range_free(bo->va_handle);
   FREE(bo);
}

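/* Allocate a new winsys BO. Virtual (sparse) BOs only reserve a PRT VA range
 * and start with a single unbound range; real BOs get a GEM buffer with the
 * requested placement and flags, which is then mapped into the allocated VA
 * range.
 */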
static VkResult
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
                             enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags, unsigned priority,
                             uint64_t replay_address, struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   struct amdgpu_bo_alloc_request request = {0};
   struct radv_amdgpu_map_range *ranges = NULL;
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   amdgpu_va_handle va_handle;
   int r;
   VkResult result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   unsigned virt_alignment = alignment;
   if (size >= ws->info.pte_fragment_size)
      virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);

   assert(!replay_address || (flags & RADEON_FLAG_REPLAYABLE));

   const uint64_t va_flags = AMDGPU_VA_RANGE_HIGH | (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
                             (flags & RADEON_FLAG_REPLAYABLE ? AMDGPU_VA_RANGE_REPLAYABLE : 0);
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, replay_address, &va,
                             &va_handle, va_flags);
   if (r) {
      result = replay_address ? VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   bo->base.va = va;
   bo->base.size = size;
   bo->va_handle = va_handle;
   bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);

   if (flags & RADEON_FLAG_VIRTUAL) {
      ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
      if (!ranges) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error_ranges_alloc;
      }

      u_rwlock_init(&bo->lock);

      bo->ranges = ranges;
      bo->range_count = 1;
      bo->range_capacity = 1;

      bo->ranges[0].offset = 0;
      bo->ranges[0].size = size;
      bo->ranges[0].bo = NULL;
      bo->ranges[0].bo_offset = 0;

      /* Reserve a PRT VA region. */
      r = radv_amdgpu_bo_va_op(ws, NULL, 0, size, bo->base.va, 0, AMDGPU_VM_PAGE_PRT, AMDGPU_VA_OP_MAP);
      if (r) {
         fprintf(stderr, "radv/amdgpu: Failed to reserve a PRT VA region (%d).\n", r);
         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
         goto error_ranges_alloc;
      }

      radv_amdgpu_log_bo(ws, bo, false);

      *out_bo = (struct radeon_winsys_bo *)bo;
      return VK_SUCCESS;
   }

   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;

      /* Since VRAM and GTT have almost the same performance on
       * APUs, we could just set GTT. However, in order to decrease
       * GTT(RAM) usage, which is shared with the OS, allow VRAM
       * placements too. The idea is not that VRAM is especially
       * useful here, but that using it keeps it from sitting
       * unused and wasted.
       *
       * Furthermore, even on discrete GPUs this is beneficial. If
       * both GTT and VRAM are set then AMDGPU still prefers VRAM
       * for the initial placement, but it makes the buffers
       * spillable. Otherwise AMDGPU tries to place the buffers in
       * VRAM really hard to the extent that we are getting a lot
       * of unnecessary movement. This helps significantly when
       * e.g. Horizon Zero Dawn allocates more memory than we have
       * VRAM.
       */
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
   if (initial_domain & RADEON_DOMAIN_GDS)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
   if (initial_domain & RADEON_DOMAIN_OA)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
      bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   }
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
   if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
      request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
   if ((initial_domain & RADEON_DOMAIN_VRAM_GTT) && (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) &&
       ((ws->perftest & RADV_PERFTEST_LOCAL_BOS) || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
      bo->base.is_local = true;
      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
   }

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
         request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
   }

   if (flags & RADEON_FLAG_DISCARDABLE && ws->info.drm_minor >= 47)
      request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "radv/amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "radv/amdgpu:    size      : %" PRIu64 " bytes\n", size);
      fprintf(stderr, "radv/amdgpu:    alignment : %u bytes\n", alignment);
      fprintf(stderr, "radv/amdgpu:    domains   : %u\n", initial_domain);
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_bo_alloc;
   }

   r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   bo->bo = buf_handle;
   bo->base.initial_domain = initial_domain;
   bo->base.use_global_list = false;
   bo->priority = priority;
   bo->cpu_map = NULL;

   r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (initial_domain & RADEON_DOMAIN_VRAM) {
      /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
       * aren't mappable and are counted against the VRAM counter.
       *
       * Otherwise, buffers with the CPU_ACCESS flag or with neither
       * flag (imported buffers) are counted against the visible VRAM
       * counter because they can be mapped.
       */
      if (bo->base.vram_no_cpu_access) {
         p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
      } else {
         p_atomic_add(&ws->allocated_vram_vis, align64(bo->base.size, ws->info.gart_page_size));
      }
   }

   if (initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   free(ranges);

error_ranges_alloc:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   FREE(bo);
   return result;
}

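/* Map a BO for CPU access through the DRM_AMDGPU_GEM_MMAP ioctl plus mmap();
 * the mapping is cached in bo->cpu_map and can optionally be placed at a
 * caller-provided fixed address.
 */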
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool use_fixed_addr,
                          void *fixed_addr)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   /* Safeguard for the Quantic Dream layer skipping unmaps. */
   if (bo->cpu_map && !use_fixed_addr)
      return bo->cpu_map;

   assert(!bo->cpu_map);

   union drm_amdgpu_gem_mmap args;
   memset(&args, 0, sizeof(args));
   args.in.handle = bo->bo_handle;

   int ret =
      drmCommandWriteRead(amdgpu_device_get_fd(radv_amdgpu_winsys(_ws)->dev), DRM_AMDGPU_GEM_MMAP, &args, sizeof(args));
   if (ret)
      return NULL;

   void *data = mmap(fixed_addr, bo->base.size, PROT_READ | PROT_WRITE, MAP_SHARED | (use_fixed_addr ? MAP_FIXED : 0),
                     amdgpu_device_get_fd(radv_amdgpu_winsys(_ws)->dev), args.out.addr_ptr);
   if (data == MAP_FAILED)
      return NULL;

   bo->cpu_map = data;
   return data;
}

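/* Unmap a BO from the CPU. When 'replace' is set, the mapping is replaced by
 * an inaccessible anonymous mapping instead of being released, so the address
 * range stays reserved.
 */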
static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool replace)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

   /* Defense in depth against buggy apps. */
   if (!bo->cpu_map && !replace)
      return;

   assert(bo->cpu_map);
   if (replace) {
      (void)mmap(bo->cpu_map, bo->base.size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
   } else {
      munmap(bo->cpu_map, bo->base.size);
   }
   bo->cpu_map = NULL;
}

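/* Pick a VM alignment for faster GPU address translation: at least the PTE
 * fragment size for large buffers and, on GFX9+, the most significant bit of
 * the size.
 */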
static uint64_t
radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size, unsigned alignment)
{
   uint64_t vm_alignment = alignment;

   /* Increase the VM alignment for faster address translation. */
   if (size >= ws->info.pte_fragment_size)
      vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);

   /* Gfx9: Increase the VM alignment to the most significant bit set
    * in the size for faster address translation.
    */
   if (ws->info.gfx_level >= GFX9) {
      unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
      uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;

      vm_alignment = MAX2(vm_alignment, msb_alignment);
   }
   return vm_alignment;
}

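/* Import host memory (userptr) as a GTT BO: wrap the pointer with
 * amdgpu_create_bo_from_user_mem(), allocate a VA range and map it.
 */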
static VkResult
radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size, unsigned priority,
                               struct radeon_winsys_bo **out_bo)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   amdgpu_bo_handle buf_handle;
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   uint64_t vm_alignment;
   VkResult result = VK_SUCCESS;
   int ret;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   ret = amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle);
   if (ret) {
      if (ret == -EINVAL) {
         result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      } else {
         result = VK_ERROR_UNKNOWN;
      }
      goto error;
   }

   /* Using the optimal VM alignment also fixes GPU hangs for buffers that
    * are imported.
    */
   vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH)) {
      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_va_alloc;
   }

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP)) {
      result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   /* Initialize it */
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.size = size;
   bo->bo = buf_handle;
   bo->base.initial_domain = RADEON_DOMAIN_GTT;
   bo->base.use_global_list = false;
   bo->priority = priority;
   bo->cpu_map = NULL;

   ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return result;
}

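/* Import a BO from a dma-buf file descriptor: query its size and preferred
 * heap, allocate a VA range, map it and account it against the VRAM/GTT
 * counters.
 */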
static VkResult
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority, struct radeon_winsys_bo **out_bo,
                              uint64_t *alloc_size)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   struct amdgpu_bo_import_result result;
   struct amdgpu_bo_info info;
   enum radeon_bo_domain initial = 0;
   int r;
   VkResult vk_result = VK_SUCCESS;

   /* Just be robust for callers that might use NULL-ness for determining if things should be freed.
    */
   *out_bo = NULL;

   bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
   if (!bo)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   r = amdgpu_bo_import(ws->dev, type, fd, &result);
   if (r) {
      vk_result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
      goto error;
   }

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_query;
   }

   if (alloc_size) {
      *alloc_size = info.alloc_size;
   }

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0, &va, &va_handle,
                             AMDGPU_VA_RANGE_HIGH);
   if (r) {
      vk_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
      goto error_query;
   }

   r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
   if (r) {
      vk_result = VK_ERROR_UNKNOWN;
      goto error_va_map;
   }

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   bo->bo = result.buf_handle;
   bo->base.va = va;
   bo->va_handle = va_handle;
   bo->base.initial_domain = initial;
   bo->base.use_global_list = false;
   bo->base.size = result.alloc_size;
   bo->priority = priority;
   bo->cpu_map = NULL;

   r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
   assert(!r);

   if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
      p_atomic_add(&ws->allocated_vram, align64(bo->base.size, ws->info.gart_page_size));
   if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
      p_atomic_add(&ws->allocated_gtt, align64(bo->base.size, ws->info.gart_page_size));

   if (ws->debug_all_bos)
      radv_amdgpu_global_bo_list_add(ws, bo);
   radv_amdgpu_log_bo(ws, bo, false);

   *out_bo = (struct radeon_winsys_bo *)bo;
   return VK_SUCCESS;
error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return vk_result;
}

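/* Export a BO as a dma-buf file descriptor. */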
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
   int r;
   unsigned handle;
   r = amdgpu_bo_export(bo->bo, type, &handle);
   if (r)
      return false;

   *fd = (int)handle;
   return true;
}

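/* Import a dma-buf fd just long enough to query its preferred domains and
 * allocation flags, translated back into radeon_bo_domain/radeon_bo_flag bits.
 */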
static bool
radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
                                 enum radeon_bo_flag *flags)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct amdgpu_bo_import_result result = {0};
   struct amdgpu_bo_info info = {0};
   int r;

   *domains = 0;
   *flags = 0;

   r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
   if (r)
      return false;

   r = amdgpu_bo_query_info(result.buf_handle, &info);
   amdgpu_bo_free(result.buf_handle);
   if (r)
      return false;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      *domains |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      *domains |= RADEON_DOMAIN_GTT;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
      *domains |= RADEON_DOMAIN_GDS;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
      *domains |= RADEON_DOMAIN_OA;

   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
      *flags |= RADEON_FLAG_CPU_ACCESS;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
      *flags |= RADEON_FLAG_NO_CPU_ACCESS;
   if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
      *flags |= RADEON_FLAG_IMPLICIT_SYNC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
      *flags |= RADEON_FLAG_GTT_WC;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
      *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
   if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
      *flags |= RADEON_FLAG_ZERO_VRAM;
   return true;
}

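/* Translate the legacy (pre-GFX9) TILE_SPLIT tiling field to bytes; see
 * radv_eg_tile_split_rev() for the inverse mapping.
 */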
static unsigned
eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:
      tile_split = 64;
      break;
   case 1:
      tile_split = 128;
      break;
   case 2:
      tile_split = 256;
      break;
   case 3:
      tile_split = 512;
      break;
   default:
   case 4:
      tile_split = 1024;
      break;
   case 5:
      tile_split = 2048;
      break;
   case 6:
      tile_split = 4096;
      break;
   }
   return tile_split;
}

static unsigned
radv_eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:
      return 0;
   case 128:
      return 1;
   case 256:
      return 2;
   case 512:
      return 3;
   default:
   case 1024:
      return 4;
   case 2048:
      return 5;
   case 4096:
      return 6;
   }
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3

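/* Pack the RADV surface description into AMDGPU_TILING_* bits and store it as
 * kernel BO metadata so that other processes importing the BO can reconstruct
 * the image layout.
 */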
static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_metadata metadata = {0};
   uint64_t tiling_flags = 0;

   if (ws->info.gfx_level >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
      tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
      tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
   } else {
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
      tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
      tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
      if (md->u.legacy.tile_split)
         tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
      tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
      tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

      if (md->u.legacy.scanout)
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

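/* Read back the kernel BO metadata and unpack the AMDGPU_TILING_* bits into
 * the RADV surface description.
 */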
static void
radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
                                   struct radeon_bo_metadata *md)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   struct amdgpu_bo_info info = {0};

   int r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   uint64_t tiling_flags = info.metadata.tiling_info;

   if (ws->info.gfx_level >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
   } else {
      md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
      md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         md->u.legacy.microtile = RADEON_LAYOUT_TILED;

      md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
   }

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

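/* Make a BO (non-)resident by adding it to/removing it from the global BO
 * list. Local BOs are skipped because the kernel already tracks them, and so
 * are all BOs when the allbos debug option keeps everything on the list.
 */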
static VkResult
radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, bool resident)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
   VkResult result = VK_SUCCESS;

   /* Do not add the BO to the global list if it's a local BO because the
    * kernel maintains a list for us.
    */
   if (bo->base.is_local)
      return VK_SUCCESS;

   /* Do not add the BO twice to the global list if the allbos debug
    * option is enabled.
    */
   if (ws->debug_all_bos)
      return VK_SUCCESS;

   if (resident) {
      result = radv_amdgpu_global_bo_list_add(ws, bo);
   } else {
      radv_amdgpu_global_bo_list_del(ws, bo);
   }

   return result;
}

static int
radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
   const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
   const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
   return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}

static uint64_t
radv_amdgpu_canonicalize_va(uint64_t va)
{
   /* It would be less hardcoded to derive the mask from addr32_hi (0xffff8000), but kernel page
    * fault reports are confusing: they appear to contain the top 48 bits, while addr32_hi only
    * covers 47 bits. */
   return va & ((1ull << 48) - 1);
}

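/* Dump the BO creation/destruction log to a file, with canonicalized VAs that
 * match the addresses seen in kernel page fault reports.
 */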
static void
radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   struct radv_amdgpu_winsys_bo_log *bo_log;

   if (!ws->debug_log_bos)
      return;

   u_rwlock_rdlock(&ws->log_bo_list_lock);
   LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
      fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n", (long long)bo_log->timestamp,
              (long long)radv_amdgpu_canonicalize_va(bo_log->va),
              (long long)radv_amdgpu_canonicalize_va(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
   }
   u_rwlock_rdunlock(&ws->log_bo_list_lock);
}

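/* Dump the VA ranges of all BOs on the global list, sorted by address; this
 * requires RADV_DEBUG=allbos.
 */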
static void
radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
   struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
   if (ws->debug_all_bos) {
      struct radv_amdgpu_winsys_bo **bos = NULL;
      int i = 0;

      u_rwlock_rdlock(&ws->global_bo_list.lock);
      bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
      if (!bos) {
         u_rwlock_rdunlock(&ws->global_bo_list.lock);
         fprintf(file, "  Failed to allocate memory to sort VA ranges for dumping\n");
         return;
      }

      for (i = 0; i < ws->global_bo_list.count; i++) {
         bos[i] = ws->global_bo_list.bos[i];
      }
      qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);

      for (i = 0; i < ws->global_bo_list.count; ++i) {
         fprintf(file, "  VA=%.16llx-%.16llx, handle=%d\n", (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va),
                 (long long)radv_amdgpu_canonicalize_va(bos[i]->base.va + bos[i]->base.size), bos[i]->bo_handle);
      }
      free(bos);
      u_rwlock_rdunlock(&ws->global_bo_list.lock);
   } else
      fprintf(file, "  To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}
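
/* Plug the BO entry points implemented above into the winsys function table. */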
void
radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
   ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
   ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
   ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
   ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
   ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
   ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
   ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
   ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
   ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
   ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
   ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
   ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
   ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
   ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}