xref: /aosp_15_r20/external/mesa3d/src/broadcom/vulkan/v3dv_bo.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 
26 #include <errno.h>
27 #include <sys/mman.h>
28 
29 #include "drm-uapi/v3d_drm.h"
30 #include "util/u_memory.h"
31 
/* Default max size of the bo cache, in MB.
 *
 * This value comes from testing different Vulkan applications. Greater values
 * didn't provide any further performance benefit. This looks somewhat small,
 * but from testing those applications, the main consumers of the bo cache are
 * the bos used for the CLs, which are usually small.
 */
39 #define DEFAULT_MAX_BO_CACHE_SIZE 64
40 
/* We decided against using a V3D_DEBUG option for this, as it would mean
 * adding a run-time check to most of the calls.
 */
44 static const bool dump_stats = false;
45 
46 static void
bo_dump_stats(struct v3dv_device * device)47 bo_dump_stats(struct v3dv_device *device)
48 {
49    struct v3dv_bo_cache *cache = &device->bo_cache;
50 
51    fprintf(stderr, "  BOs allocated:   %d\n", device->bo_count);
52    fprintf(stderr, "  BOs size:        %dkb\n", device->bo_size / 1024);
53    fprintf(stderr, "  BOs cached:      %d\n", cache->cache_count);
54    fprintf(stderr, "  BOs cached size: %dkb\n", cache->cache_size / 1024);
55 
56    if (!list_is_empty(&cache->time_list)) {
57       struct v3dv_bo *first = list_first_entry(&cache->time_list,
58                                               struct v3dv_bo,
59                                               time_list);
60       struct v3dv_bo *last = list_last_entry(&cache->time_list,
61                                             struct v3dv_bo,
62                                             time_list);
63 
64       fprintf(stderr, "  oldest cache time: %ld\n",
65               (long)first->free_time);
66       fprintf(stderr, "  newest cache time: %ld\n",
67               (long)last->free_time);
68 
69       struct timespec time;
70       clock_gettime(CLOCK_MONOTONIC, &time);
71       fprintf(stderr, "  now:               %lld\n",
72               (long long)time.tv_sec);
73    }
74 
75    if (cache->size_list_size) {
76       uint32_t empty_size_list = 0;
77       for (uint32_t i = 0; i < cache->size_list_size; i++) {
78          if (list_is_empty(&cache->size_list[i]))
79             empty_size_list++;
80       }
81       fprintf(stderr, "  Empty size_list lists: %d\n", empty_size_list);
82    }
83 }
84 
85 static void
bo_remove_from_cache(struct v3dv_bo_cache * cache,struct v3dv_bo * bo)86 bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
87 {
88    list_del(&bo->time_list);
89    list_del(&bo->size_list);
90 
91    cache->cache_count--;
92    cache->cache_size -= bo->size;
93 }
94 
95 static struct v3dv_bo *
bo_from_cache(struct v3dv_device * device,uint32_t size,const char * name)96 bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
97 {
98    struct v3dv_bo_cache *cache = &device->bo_cache;
99    uint32_t page_index = size / 4096 - 1;
100 
101    if (cache->size_list_size <= page_index)
102       return NULL;
103 
104    struct v3dv_bo *bo = NULL;
105 
106    mtx_lock(&cache->lock);
107    if (!list_is_empty(&cache->size_list[page_index])) {
108       bo = list_first_entry(&cache->size_list[page_index],
109                             struct v3dv_bo, size_list);
110 
111       /* Check that the BO has gone idle.  If not, then we want to
112        * allocate something new instead, since we assume that the
113        * user will proceed to CPU map it and fill it with stuff.
114        */
115       if (!v3dv_bo_wait(device, bo, 0)) {
116          mtx_unlock(&cache->lock);
117          return NULL;
118       }
119 
120       bo_remove_from_cache(cache, bo);
121       bo->name = name;
122       p_atomic_set(&bo->refcnt, 1);
123    }
124    mtx_unlock(&cache->lock);
125    return bo;
126 }
127 
128 static bool
bo_free(struct v3dv_device * device,struct v3dv_bo * bo)129 bo_free(struct v3dv_device *device,
130         struct v3dv_bo *bo)
131 {
132    if (!bo)
133       return true;
134 
135    assert(p_atomic_read(&bo->refcnt) == 0);
136    assert(bo->map == NULL);
137 
138    if (!bo->is_import) {
139       device->bo_count--;
140       device->bo_size -= bo->size;
141 
142       if (dump_stats) {
143          fprintf(stderr, "Freed %s%s%dkb:\n",
144                  bo->name ? bo->name : "",
145                  bo->name ? " " : "",
146                  bo->size / 1024);
147          bo_dump_stats(device);
148       }
149    }
150 
151    uint32_t handle = bo->handle;
152    /* Our BO structs are stored in a sparse array in the physical device,
153     * so we don't want to free the BO pointer, instead we want to reset it
154     * to 0, to signal that array entry as being free.
155     *
156     * We must do the reset before we actually free the BO in the kernel, since
157     * otherwise there is a chance the application creates another BO in a
158     * different thread and gets the same array entry, causing a race.
159     */
160    memset(bo, 0, sizeof(*bo));
161 
162    struct drm_gem_close c;
163    memset(&c, 0, sizeof(c));
164    c.handle = handle;
165    int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
166    if (ret != 0)
167       fprintf(stderr, "close object %d: %s\n", handle, strerror(errno));
168 
169    return ret == 0;
170 }
171 
172 static void
bo_cache_free_all(struct v3dv_device * device,bool with_lock)173 bo_cache_free_all(struct v3dv_device *device,
174                        bool with_lock)
175 {
176    struct v3dv_bo_cache *cache = &device->bo_cache;
177 
178    if (with_lock)
179       mtx_lock(&cache->lock);
180    list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
181                             time_list) {
182       bo_remove_from_cache(cache, bo);
183       bo_free(device, bo);
184    }
185    if (with_lock)
186       mtx_unlock(&cache->lock);
187 
188 }
189 
190 void
v3dv_bo_init(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,const char * name,bool private)191 v3dv_bo_init(struct v3dv_bo *bo,
192              uint32_t handle,
193              uint32_t size,
194              uint32_t offset,
195              const char *name,
196              bool private)
197 {
198    p_atomic_set(&bo->refcnt, 1);
199    bo->handle = handle;
200    bo->handle_bit = 1ull << (handle % 64);
201    bo->size = size;
202    bo->offset = offset;
203    bo->map = NULL;
204    bo->map_size = 0;
205    bo->name = name;
206    bo->private = private;
207    bo->dumb_handle = -1;
208    bo->is_import = false;
209    bo->cl_branch_offset = 0xffffffff;
210    list_inithead(&bo->list_link);
211 }
212 
213 void
v3dv_bo_init_import(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,bool private)214 v3dv_bo_init_import(struct v3dv_bo *bo,
215                     uint32_t handle,
216                     uint32_t size,
217                     uint32_t offset,
218                     bool private)
219 {
220    v3dv_bo_init(bo, handle, size, offset, "import", private);
221    bo->is_import = true;
222 }
223 
224 struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device * device,uint32_t size,const char * name,bool private)225 v3dv_bo_alloc(struct v3dv_device *device,
226               uint32_t size,
227               const char *name,
228               bool private)
229 {
230    struct v3dv_bo *bo;
231 
232    const uint32_t page_align = 4096; /* Always allocate full pages */
233    size = align(size, page_align);
234 
235    if (private) {
236       bo = bo_from_cache(device, size, name);
237       if (bo) {
238          if (dump_stats) {
239             fprintf(stderr, "Allocated %s %dkb from cache:\n",
240                     name, size / 1024);
241             bo_dump_stats(device);
242          }
243          return bo;
244       }
245    }
246 
247  retry:
248    ;
249 
250    bool cleared_and_retried = false;
251    struct drm_v3d_create_bo create = {
252       .size = size
253    };
254 
255    int ret = v3dv_ioctl(device->pdevice->render_fd,
256                         DRM_IOCTL_V3D_CREATE_BO, &create);
257    if (ret != 0) {
258       if (!list_is_empty(&device->bo_cache.time_list) &&
259           !cleared_and_retried) {
260          cleared_and_retried = true;
261          bo_cache_free_all(device, true);
262          goto retry;
263       }
264 
265       fprintf(stderr, "Failed to allocate device memory for BO\n");
266       return NULL;
267    }
268 
269    assert(create.offset % page_align == 0);
270    assert((create.offset & 0xffffffff) == create.offset);
271 
272    bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
273    assert(bo && bo->handle == 0);
274 
275    v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
276 
277    device->bo_count++;
278    device->bo_size += bo->size;
279    if (dump_stats) {
280       fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
281       bo_dump_stats(device);
282    }
283 
284    return bo;
285 }
286 
287 bool
v3dv_bo_map_unsynchronized(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)288 v3dv_bo_map_unsynchronized(struct v3dv_device *device,
289                            struct v3dv_bo *bo,
290                            uint32_t size)
291 {
292    assert(bo != NULL && size <= bo->size);
293 
294    if (bo->map)
295       return bo->map;
296 
297    struct drm_v3d_mmap_bo map;
298    memset(&map, 0, sizeof(map));
299    map.handle = bo->handle;
300    int ret = v3dv_ioctl(device->pdevice->render_fd,
301                         DRM_IOCTL_V3D_MMAP_BO, &map);
302    if (ret != 0) {
303       fprintf(stderr, "map ioctl failure\n");
304       return false;
305    }
306 
307    bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
308                   device->pdevice->render_fd, map.offset);
309    if (bo->map == MAP_FAILED) {
310       fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
311               bo->handle, (long long)map.offset, (uint32_t)bo->size);
312       return false;
313    }
314    VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
315 
316    bo->map_size = size;
317 
318    return true;
319 }
320 
321 bool
v3dv_bo_wait(struct v3dv_device * device,struct v3dv_bo * bo,uint64_t timeout_ns)322 v3dv_bo_wait(struct v3dv_device *device,
323              struct v3dv_bo *bo,
324              uint64_t timeout_ns)
325 {
326    struct drm_v3d_wait_bo wait = {
327       .handle = bo->handle,
328       .timeout_ns = timeout_ns,
329    };
330    return v3dv_ioctl(device->pdevice->render_fd,
331                      DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
332 }
333 
334 bool
v3dv_bo_map(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)335 v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
336 {
337    assert(bo && size <= bo->size);
338 
339    bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
340    if (!ok)
341       return false;
342 
343    ok = v3dv_bo_wait(device, bo, OS_TIMEOUT_INFINITE);
344    if (!ok) {
345       fprintf(stderr, "memory wait for map failed\n");
346       return false;
347    }
348 
349    return true;
350 }
351 
352 void
v3dv_bo_unmap(struct v3dv_device * device,struct v3dv_bo * bo)353 v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
354 {
355    assert(bo && bo->map && bo->map_size > 0);
356 
357    munmap(bo->map, bo->map_size);
358    VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
359    bo->map = NULL;
360    bo->map_size = 0;
361 }
362 
363 static bool
reallocate_size_list(struct v3dv_bo_cache * cache,struct v3dv_device * device,uint32_t size)364 reallocate_size_list(struct v3dv_bo_cache *cache,
365                      struct v3dv_device *device,
366                      uint32_t size)
367 {
368    struct list_head *new_list =
369       vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
370                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
371 
372    if (!new_list) {
373       fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
374       return false;
375    }
376    struct list_head *old_list = cache->size_list;
377 
378    /* Move old list contents over (since the array has moved, and
379     * therefore the pointers to the list heads have to change).
380     */
381    for (int i = 0; i < cache->size_list_size; i++) {
382       struct list_head *old_head = &cache->size_list[i];
383       if (list_is_empty(old_head)) {
384          list_inithead(&new_list[i]);
385       } else {
386          new_list[i].next = old_head->next;
387          new_list[i].prev = old_head->prev;
388          new_list[i].next->prev = &new_list[i];
389          new_list[i].prev->next = &new_list[i];
390       }
391    }
392    for (int i = cache->size_list_size; i < size; i++)
393       list_inithead(&new_list[i]);
394 
395    cache->size_list = new_list;
396    cache->size_list_size = size;
397    vk_free(&device->vk.alloc, old_list);
398 
399    return true;
400 }
401 
402 void
v3dv_bo_cache_init(struct v3dv_device * device)403 v3dv_bo_cache_init(struct v3dv_device *device)
404 {
405    device->bo_size = 0;
406    device->bo_count = 0;
407    list_inithead(&device->bo_cache.time_list);
408    /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
409     * reallocations
410     */
411    device->bo_cache.size_list_size = 0;
412 
413    const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
414    if (max_cache_size_str == NULL)
415       device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
416    else
417       device->bo_cache.max_cache_size = atoll(max_cache_size_str);
418 
419    if (dump_stats) {
420       fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
421    }
422 
423    mtx_lock(&device->bo_cache.lock);
424    device->bo_cache.max_cache_size *= 1024 * 1024;
425    device->bo_cache.cache_count = 0;
426    device->bo_cache.cache_size = 0;
427    mtx_unlock(&device->bo_cache.lock);
428 }
429 
430 void
v3dv_bo_cache_destroy(struct v3dv_device * device)431 v3dv_bo_cache_destroy(struct v3dv_device *device)
432 {
433    bo_cache_free_all(device, true);
434    vk_free(&device->vk.alloc, device->bo_cache.size_list);
435 
436    if (dump_stats) {
437       fprintf(stderr, "BO stats after screen destroy:\n");
438       bo_dump_stats(device);
439    }
440 }
441 
442 
443 static void
free_stale_bos(struct v3dv_device * device,time_t time)444 free_stale_bos(struct v3dv_device *device,
445                time_t time)
446 {
447    struct v3dv_bo_cache *cache = &device->bo_cache;
448    bool freed_any = false;
449 
450    list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
451                             time_list) {
452       /* If it's more than a second old, free it. */
453       if (time - bo->free_time > 2) {
454          if (dump_stats && !freed_any) {
455             fprintf(stderr, "Freeing stale BOs:\n");
456             bo_dump_stats(device);
457             freed_any = true;
458          }
459 
460          bo_remove_from_cache(cache, bo);
461          bo_free(device, bo);
462       } else {
463          break;
464       }
465    }
466 
467    if (dump_stats && freed_any) {
468       fprintf(stderr, "Freed stale BOs:\n");
469       bo_dump_stats(device);
470    }
471 }
472 
473 bool
v3dv_bo_free(struct v3dv_device * device,struct v3dv_bo * bo)474 v3dv_bo_free(struct v3dv_device *device,
475              struct v3dv_bo *bo)
476 {
477    if (!bo)
478       return true;
479 
480    if (!p_atomic_dec_zero(&bo->refcnt))
481       return true;
482 
483    if (bo->map)
484       v3dv_bo_unmap(device, bo);
485 
486    struct timespec time;
487    struct v3dv_bo_cache *cache = &device->bo_cache;
488    uint32_t page_index = bo->size / 4096 - 1;
489 
490    if (bo->private &&
491        bo->size > cache->max_cache_size - cache->cache_size) {
492       clock_gettime(CLOCK_MONOTONIC, &time);
493       mtx_lock(&cache->lock);
494       free_stale_bos(device, time.tv_sec);
495       mtx_unlock(&cache->lock);
496    }
497 
498    if (!bo->private ||
499        bo->size > cache->max_cache_size - cache->cache_size) {
500       return bo_free(device, bo);
501    }
502 
503    clock_gettime(CLOCK_MONOTONIC, &time);
504    mtx_lock(&cache->lock);
505 
506    if (cache->size_list_size <= page_index) {
507       if (!reallocate_size_list(cache, device, page_index + 1)) {
508          bool outcome = bo_free(device, bo);
509          /* If the reallocation failed, it usually means that we are out of
510           * memory, so we also free all the bo cache. We need to call it to
511           * not use the cache lock, as we are already under it.
512           */
513          bo_cache_free_all(device, false);
514          mtx_unlock(&cache->lock);
515          return outcome;
516       }
517    }
518 
519    bo->free_time = time.tv_sec;
520    list_addtail(&bo->size_list, &cache->size_list[page_index]);
521    list_addtail(&bo->time_list, &cache->time_list);
522 
523    cache->cache_count++;
524    cache->cache_size += bo->size;
525 
526    if (dump_stats) {
527       fprintf(stderr, "Freed %s %dkb to cache:\n",
528               bo->name, bo->size / 1024);
529       bo_dump_stats(device);
530    }
531    bo->name = NULL;
532 
533    free_stale_bos(device, time.tv_sec);
534 
535    mtx_unlock(&cache->lock);
536 
537    return true;
538 }
539