1 /*
2 * Copyright © 2019 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25
26 #include <errno.h>
27 #include <sys/mman.h>
28
29 #include "drm-uapi/v3d_drm.h"
30 #include "util/u_memory.h"
31
/* Default max size of the bo cache, in MB.
 *
 * This value comes from testing different Vulkan applications. Greater
 * values didn't get any further performance benefit. This looks somewhat
 * small, but from testing those applications, the main consumers of the
 * bo cache are the bos used for the CLs, which are usually small.
 */
#define DEFAULT_MAX_BO_CACHE_SIZE 64
40
/* We decided against using a V3D_DEBUG option for this, as it would mean
 * adding a run-time check to most of the calls.
 */
static const bool dump_stats = false;
45
/* Dumps BO allocation and cache statistics (counts, sizes, and the age
 * range of cached entries) to stderr. Only used when dump_stats is set.
 */
static void
bo_dump_stats(struct v3dv_device *device)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;

   fprintf(stderr, " BOs allocated: %d\n", device->bo_count);
   fprintf(stderr, " BOs size: %dkb\n", device->bo_size / 1024);
   fprintf(stderr, " BOs cached: %d\n", cache->cache_count);
   fprintf(stderr, " BOs cached size: %dkb\n", cache->cache_size / 1024);

   /* The time list is kept in insertion order (oldest first), so its two
    * ends give the age range of the cached BOs.
    */
   if (!list_is_empty(&cache->time_list)) {
      struct v3dv_bo *first = list_first_entry(&cache->time_list,
                                               struct v3dv_bo,
                                               time_list);
      struct v3dv_bo *last = list_last_entry(&cache->time_list,
                                             struct v3dv_bo,
                                             time_list);

      fprintf(stderr, " oldest cache time: %ld\n",
              (long)first->free_time);
      fprintf(stderr, " newest cache time: %ld\n",
              (long)last->free_time);

      /* CLOCK_MONOTONIC matches the clock used to stamp free_time. */
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      fprintf(stderr, " now: %lld\n",
              (long long)time.tv_sec);
   }

   /* Report how many per-size buckets are currently empty. */
   if (cache->size_list_size) {
      uint32_t empty_size_list = 0;
      for (uint32_t i = 0; i < cache->size_list_size; i++) {
         if (list_is_empty(&cache->size_list[i]))
            empty_size_list++;
      }
      fprintf(stderr, " Empty size_list lists: %d\n", empty_size_list);
   }
}
84
85 static void
bo_remove_from_cache(struct v3dv_bo_cache * cache,struct v3dv_bo * bo)86 bo_remove_from_cache(struct v3dv_bo_cache *cache, struct v3dv_bo *bo)
87 {
88 list_del(&bo->time_list);
89 list_del(&bo->size_list);
90
91 cache->cache_count--;
92 cache->cache_size -= bo->size;
93 }
94
95 static struct v3dv_bo *
bo_from_cache(struct v3dv_device * device,uint32_t size,const char * name)96 bo_from_cache(struct v3dv_device *device, uint32_t size, const char *name)
97 {
98 struct v3dv_bo_cache *cache = &device->bo_cache;
99 uint32_t page_index = size / 4096 - 1;
100
101 if (cache->size_list_size <= page_index)
102 return NULL;
103
104 struct v3dv_bo *bo = NULL;
105
106 mtx_lock(&cache->lock);
107 if (!list_is_empty(&cache->size_list[page_index])) {
108 bo = list_first_entry(&cache->size_list[page_index],
109 struct v3dv_bo, size_list);
110
111 /* Check that the BO has gone idle. If not, then we want to
112 * allocate something new instead, since we assume that the
113 * user will proceed to CPU map it and fill it with stuff.
114 */
115 if (!v3dv_bo_wait(device, bo, 0)) {
116 mtx_unlock(&cache->lock);
117 return NULL;
118 }
119
120 bo_remove_from_cache(cache, bo);
121 bo->name = name;
122 p_atomic_set(&bo->refcnt, 1);
123 }
124 mtx_unlock(&cache->lock);
125 return bo;
126 }
127
128 static bool
bo_free(struct v3dv_device * device,struct v3dv_bo * bo)129 bo_free(struct v3dv_device *device,
130 struct v3dv_bo *bo)
131 {
132 if (!bo)
133 return true;
134
135 assert(p_atomic_read(&bo->refcnt) == 0);
136 assert(bo->map == NULL);
137
138 if (!bo->is_import) {
139 device->bo_count--;
140 device->bo_size -= bo->size;
141
142 if (dump_stats) {
143 fprintf(stderr, "Freed %s%s%dkb:\n",
144 bo->name ? bo->name : "",
145 bo->name ? " " : "",
146 bo->size / 1024);
147 bo_dump_stats(device);
148 }
149 }
150
151 uint32_t handle = bo->handle;
152 /* Our BO structs are stored in a sparse array in the physical device,
153 * so we don't want to free the BO pointer, instead we want to reset it
154 * to 0, to signal that array entry as being free.
155 *
156 * We must do the reset before we actually free the BO in the kernel, since
157 * otherwise there is a chance the application creates another BO in a
158 * different thread and gets the same array entry, causing a race.
159 */
160 memset(bo, 0, sizeof(*bo));
161
162 struct drm_gem_close c;
163 memset(&c, 0, sizeof(c));
164 c.handle = handle;
165 int ret = v3dv_ioctl(device->pdevice->render_fd, DRM_IOCTL_GEM_CLOSE, &c);
166 if (ret != 0)
167 fprintf(stderr, "close object %d: %s\n", handle, strerror(errno));
168
169 return ret == 0;
170 }
171
172 static void
bo_cache_free_all(struct v3dv_device * device,bool with_lock)173 bo_cache_free_all(struct v3dv_device *device,
174 bool with_lock)
175 {
176 struct v3dv_bo_cache *cache = &device->bo_cache;
177
178 if (with_lock)
179 mtx_lock(&cache->lock);
180 list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
181 time_list) {
182 bo_remove_from_cache(cache, bo);
183 bo_free(device, bo);
184 }
185 if (with_lock)
186 mtx_unlock(&cache->lock);
187
188 }
189
190 void
v3dv_bo_init(struct v3dv_bo * bo,uint32_t handle,uint32_t size,uint32_t offset,const char * name,bool private)191 v3dv_bo_init(struct v3dv_bo *bo,
192 uint32_t handle,
193 uint32_t size,
194 uint32_t offset,
195 const char *name,
196 bool private)
197 {
198 p_atomic_set(&bo->refcnt, 1);
199 bo->handle = handle;
200 bo->handle_bit = 1ull << (handle % 64);
201 bo->size = size;
202 bo->offset = offset;
203 bo->map = NULL;
204 bo->map_size = 0;
205 bo->name = name;
206 bo->private = private;
207 bo->dumb_handle = -1;
208 bo->is_import = false;
209 bo->cl_branch_offset = 0xffffffff;
210 list_inithead(&bo->list_link);
211 }
212
/* Initializes a BO struct for an imported buffer. Same as v3dv_bo_init()
 * but tagged as an import, so it is not accounted against this device's
 * allocation stats when freed (see bo_free()).
 */
void
v3dv_bo_init_import(struct v3dv_bo *bo,
                    uint32_t handle,
                    uint32_t size,
                    uint32_t offset,
                    bool private)
{
   v3dv_bo_init(bo, handle, size, offset, "import", private);
   bo->is_import = true;
}
223
224 struct v3dv_bo *
v3dv_bo_alloc(struct v3dv_device * device,uint32_t size,const char * name,bool private)225 v3dv_bo_alloc(struct v3dv_device *device,
226 uint32_t size,
227 const char *name,
228 bool private)
229 {
230 struct v3dv_bo *bo;
231
232 const uint32_t page_align = 4096; /* Always allocate full pages */
233 size = align(size, page_align);
234
235 if (private) {
236 bo = bo_from_cache(device, size, name);
237 if (bo) {
238 if (dump_stats) {
239 fprintf(stderr, "Allocated %s %dkb from cache:\n",
240 name, size / 1024);
241 bo_dump_stats(device);
242 }
243 return bo;
244 }
245 }
246
247 retry:
248 ;
249
250 bool cleared_and_retried = false;
251 struct drm_v3d_create_bo create = {
252 .size = size
253 };
254
255 int ret = v3dv_ioctl(device->pdevice->render_fd,
256 DRM_IOCTL_V3D_CREATE_BO, &create);
257 if (ret != 0) {
258 if (!list_is_empty(&device->bo_cache.time_list) &&
259 !cleared_and_retried) {
260 cleared_and_retried = true;
261 bo_cache_free_all(device, true);
262 goto retry;
263 }
264
265 fprintf(stderr, "Failed to allocate device memory for BO\n");
266 return NULL;
267 }
268
269 assert(create.offset % page_align == 0);
270 assert((create.offset & 0xffffffff) == create.offset);
271
272 bo = v3dv_device_lookup_bo(device->pdevice, create.handle);
273 assert(bo && bo->handle == 0);
274
275 v3dv_bo_init(bo, create.handle, size, create.offset, name, private);
276
277 device->bo_count++;
278 device->bo_size += bo->size;
279 if (dump_stats) {
280 fprintf(stderr, "Allocated %s %dkb:\n", name, size / 1024);
281 bo_dump_stats(device);
282 }
283
284 return bo;
285 }
286
287 bool
v3dv_bo_map_unsynchronized(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)288 v3dv_bo_map_unsynchronized(struct v3dv_device *device,
289 struct v3dv_bo *bo,
290 uint32_t size)
291 {
292 assert(bo != NULL && size <= bo->size);
293
294 if (bo->map)
295 return bo->map;
296
297 struct drm_v3d_mmap_bo map;
298 memset(&map, 0, sizeof(map));
299 map.handle = bo->handle;
300 int ret = v3dv_ioctl(device->pdevice->render_fd,
301 DRM_IOCTL_V3D_MMAP_BO, &map);
302 if (ret != 0) {
303 fprintf(stderr, "map ioctl failure\n");
304 return false;
305 }
306
307 bo->map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
308 device->pdevice->render_fd, map.offset);
309 if (bo->map == MAP_FAILED) {
310 fprintf(stderr, "mmap of bo %d (offset 0x%016llx, size %d) failed\n",
311 bo->handle, (long long)map.offset, (uint32_t)bo->size);
312 return false;
313 }
314 VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, false));
315
316 bo->map_size = size;
317
318 return true;
319 }
320
321 bool
v3dv_bo_wait(struct v3dv_device * device,struct v3dv_bo * bo,uint64_t timeout_ns)322 v3dv_bo_wait(struct v3dv_device *device,
323 struct v3dv_bo *bo,
324 uint64_t timeout_ns)
325 {
326 struct drm_v3d_wait_bo wait = {
327 .handle = bo->handle,
328 .timeout_ns = timeout_ns,
329 };
330 return v3dv_ioctl(device->pdevice->render_fd,
331 DRM_IOCTL_V3D_WAIT_BO, &wait) == 0;
332 }
333
334 bool
v3dv_bo_map(struct v3dv_device * device,struct v3dv_bo * bo,uint32_t size)335 v3dv_bo_map(struct v3dv_device *device, struct v3dv_bo *bo, uint32_t size)
336 {
337 assert(bo && size <= bo->size);
338
339 bool ok = v3dv_bo_map_unsynchronized(device, bo, size);
340 if (!ok)
341 return false;
342
343 ok = v3dv_bo_wait(device, bo, OS_TIMEOUT_INFINITE);
344 if (!ok) {
345 fprintf(stderr, "memory wait for map failed\n");
346 return false;
347 }
348
349 return true;
350 }
351
352 void
v3dv_bo_unmap(struct v3dv_device * device,struct v3dv_bo * bo)353 v3dv_bo_unmap(struct v3dv_device *device, struct v3dv_bo *bo)
354 {
355 assert(bo && bo->map && bo->map_size > 0);
356
357 munmap(bo->map, bo->map_size);
358 VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
359 bo->map = NULL;
360 bo->map_size = 0;
361 }
362
363 static bool
reallocate_size_list(struct v3dv_bo_cache * cache,struct v3dv_device * device,uint32_t size)364 reallocate_size_list(struct v3dv_bo_cache *cache,
365 struct v3dv_device *device,
366 uint32_t size)
367 {
368 struct list_head *new_list =
369 vk_alloc(&device->vk.alloc, sizeof(struct list_head) * size, 8,
370 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
371
372 if (!new_list) {
373 fprintf(stderr, "Failed to allocate host memory for cache bo list\n");
374 return false;
375 }
376 struct list_head *old_list = cache->size_list;
377
378 /* Move old list contents over (since the array has moved, and
379 * therefore the pointers to the list heads have to change).
380 */
381 for (int i = 0; i < cache->size_list_size; i++) {
382 struct list_head *old_head = &cache->size_list[i];
383 if (list_is_empty(old_head)) {
384 list_inithead(&new_list[i]);
385 } else {
386 new_list[i].next = old_head->next;
387 new_list[i].prev = old_head->prev;
388 new_list[i].next->prev = &new_list[i];
389 new_list[i].prev->next = &new_list[i];
390 }
391 }
392 for (int i = cache->size_list_size; i < size; i++)
393 list_inithead(&new_list[i]);
394
395 cache->size_list = new_list;
396 cache->size_list_size = size;
397 vk_free(&device->vk.alloc, old_list);
398
399 return true;
400 }
401
402 void
v3dv_bo_cache_init(struct v3dv_device * device)403 v3dv_bo_cache_init(struct v3dv_device *device)
404 {
405 device->bo_size = 0;
406 device->bo_count = 0;
407 list_inithead(&device->bo_cache.time_list);
408 /* FIXME: perhaps set a initial size for the size-list, to avoid run-time
409 * reallocations
410 */
411 device->bo_cache.size_list_size = 0;
412
413 const char *max_cache_size_str = getenv("V3DV_MAX_BO_CACHE_SIZE");
414 if (max_cache_size_str == NULL)
415 device->bo_cache.max_cache_size = DEFAULT_MAX_BO_CACHE_SIZE;
416 else
417 device->bo_cache.max_cache_size = atoll(max_cache_size_str);
418
419 if (dump_stats) {
420 fprintf(stderr, "MAX BO CACHE SIZE: %iMB\n", device->bo_cache.max_cache_size);
421 }
422
423 mtx_lock(&device->bo_cache.lock);
424 device->bo_cache.max_cache_size *= 1024 * 1024;
425 device->bo_cache.cache_count = 0;
426 device->bo_cache.cache_size = 0;
427 mtx_unlock(&device->bo_cache.lock);
428 }
429
/* Tears down the BO cache: frees every cached BO and the size-bucket
 * array. Called at device destruction time.
 */
void
v3dv_bo_cache_destroy(struct v3dv_device *device)
{
   bo_cache_free_all(device, true);
   /* The bucket array is host memory allocated in reallocate_size_list(). */
   vk_free(&device->vk.alloc, device->bo_cache.size_list);

   if (dump_stats) {
      fprintf(stderr, "BO stats after screen destroy:\n");
      bo_dump_stats(device);
   }
}
441
442
/* Frees cached BOs that have been sitting unused in the cache for more
 * than 2 seconds. The time list is kept oldest-first, so we can stop at
 * the first entry that is still fresh. Expected to be called with the
 * cache lock held (see callers in v3dv_bo_free()).
 */
static void
free_stale_bos(struct v3dv_device *device,
               time_t time)
{
   struct v3dv_bo_cache *cache = &device->bo_cache;
   bool freed_any = false;

   list_for_each_entry_safe(struct v3dv_bo, bo, &cache->time_list,
                            time_list) {
      /* If it's more than 2 seconds old, free it. */
      if (time - bo->free_time > 2) {
         /* Dump stats once, before the first eviction. */
         if (dump_stats && !freed_any) {
            fprintf(stderr, "Freeing stale BOs:\n");
            bo_dump_stats(device);
            freed_any = true;
         }

         bo_remove_from_cache(cache, bo);
         bo_free(device, bo);
      } else {
         break;
      }
   }

   if (dump_stats && freed_any) {
      fprintf(stderr, "Freed stale BOs:\n");
      bo_dump_stats(device);
   }
}
472
/* Drops a reference to a BO. When the last reference goes away, the BO is
 * either stashed in the BO cache (private BOs that fit under the cache
 * budget) or returned to the kernel via bo_free().
 *
 * Returns true on success (including when references remain or the BO was
 * cached); otherwise returns bo_free()'s result.
 */
bool
v3dv_bo_free(struct v3dv_device *device,
             struct v3dv_bo *bo)
{
   if (!bo)
      return true;

   /* Only the last reference performs the actual free. */
   if (!p_atomic_dec_zero(&bo->refcnt))
      return true;

   if (bo->map)
      v3dv_bo_unmap(device, bo);

   struct timespec time;
   struct v3dv_bo_cache *cache = &device->bo_cache;
   /* Sizes are page-aligned; bucket N holds BOs of (N + 1) pages. */
   uint32_t page_index = bo->size / 4096 - 1;

   /* If the BO doesn't fit in the remaining cache budget, try to make
    * room first by evicting stale cached BOs.
    */
   if (bo->private &&
       bo->size > cache->max_cache_size - cache->cache_size) {
      clock_gettime(CLOCK_MONOTONIC, &time);
      mtx_lock(&cache->lock);
      free_stale_bos(device, time.tv_sec);
      mtx_unlock(&cache->lock);
   }

   /* Non-private BOs are never cached; likewise if the BO still doesn't
    * fit after the eviction above, release it to the kernel directly.
    */
   if (!bo->private ||
       bo->size > cache->max_cache_size - cache->cache_size) {
      return bo_free(device, bo);
   }

   clock_gettime(CLOCK_MONOTONIC, &time);
   mtx_lock(&cache->lock);

   /* Grow the size-bucket array on demand to cover this BO's size. */
   if (cache->size_list_size <= page_index) {
      if (!reallocate_size_list(cache, device, page_index + 1)) {
         bool outcome = bo_free(device, bo);
         /* If the reallocation failed, it usually means that we are out of
          * memory, so we also free all the bo cache. We need to call it to
          * not use the cache lock, as we are already under it.
          */
         bo_cache_free_all(device, false);
         mtx_unlock(&cache->lock);
         return outcome;
      }
   }

   /* Stash the BO at the tail of its size bucket and of the LRU list. */
   bo->free_time = time.tv_sec;
   list_addtail(&bo->size_list, &cache->size_list[page_index]);
   list_addtail(&bo->time_list, &cache->time_list);

   cache->cache_count++;
   cache->cache_size += bo->size;

   if (dump_stats) {
      fprintf(stderr, "Freed %s %dkb to cache:\n",
              bo->name, bo->size / 1024);
      bo_dump_stats(device);
   }
   /* Clear the name while the BO sits in the cache. */
   bo->name = NULL;

   /* Opportunistically evict anything that has gone stale. */
   free_stale_bos(device, time.tv_sec);

   mtx_unlock(&cache->lock);

   return true;
}
539