/*
 * Copyright © 2012-2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "freedreno_drmif.h"
#include "freedreno_drm_perfetto.h"
#include "freedreno_priv.h"

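/*
 * Buffer-object (BO) reuse cache: freed BOs are parked in size-sorted
 * buckets and handed back out on later allocations instead of going back
 * to the kernel each time.
 *
 * Illustrative usage sketch (the real call sites live elsewhere in the
 * freedreno DRM code; allocate_new_bo()/really_free_bo() below are
 * hypothetical stand-ins for the non-cached paths):
 *
 *    struct fd_bo_cache cache;
 *    uint32_t size = request_size, flags = 0;
 *
 *    fd_bo_cache_init(&cache, 0, "bo_cache");
 *
 *    struct fd_bo *bo = fd_bo_cache_alloc(&cache, &size, flags);
 *    if (!bo)
 *       bo = allocate_new_bo(size, flags);
 *
 *    ... use the BO ...
 *
 *    if (fd_bo_cache_free(&cache, bo) < 0)
 *       really_free_bo(bo);
 */
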
#define FD_BO_CACHE_STATS 0

#define BO_CACHE_LOG(cache, fmt, ...) do {                      \
      if (FD_BO_CACHE_STATS) {                                  \
         mesa_logi("%s: "fmt, (cache)->name, ##__VA_ARGS__);    \
      }                                                         \
   } while (0)

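/* Unlink a BO from its bucket's list and drop the bucket's count. */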
static void
bo_remove_from_bucket(struct fd_bo_bucket *bucket, struct fd_bo *bo)
{
   list_delinit(&bo->node);
   bucket->count--;
}

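/* Log per-bucket and aggregate hit/miss/expiry counters.  Only active when
 * FD_BO_CACHE_STATS is enabled, and rate-limited to every 32nd call.
 */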
static void
dump_cache_stats(struct fd_bo_cache *cache)
{
   if (!FD_BO_CACHE_STATS)
      return;

   static int cnt;

   if ((++cnt % 32))
      return;

   int size = 0;
   int count = 0;
   int hits = 0;
   int misses = 0;
   int expired = 0;

   for (int i = 0; i < cache->num_buckets; i++) {
      char *state = "";

      struct fd_bo_bucket *bucket = &cache->cache_bucket[i];

      if (bucket->count > 0) {
         struct fd_bo *bo = first_bo(&bucket->list);
         if (fd_bo_state(bo) == FD_BO_STATE_IDLE)
            state = " (idle)";
      }

      BO_CACHE_LOG(cache, "bucket[%u]: count=%d\thits=%d\tmisses=%d\texpired=%d%s",
                   bucket->size, bucket->count, bucket->hits,
                   bucket->misses, bucket->expired, state);

      size += bucket->size * bucket->count;
      count += bucket->count;
      hits += bucket->hits;
      misses += bucket->misses;
      expired += bucket->expired;
   }

   BO_CACHE_LOG(cache, "size=%d\tcount=%d\thits=%d\tmisses=%d\texpired=%d",
                size, count, hits, misses, expired);
}

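/* Append a bucket of the given size.  Callers add buckets in increasing
 * size order, which get_bucket() below relies on.
 */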
static void
add_bucket(struct fd_bo_cache *cache, int size)
{
   unsigned int i = cache->num_buckets;
   struct fd_bo_bucket *bucket = &cache->cache_bucket[i];

   assert(i < ARRAY_SIZE(cache->cache_bucket));

   list_inithead(&bucket->list);
   bucket->size = size;
   bucket->count = 0;
   bucket->hits = 0;
   bucket->misses = 0;
   bucket->expired = 0;
   cache->num_buckets++;
}

/**
 * @coarse: if true, use only power-of-two bucket sizes; otherwise fill in
 * intermediate sizes for a smoother size curve.
 */
void
fd_bo_cache_init(struct fd_bo_cache *cache, int coarse, const char *name)
{
   unsigned long size, cache_max_size = 64 * 1024 * 1024;

   cache->name = name;
   simple_mtx_init(&cache->lock, mtx_plain);

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(cache, os_page_size);
   add_bucket(cache, os_page_size * 2);
   if (!coarse)
      add_bucket(cache, os_page_size * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * os_page_size; size <= cache_max_size; size *= 2) {
      add_bucket(cache, size);
      if (!coarse) {
         add_bucket(cache, size + size * 1 / 4);
         add_bucket(cache, size + size * 2 / 4);
         add_bucket(cache, size + size * 3 / 4);
      }
   }
}

/* Frees older cached buffers.  Called under table_lock */
void
fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time)
{
   int i, cnt = 0;

   if (cache->time == time)
      return;

   struct list_head freelist;

   list_inithead(&freelist);

   simple_mtx_lock(&cache->lock);
   for (i = 0; i < cache->num_buckets; i++) {
      struct fd_bo_bucket *bucket = &cache->cache_bucket[i];
      struct fd_bo *bo;

      while (!list_is_empty(&bucket->list)) {
         bo = first_bo(&bucket->list);

         /* keep things in cache for at least 1 second: */
         if (time && ((time - bo->free_time) <= 1))
            break;

         if (cnt == 0) {
            BO_CACHE_LOG(cache, "cache cleanup");
            dump_cache_stats(cache);
         }

         VG_BO_OBTAIN(bo);
         bo_remove_from_bucket(bucket, bo);
         bucket->expired++;
         list_addtail(&bo->node, &freelist);
         fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_NONE);

         cnt++;
      }
   }
   simple_mtx_unlock(&cache->lock);

   fd_bo_del_list_nocache(&freelist);

   if (cnt > 0) {
      BO_CACHE_LOG(cache, "cache cleaned %u BOs", cnt);
      dump_cache_stats(cache);
   }

   cache->time = time;
}

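/* Find the smallest bucket that fits the requested size, or NULL if the
 * request is larger than the biggest bucket.
 */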
static struct fd_bo_bucket *
get_bucket(struct fd_bo_cache *cache, uint32_t size)
{
   int i;

   /* hmm, this is what intel does, but I suppose we could calculate our
    * way to the correct bucket size rather than looping..
    */
   for (i = 0; i < cache->num_buckets; i++) {
      struct fd_bo_bucket *bucket = &cache->cache_bucket[i];
      if (bucket->size >= size) {
         return bucket;
      }
   }

   return NULL;
}

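/* Scan the bucket from the LRU end for an idle BO with matching allocation
 * flags, removing it from the bucket if found.
 */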
static struct fd_bo *
find_in_bucket(struct fd_bo_bucket *bucket, uint32_t flags)
{
   struct fd_bo *bo = NULL;

   /* TODO .. if we had an ALLOC_FOR_RENDER flag like intel, we could
    * skip the busy check.. if it is only going to be a render target
    * then we probably don't need to stall..
    *
    * NOTE that intel takes ALLOC_FOR_RENDER bo's from the list tail
    * (MRU, since likely to be in GPU cache), rather than head (LRU)..
    */
   foreach_bo (entry, &bucket->list) {
      if (fd_bo_state(entry) != FD_BO_STATE_IDLE) {
         break;
      }
      if (entry->alloc_flags == flags) {
         bo = entry;
         bo_remove_from_bucket(bucket, bo);
         break;
      }
   }

   return bo;
}

/* Try to allocate a BO from the cache, returning NULL if nothing suitable
 * is found.  NOTE: size is potentially rounded up to bucket size:
 */
struct fd_bo *
fd_bo_cache_alloc(struct fd_bo_cache *cache, uint32_t *size, uint32_t flags)
{
   struct fd_bo *bo = NULL;
   struct fd_bo_bucket *bucket;

   *size = align(*size, os_page_size);
   bucket = get_bucket(cache, *size);

   struct list_head freelist;

   list_inithead(&freelist);

   /* see if we can be green and recycle: */
retry:
   if (bucket) {
      *size = bucket->size;
      simple_mtx_lock(&cache->lock);
      bo = find_in_bucket(bucket, flags);
      simple_mtx_unlock(&cache->lock);
      if (bo) {
         VG_BO_OBTAIN(bo);
         if (bo->funcs->madvise(bo, true) <= 0) {
            /* we've lost the backing pages, delete and try again: */
            list_addtail(&bo->node, &freelist);
            fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_NONE);
            goto retry;
         }
         p_atomic_set(&bo->refcnt, 1);
         bo->reloc_flags = FD_RELOC_FLAGS_INIT;
         bucket->hits++;
         fd_alloc_log(bo, FD_ALLOC_CACHE, FD_ALLOC_ACTIVE);
         return bo;
      }
      bucket->misses++;
   }

   fd_bo_del_list_nocache(&freelist);

   BO_CACHE_LOG(cache, "miss on size=%u, flags=0x%x, bucket=%u", *size, flags,
                bucket ? bucket->size : 0);
   dump_cache_stats(cache);

   return NULL;
}

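/* Return a BO to the cache rather than freeing it.  Returns 0 if the BO was
 * cached, or -1 if it is not cacheable (shared/nosync, or no bucket fits),
 * leaving it to the caller to actually free it.
 */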
int
fd_bo_cache_free(struct fd_bo_cache *cache, struct fd_bo *bo)
{
   if (bo->alloc_flags & (FD_BO_SHARED | _FD_BO_NOSYNC))
      return -1;

   struct fd_bo_bucket *bucket = get_bucket(cache, bo->size);

   /* see if we can be green and recycle: */
   if (bucket) {
      struct timespec time;

      bo->funcs->madvise(bo, false);

      clock_gettime(CLOCK_MONOTONIC, &time);

      bo->free_time = time.tv_sec;
      VG_BO_RELEASE(bo);

      simple_mtx_lock(&cache->lock);
      list_addtail(&bo->node, &bucket->list);
      bucket->count++;
      simple_mtx_unlock(&cache->lock);

      fd_alloc_log(bo, FD_ALLOC_ACTIVE, FD_ALLOC_CACHE);
      fd_bo_cache_cleanup(cache, time.tv_sec);

      return 0;
   }

   return -1;
}