/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 *   The batch cache provides lookup for mapping pipe_framebuffer_state
 *   to a batch.
 *
 *   It does this via a hashtable, with a key that roughly matches the
 *   pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 *   To serialize the key, and to avoid dealing with holding a reference to
 *   pipe_surfaces (which hold a reference to a pipe_resource and complicate
 *   the whole refcounting scheme), the key is variable length and inlines
 *   the pertinent details of each pipe_surface.
 *
 * Batch:
 *
 *   Each batch needs to hold a reference to each resource it depends on (ie.
 *   anything that needs a mem2gmem), and a weak reference to the resources it
 *   renders to.  (If both src[n] and dst[n] are not NULL then they are the
 *   same.)
 *
 *   When a resource is destroyed, we need to remove entries in the batch
 *   cache that reference the resource, to avoid dangling pointer issues.
 *   So each resource holds a hashset of the batches which reference it in
 *   their hashtable key.
 *
 *   When a batch no longer holds a weak reference to any resource (ie. all
 *   the surfaces it rendered to are destroyed) the batch can be destroyed.
 *   This could happen in an app that renders and never uses the result.
 *   The more common scenario, I think, is that some, but not all, of the
 *   surfaces are destroyed before the batch is submitted.
 *
 *   If (for example) a batch writes to the zsbuf but that surface is
 *   destroyed before the batch is submitted, we can skip the gmem2mem (but
 *   still need to alloc gmem space as before).  If the batch depended on the
 *   previous contents of that surface, it would be holding a reference, so
 *   the surface would not have been destroyed.
 */

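/* For example, a framebuffer with a zsbuf and two cbufs produces a key with
 * num_surfs == 3: surf[0] describes the zsbuf (pos 0) and surf[1..2] the
 * cbufs (pos 1..2).  See fd_batch_from_fb() and key_surf() below.
 */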
struct fd_batch_key {
   uint32_t width;
   uint32_t height;
   uint16_t layers;
   uint16_t samples;
   uint16_t num_surfs;
   uint16_t ctx_seqno;
   struct {
      struct pipe_resource *texture;
      union pipe_surface_desc u;
      uint8_t pos, samples;
      uint16_t format;
   } surf[0];
};

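/* Allocate a zero-initialized key with room for 'num_surfs' inlined surface
 * descriptions in the surf[] flexible array.
 */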
static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
   struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_key, sizeof(key->surf[0]) * num_surfs);
   return key;
}

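/* Hash the fixed-size portion of the key, then the variable-length surf[]
 * array, chaining the two XXH32 passes.
 */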
uint32_t
fd_batch_key_hash(const void *_key)
{
   const struct fd_batch_key *key = _key;
   uint32_t hash = 0;
   hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
   hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
   return hash;
}

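/* Keys are equal if both the fixed-size portion (which includes num_surfs)
 * and the inlined surf[] entries match.
 */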
bool
fd_batch_key_equals(const void *_a, const void *_b)
{
   const struct fd_batch_key *a = _a;
   const struct fd_batch_key *b = _b;
   return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
          (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

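/* Make a ralloc'd copy of the key (including the inlined surf[] entries),
 * parented to mem_ctx.
 */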
struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
   unsigned sz =
      sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
   struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
   memcpy(new_key, key, sz);
   return new_key;
}

void
fd_bc_init(struct fd_batch_cache *cache)
{
   cache->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}

void
fd_bc_fini(struct fd_batch_cache *cache)
{
   _mesa_hash_table_destroy(cache->ht, NULL);
}

/* Find a batch that depends on last_batch (recursively if needed).
 * The returned batch should not be depended on by any other batch.
 */
static struct fd_batch *
find_dependee(struct fd_context *ctx, struct fd_batch *last_batch)
   assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx && fd_batch_has_dep(batch, last_batch)) {
         fd_batch_reference_locked(&last_batch, batch);
         return find_dependee(ctx, last_batch);
      }
   }

   return last_batch;
}

/* This returns the last batch to be flushed.  This is _approximately_ the
 * last batch to be modified, but it could be a batch that depends on the
 * last modified batch.
 */
struct fd_batch *
fd_bc_last_batch(struct fd_context *ctx)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch, *last_batch = NULL;

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         if (!last_batch ||
             /* Note: fd_fence_before() handles rollover for us: */
             fd_fence_before(last_batch->update_seqno, batch->update_seqno)) {
            fd_batch_reference_locked(&last_batch, batch);
         }
      }
   }

   if (last_batch)
      last_batch = find_dependee(ctx, last_batch);

   fd_screen_unlock(ctx->screen);

   return last_batch;
}

/* Make the current batch depend on all other batches.  So all other
 * batches will be flushed before the current batch.
 */
void
fd_bc_add_flush_deps(struct fd_context *ctx, struct fd_batch *last_batch)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
    * can cause batches to be unref'd and freed under our feet, so grab
    * a reference to all the batches we need up-front.
    */
   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
   struct fd_batch *batch;
   unsigned n = 0;

   assert(last_batch->ctx == ctx);

#ifndef NDEBUG
   struct fd_batch *tmp = fd_bc_last_batch(ctx);
   assert(tmp == last_batch);
   fd_batch_reference(&tmp, NULL);
#endif

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         fd_batch_reference_locked(&batches[n++], batch);
      }
   }

   for (unsigned i = 0; i < n; i++) {
      if (batches[i] && (batches[i] != last_batch)) {
         /* fd_bc_last_batch() should ensure that no other batch depends
          * on last_batch.  This is needed to avoid a dependency loop.
          */
         assert(!fd_batch_has_dep(batches[i], last_batch));

         fd_batch_add_dep(last_batch, batches[i]);
      }
   }

   fd_screen_unlock(ctx->screen);

   for (unsigned i = 0; i < n; i++) {
      fd_batch_reference(&batches[i], NULL);
   }
}

/**
 * Flushes the batch (if any) writing this resource.  Must not hold the screen
 * lock.
 */
void
fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   fd_screen_lock(ctx->screen);
   struct fd_batch *write_batch = NULL;
   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
   fd_screen_unlock(ctx->screen);

   if (write_batch) {
      if (write_batch->ctx == ctx)
         fd_batch_flush(write_batch);
      fd_batch_reference(&write_batch, NULL);
   }
}

/**
 * Flushes any batches reading this resource.  Must not hold the screen lock.
 */
void
fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   struct fd_batch *batch, *batches[32] = {};
   uint32_t batch_count = 0;

   /* This is a bit awkward; a fd_batch_flush_locked() would probably make
    * things simpler, but we need to hold the lock to iterate the batches
    * which reference this resource.  So we must first grab references
    * under the lock, then flush.
    */
   fd_screen_lock(ctx->screen);
   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
      fd_batch_reference_locked(&batches[batch_count++], batch);
   fd_screen_unlock(ctx->screen);

   for (int i = 0; i < batch_count; i++) {
      if (batches[i]->ctx == ctx)
         fd_batch_flush(batches[i]);
      fd_batch_reference(&batches[i], NULL);
   }
}

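/* Debug helper: when FD_DBG(MSGS) is enabled, print a formatted header
 * followed by each live batch and whether it still needs a flush.
 */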
void
fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   if (!FD_DBG(MSGS))
      return;

   fd_screen_lock(ctx->screen);

   va_list ap;
   va_start(ap, fmt);
   vprintf(fmt, ap);
   va_end(ap);

   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
      struct fd_batch *batch = cache->batches[i];
      if (batch) {
         printf("  %p<%u>%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "");
      }
   }

   printf("----\n");

   fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work; otherwise we can end up with
 * a rsc destroyed while a batch still has a dangling reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
   if (!batch)
      return;

   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch_key *key = batch->key;

   fd_screen_assert_locked(batch->ctx->screen);

   if (remove) {
      cache->batches[batch->idx] = NULL;
      cache->batch_mask &= ~(1 << batch->idx);
   }

   if (!key)
      return;

   DBG("%p: key=%p", batch, batch->key);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
   }

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
   _mesa_hash_table_remove(cache->ht, entry);
}

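/* Drop a resource's association with the batch cache.  When 'destroy' is
 * set, also remove the resource from the tracked-resource set of every batch
 * which references it and clear its write_batch.  In either case, invalidate
 * any batch cache entries whose key references the resource.
 */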
void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   struct fd_batch *batch;

   fd_screen_lock(screen);

   if (destroy) {
      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
         struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
         _mesa_set_remove(batch->resources, entry);
      }
      rsc->track->batch_mask = 0;

      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }

   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
      fd_bc_invalidate_batch(batch, false);

   rsc->track->bc_batch_mask = 0;

   fd_screen_unlock(screen);
}

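/* Allocate a batch slot from the cache (called with the screen lock held).
 * If all slots are in use, pick the batch with the lowest seqno and flush it
 * to free a slot, temporarily dropping the screen lock around the flush.
 */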
static struct fd_batch *
alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
                   bool nondraw) assert_dt
{
   struct fd_batch *batch;
   uint32_t idx;

   fd_screen_assert_locked(ctx->screen);

   while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         batch = cache->batches[i];
         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
         set_foreach (batch->dependencies, entry) {
            struct fd_batch *dep = (struct fd_batch *)entry->key;
            debug_printf(" %d", dep->idx);
         }
         debug_printf("\n");
      }
#endif
      /* TODO: is LRU the better policy?  Or perhaps the batch that
       * depends on the fewest other batches?
       */
      struct fd_batch *flush_batch = NULL;
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
      }

      /* we can drop the lock temporarily here; since we hold a ref,
       * flush_batch won't disappear under us.
       */
      fd_screen_unlock(ctx->screen);
      DBG("%p: too many batches!  flush forced!", flush_batch);
      fd_batch_flush(flush_batch);
      fd_screen_lock(ctx->screen);

      /* While the resources get cleaned up automatically, the flush_batch
       * doesn't get removed from the dependencies of other batches, so
       * it won't be unref'd and will remain in the table.
       *
       * TODO maybe keep a bitmask of batches that depend on me, to make
       * this easier:
       */
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         struct fd_batch *other = cache->batches[i];
         if (!other)
            continue;
         if (fd_batch_has_dep(other, flush_batch)) {
            other->dependents_mask &= ~(1 << flush_batch->idx);
            struct fd_batch *ref = flush_batch;
            fd_batch_reference_locked(&ref, NULL);
         }
      }

      fd_batch_reference_locked(&flush_batch, NULL);
   }

   idx--; /* bit zero returns 1 for ffs() */

   batch = fd_batch_create(ctx, nondraw);
   if (!batch)
      return NULL;

   batch->seqno = seqno_next(&cache->cnt);
   batch->idx = idx;
   cache->batch_mask |= (1 << idx);

   assert(cache->batches[idx] == NULL);
   cache->batches[idx] = batch;

   return batch;
}

static void
alloc_query_buf(struct fd_context *ctx, struct fd_batch *batch)
{
   if (batch->query_buf)
      return;

   if ((ctx->screen->gen < 3) || (ctx->screen->gen > 4))
      return;

   /* For gens that use fd_hw_query, pre-allocate an initially zero-sized
    * (unbacked) query buffer.  This simplifies draw/grid/etc-time resource
    * tracking.
    */
   struct pipe_screen *pscreen = &ctx->screen->base;
   struct pipe_resource templ = {
      .target = PIPE_BUFFER,
      .format = PIPE_FORMAT_R8_UNORM,
      .bind = PIPE_BIND_QUERY_BUFFER,
      .width0 = 0, /* create initially zero size buffer */
      .height0 = 1,
      .depth0 = 1,
      .array_size = 1,
      .last_level = 0,
      .nr_samples = 1,
   };
   batch->query_buf = pscreen->resource_create(pscreen, &templ);
}

struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   /* For normal draw batches, pctx->set_framebuffer_state() handles
    * this, but for nondraw batches, this is a nice central location
    * to handle them all.
    */
   if (nondraw)
      fd_context_switch_from(ctx);

   fd_screen_lock(ctx->screen);
   batch = alloc_batch_locked(cache, ctx, nondraw);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   if (batch && nondraw)
      fd_context_switch_to(ctx, batch);

   return batch;
}

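/* Look up the batch for the given key, or allocate a new one and insert it
 * into the hashtable.  On a cache hit the caller-provided key is freed;
 * otherwise it is stored as the new batch's key.  Called with the screen
 * lock held.
 */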
static struct fd_batch *
batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch = NULL;
   uint32_t hash = fd_batch_key_hash(key);
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

   if (entry) {
      free(key);
      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
      assert(!batch->flushed);
      return batch;
   }

   batch = alloc_batch_locked(cache, ctx, false);
#if MESA_DEBUG
   DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
       key->height, key->layers, key->samples);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      DBG("%p:  surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
          key->surf[idx].pos, key->surf[idx].texture,
          util_format_name(key->surf[idx].format),
          key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
          key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
          key->surf[idx].u.tex.level);
   }
#endif
   if (!batch)
      return NULL;

   /* reset max_scissor, which will be adjusted on draws
    * according to the actual scissor.
    */
   batch->max_scissor.minx = ~0;
   batch->max_scissor.miny = ~0;
   batch->max_scissor.maxx = 0;
   batch->max_scissor.maxy = 0;

   _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
   batch->key = key;
   batch->hash = hash;

   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask = (1 << batch->idx);
   }

   return batch;
}

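/* Fill in one inlined surface description in the key from a pipe_surface. */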
static void
key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
         struct pipe_surface *psurf)
{
   key->surf[idx].texture = psurf->texture;
   key->surf[idx].u = psurf->u;
   key->surf[idx].pos = pos;
   key->surf[idx].samples = MAX2(1, psurf->nr_samples);
   key->surf[idx].format = psurf->format;
}

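/* Look up (or create) the batch associated with the given framebuffer state.
 * The key inlines the zsbuf (if any, at pos 0) followed by each bound cbuf
 * (at pos i + 1).
 */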
struct fd_batch *
fd_batch_from_fb(struct fd_context *ctx,
                 const struct pipe_framebuffer_state *pfb)
{
   unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
   struct fd_batch_key *key = key_alloc(n);

   key->width = pfb->width;
   key->height = pfb->height;
   key->layers = pfb->layers;
   key->samples = util_framebuffer_get_num_samples(pfb);
   key->ctx_seqno = ctx->seqno;

   if (pfb->zsbuf)
      key_surf(key, idx++, 0, pfb->zsbuf);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++)
      if (pfb->cbufs[i])
         key_surf(key, idx++, i + 1, pfb->cbufs[i]);

   key->num_surfs = idx;

   fd_screen_lock(ctx->screen);
   struct fd_batch *batch = batch_from_key(ctx, key);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   fd_batch_set_fb(batch, pfb);

   return batch;
}