/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/hash_table.h"
#include "util/list.h"
#include "util/set.h"
#include "util/u_string.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

#include "freedreno_batch.h"
#include "freedreno_batch_cache.h"
#include "freedreno_context.h"
#include "freedreno_resource.h"

/* Overview:
 *
 * The batch cache provides lookup for mapping pipe_framebuffer_state
 * to a batch.
 *
 * It does this via a hashtable, with a key that roughly matches the
 * pipe_framebuffer_state, as described below.
 *
 * Batch Cache hashtable key:
 *
 * To serialize the key, and to avoid dealing with holding a reference to
 * pipe_surface's (which hold a reference to pipe_resource and complicate
 * the whole refcnting thing), the key is variable length and inlines the
 * pertinent details of the pipe_surface.
 *
 * Batch:
 *
 * Each batch needs to hold a reference to each resource it depends on (ie.
 * anything that needs a mem2gmem). And a weak reference to resources it
 * renders to. (If both src[n] and dst[n] are not NULL then they are the
 * same.)
 *
 * When a resource is destroyed, we need to remove entries in the batch
 * cache that reference the resource, to avoid dangling pointer issues.
 * So each resource holds a hashset of batches which reference it in
 * their hashtable key.
 *
 * When a batch no longer has a weak reference to any resources (ie. all
 * the surfaces it rendered to are destroyed), the batch can be destroyed.
 * This could happen in an app that renders and never uses the result.
 * The more common scenario, I think, is that some, but not all, of the
 * surfaces are destroyed before the batch is submitted.
 *
 * If (for example) the batch writes to zsbuf but that surface is destroyed
 * before the batch is submitted, we can skip the gmem2mem (but still need
 * to alloc gmem space as before).  If the batch depended on the previous
 * contents of that surface, it would be holding a reference, so the
 * surface would not have been destroyed.
 */

struct fd_batch_key {
   uint32_t width;
   uint32_t height;
   uint16_t layers;
   uint16_t samples;
   uint16_t num_surfs;
   uint16_t ctx_seqno;
   struct {
      struct pipe_resource *texture;
      union pipe_surface_desc u;
      uint8_t pos, samples;
      uint16_t format;
   } surf[0];
};

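/* Allocate a variable-length key with room for 'num_surfs' inlined
 * surface descriptions.
 */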
static struct fd_batch_key *
key_alloc(unsigned num_surfs)
{
   struct fd_batch_key *key = CALLOC_VARIANT_LENGTH_STRUCT(
      fd_batch_key, sizeof(key->surf[0]) * num_surfs);
   return key;
}

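/**
 * Hash the fixed-size portion of the key, then the inlined surface
 * array (whose length depends on num_surfs).
 */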
uint32_t
fd_batch_key_hash(const void *_key)
{
   const struct fd_batch_key *key = _key;
   uint32_t hash = 0;
   hash = XXH32(key, offsetof(struct fd_batch_key, surf[0]), hash);
   hash = XXH32(key->surf, sizeof(key->surf[0]) * key->num_surfs, hash);
   return hash;
}

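/**
 * Compare the fixed-size portion of two keys, then their inlined surface
 * arrays.  Keys with differing num_surfs fail the first memcmp().
 */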
bool
fd_batch_key_equals(const void *_a, const void *_b)
{
   const struct fd_batch_key *a = _a;
   const struct fd_batch_key *b = _b;
   return (memcmp(a, b, offsetof(struct fd_batch_key, surf[0])) == 0) &&
          (memcmp(a->surf, b->surf, sizeof(a->surf[0]) * a->num_surfs) == 0);
}

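/**
 * Duplicate a key (including its inlined surface array) into the given
 * ralloc memory context.
 */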
struct fd_batch_key *
fd_batch_key_clone(void *mem_ctx, const struct fd_batch_key *key)
{
   unsigned sz =
      sizeof(struct fd_batch_key) + (sizeof(key->surf[0]) * key->num_surfs);
   struct fd_batch_key *new_key = rzalloc_size(mem_ctx, sz);
   memcpy(new_key, key, sz);
   return new_key;
}

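/**
 * Set up the batch cache's framebuffer-state -> batch hashtable.
 */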
void
fd_bc_init(struct fd_batch_cache *cache)
{
   cache->ht =
      _mesa_hash_table_create(NULL, fd_batch_key_hash, fd_batch_key_equals);
}

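/**
 * Tear down the batch cache's hashtable.
 */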
void
fd_bc_fini(struct fd_batch_cache *cache)
{
   _mesa_hash_table_destroy(cache->ht, NULL);
}

/* Find a batch that depends on last_batch (recursively if needed).
 * The returned batch should not be depended on by any other batch.
 */
static struct fd_batch *
find_dependee(struct fd_context *ctx, struct fd_batch *last_batch)
   assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx && fd_batch_has_dep(batch, last_batch)) {
         fd_batch_reference_locked(&last_batch, batch);
         return find_dependee(ctx, last_batch);
      }
   }

   return last_batch;
}


/* This returns the last batch to be flushed.  This is _approximately_ the
 * last batch to be modified, but it could be a batch that depends on the
 * last modified batch.
 */
struct fd_batch *
fd_bc_last_batch(struct fd_context *ctx)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch, *last_batch = NULL;

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         if (!last_batch ||
             /* Note: fd_fence_before() handles rollover for us: */
             fd_fence_before(last_batch->update_seqno, batch->update_seqno)) {
            fd_batch_reference_locked(&last_batch, batch);
         }
      }
   }

   if (last_batch)
      last_batch = find_dependee(ctx, last_batch);

   fd_screen_unlock(ctx->screen);

   return last_batch;
}


/* Make the current batch depend on all other batches.  So all other
 * batches will be flushed before the current batch.
 */
void
fd_bc_add_flush_deps(struct fd_context *ctx, struct fd_batch *last_batch)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   /* fd_batch_flush() (and fd_batch_add_dep() which calls it indirectly)
    * can cause batches to be unref'd and freed under our feet, so grab
    * a reference to all the batches we need up-front.
    */
   struct fd_batch *batches[ARRAY_SIZE(cache->batches)] = {0};
   struct fd_batch *batch;
   unsigned n = 0;

   assert(last_batch->ctx == ctx);

#ifndef NDEBUG
   struct fd_batch *tmp = fd_bc_last_batch(ctx);
   assert(tmp == last_batch);
   fd_batch_reference(&tmp, NULL);
#endif

   fd_screen_lock(ctx->screen);

   foreach_batch (batch, cache, cache->batch_mask) {
      if (batch->ctx == ctx) {
         fd_batch_reference_locked(&batches[n++], batch);
      }
   }

   for (unsigned i = 0; i < n; i++) {
      if (batches[i] && (batches[i] != last_batch)) {
         /* fd_bc_last_batch() should ensure that no other batch depends
          * on last_batch.  This is needed to avoid a dependency loop.
          */
         assert(!fd_batch_has_dep(batches[i], last_batch));

         fd_batch_add_dep(last_batch, batches[i]);
      }
   }

   fd_screen_unlock(ctx->screen);

   for (unsigned i = 0; i < n; i++) {
      fd_batch_reference(&batches[i], NULL);
   }
}

/**
 * Flushes the batch (if any) writing this resource.  Must not hold the screen
 * lock.
 */
void
fd_bc_flush_writer(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   fd_screen_lock(ctx->screen);
   struct fd_batch *write_batch = NULL;
   fd_batch_reference_locked(&write_batch, rsc->track->write_batch);
   fd_screen_unlock(ctx->screen);

   if (write_batch) {
      if (write_batch->ctx == ctx)
         fd_batch_flush(write_batch);
      fd_batch_reference(&write_batch, NULL);
   }
}

/**
 * Flushes any batches reading this resource.  Must not hold the screen lock.
 */
void
fd_bc_flush_readers(struct fd_context *ctx, struct fd_resource *rsc) assert_dt
{
   struct fd_batch *batch, *batches[32] = {};
   uint32_t batch_count = 0;

   /* This is a bit awkward, probably a fd_batch_flush_locked()
    * would make things simpler.. but we need to hold the lock
    * to iterate the batches which reference this resource.  So
    * we must first grab references under a lock, then flush.
    */
   fd_screen_lock(ctx->screen);
   foreach_batch (batch, &ctx->screen->batch_cache, rsc->track->batch_mask)
      fd_batch_reference_locked(&batches[batch_count++], batch);
   fd_screen_unlock(ctx->screen);

   for (int i = 0; i < batch_count; i++) {
      if (batches[i]->ctx == ctx)
         fd_batch_flush(batches[i]);
      fd_batch_reference(&batches[i], NULL);
   }
}

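/**
 * Debug helper: print a printf-style header followed by the batches
 * currently in the cache.  Only does anything with FD_DBG(MSGS) enabled.
 */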
void
fd_bc_dump(struct fd_context *ctx, const char *fmt, ...)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;

   if (!FD_DBG(MSGS))
      return;

   fd_screen_lock(ctx->screen);

   va_list ap;
   va_start(ap, fmt);
   vprintf(fmt, ap);
   va_end(ap);

   for (int i = 0; i < ARRAY_SIZE(cache->batches); i++) {
      struct fd_batch *batch = cache->batches[i];
      if (batch) {
         printf(" %p<%u>%s\n", batch, batch->seqno,
                batch->needs_flush ? ", NEEDS FLUSH" : "");
      }
   }

   printf("----\n");

   fd_screen_unlock(ctx->screen);
}

/**
 * Note that when a batch is flushed, it needs to remain in the cache so
 * that fd_bc_invalidate_resource() can work.. otherwise we can have
 * the case where a rsc is destroyed while a batch still has a dangling
 * reference to it.
 *
 * Note that the cmdstream (or, after the SUBMIT ioctl, the kernel)
 * would have a reference to the underlying bo, so it is ok for the
 * rsc to be destroyed before the batch.
 */
void
fd_bc_invalidate_batch(struct fd_batch *batch, bool remove)
{
   if (!batch)
      return;

   struct fd_batch_cache *cache = &batch->ctx->screen->batch_cache;
   struct fd_batch_key *key = batch->key;

   fd_screen_assert_locked(batch->ctx->screen);

   if (remove) {
      cache->batches[batch->idx] = NULL;
      cache->batch_mask &= ~(1 << batch->idx);
   }

   if (!key)
      return;

   DBG("%p: key=%p", batch, batch->key);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask &= ~(1 << batch->idx);
   }

   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, batch->hash, key);
   _mesa_hash_table_remove(cache->ht, entry);
}

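/**
 * Remove any batch-cache references to the resource, and (if the resource
 * is being destroyed) drop the batches' references to it, so no dangling
 * pointers remain.
 */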
void
fd_bc_invalidate_resource(struct fd_resource *rsc, bool destroy)
{
   struct fd_screen *screen = fd_screen(rsc->b.b.screen);
   struct fd_batch *batch;

   fd_screen_lock(screen);

   if (destroy) {
      foreach_batch (batch, &screen->batch_cache, rsc->track->batch_mask) {
         struct set_entry *entry = _mesa_set_search_pre_hashed(batch->resources, rsc->hash, rsc);
         _mesa_set_remove(batch->resources, entry);
      }
      rsc->track->batch_mask = 0;

      fd_batch_reference_locked(&rsc->track->write_batch, NULL);
   }

   foreach_batch (batch, &screen->batch_cache, rsc->track->bc_batch_mask)
      fd_bc_invalidate_batch(batch, false);

   rsc->track->bc_batch_mask = 0;

   fd_screen_unlock(screen);
}

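/* Allocate a new batch slot.  If the cache is full, the oldest batch (by
 * seqno) is flushed first to free a slot; the screen lock is dropped
 * temporarily while flushing.
 */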
static struct fd_batch *
alloc_batch_locked(struct fd_batch_cache *cache, struct fd_context *ctx,
                   bool nondraw) assert_dt
{
   struct fd_batch *batch;
   uint32_t idx;

   fd_screen_assert_locked(ctx->screen);

   while ((idx = ffs(~cache->batch_mask)) == 0) {
#if 0
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         batch = cache->batches[i];
         debug_printf("%d: needs_flush=%d, depends:", batch->idx, batch->needs_flush);
         set_foreach (batch->dependencies, entry) {
            struct fd_batch *dep = (struct fd_batch *)entry->key;
            debug_printf(" %d", dep->idx);
         }
         debug_printf("\n");
      }
#endif
      /* TODO: is LRU the better policy? Or perhaps the batch that
       * depends on the fewest other batches?
       */
      struct fd_batch *flush_batch = NULL;
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         if (!flush_batch || (cache->batches[i]->seqno < flush_batch->seqno))
            fd_batch_reference_locked(&flush_batch, cache->batches[i]);
      }

      /* we can drop lock temporarily here, since we hold a ref,
       * flush_batch won't disappear under us.
       */
      fd_screen_unlock(ctx->screen);
      DBG("%p: too many batches! flush forced!", flush_batch);
      fd_batch_flush(flush_batch);
      fd_screen_lock(ctx->screen);

      /* While the resources get cleaned up automatically, the flush_batch
       * doesn't get removed from the dependencies of other batches, so
       * it won't be unref'd and will remain in the table.
       *
       * TODO maybe keep a bitmask of batches that depend on me, to make
       * this easier:
       */
      for (unsigned i = 0; i < ARRAY_SIZE(cache->batches); i++) {
         struct fd_batch *other = cache->batches[i];
         if (!other)
            continue;
         if (fd_batch_has_dep(other, flush_batch)) {
            other->dependents_mask &= ~(1 << flush_batch->idx);
            struct fd_batch *ref = flush_batch;
            fd_batch_reference_locked(&ref, NULL);
         }
      }

      fd_batch_reference_locked(&flush_batch, NULL);
   }

   idx--; /* bit zero returns 1 for ffs() */

   batch = fd_batch_create(ctx, nondraw);
   if (!batch)
      return NULL;

   batch->seqno = seqno_next(&cache->cnt);
   batch->idx = idx;
   cache->batch_mask |= (1 << idx);

   assert(cache->batches[idx] == NULL);
   cache->batches[idx] = batch;

   return batch;
}

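/* Lazily allocate the batch's query buffer, which is only needed on gens
 * that use fd_hw_query.
 */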
static void
alloc_query_buf(struct fd_context *ctx, struct fd_batch *batch)
{
   if (batch->query_buf)
      return;

   if ((ctx->screen->gen < 3) || (ctx->screen->gen > 4))
      return;

   /* For gens that use fd_hw_query, pre-allocate an initially zero-sized
    * (unbacked) query buffer.  This simplifies draw/grid/etc-time resource
    * tracking.
    */
   struct pipe_screen *pscreen = &ctx->screen->base;
   struct pipe_resource templ = {
      .target = PIPE_BUFFER,
      .format = PIPE_FORMAT_R8_UNORM,
      .bind = PIPE_BIND_QUERY_BUFFER,
      .width0 = 0, /* create initially zero size buffer */
      .height0 = 1,
      .depth0 = 1,
      .array_size = 1,
      .last_level = 0,
      .nr_samples = 1,
   };
   batch->query_buf = pscreen->resource_create(pscreen, &templ);
}

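/**
 * Allocate a batch that is not associated with a framebuffer-state key,
 * handling the context-switch bookkeeping for nondraw batches.
 */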
struct fd_batch *
fd_bc_alloc_batch(struct fd_context *ctx, bool nondraw)
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch;

   /* For normal draw batches, pctx->set_framebuffer_state() handles
    * this, but for nondraw batches, this is a nice central location
    * to handle them all.
    */
   if (nondraw)
      fd_context_switch_from(ctx);

   fd_screen_lock(ctx->screen);
   batch = alloc_batch_locked(cache, ctx, nondraw);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   if (batch && nondraw)
      fd_context_switch_to(ctx, batch);

   return batch;
}

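/* Look up (or create) the batch for the given key.  Takes ownership of
 * the key: it is either freed (on a cache hit) or stored on the new
 * batch.  Called with the screen lock held.
 */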
static struct fd_batch *
batch_from_key(struct fd_context *ctx, struct fd_batch_key *key) assert_dt
{
   struct fd_batch_cache *cache = &ctx->screen->batch_cache;
   struct fd_batch *batch = NULL;
   uint32_t hash = fd_batch_key_hash(key);
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(cache->ht, hash, key);

   if (entry) {
      free(key);
      fd_batch_reference_locked(&batch, (struct fd_batch *)entry->data);
      assert(!batch->flushed);
      return batch;
   }

   batch = alloc_batch_locked(cache, ctx, false);
#if MESA_DEBUG
   DBG("%p: hash=0x%08x, %ux%u, %u layers, %u samples", batch, hash, key->width,
       key->height, key->layers, key->samples);
   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      DBG("%p: surf[%u]: %p (%s) (%u,%u / %u,%u,%u)", batch,
          key->surf[idx].pos, key->surf[idx].texture,
          util_format_name(key->surf[idx].format),
          key->surf[idx].u.buf.first_element, key->surf[idx].u.buf.last_element,
          key->surf[idx].u.tex.first_layer, key->surf[idx].u.tex.last_layer,
          key->surf[idx].u.tex.level);
   }
#endif
   if (!batch)
      return NULL;

   /* reset max_scissor, which will be adjusted on draws
    * according to the actual scissor.
    */
   batch->max_scissor.minx = ~0;
   batch->max_scissor.miny = ~0;
   batch->max_scissor.maxx = 0;
   batch->max_scissor.maxy = 0;

   _mesa_hash_table_insert_pre_hashed(cache->ht, hash, key, batch);
   batch->key = key;
   batch->hash = hash;

   for (unsigned idx = 0; idx < key->num_surfs; idx++) {
      struct fd_resource *rsc = fd_resource(key->surf[idx].texture);
      rsc->track->bc_batch_mask = (1 << batch->idx);
   }

   return batch;
}

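/* Fill in one inlined surface entry of the key from a pipe_surface. */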
static void
key_surf(struct fd_batch_key *key, unsigned idx, unsigned pos,
         struct pipe_surface *psurf)
{
   key->surf[idx].texture = psurf->texture;
   key->surf[idx].u = psurf->u;
   key->surf[idx].pos = pos;
   key->surf[idx].samples = MAX2(1, psurf->nr_samples);
   key->surf[idx].format = psurf->format;
}

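/**
 * Look up (or create) the batch for the given framebuffer state, building
 * the cache key from the fb dimensions and attached surfaces (zsbuf at
 * pos 0, cbufs at pos 1..n).
 */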
struct fd_batch *
fd_batch_from_fb(struct fd_context *ctx,
                 const struct pipe_framebuffer_state *pfb)
{
   unsigned idx = 0, n = pfb->nr_cbufs + (pfb->zsbuf ? 1 : 0);
   struct fd_batch_key *key = key_alloc(n);

   key->width = pfb->width;
   key->height = pfb->height;
   key->layers = pfb->layers;
   key->samples = util_framebuffer_get_num_samples(pfb);
   key->ctx_seqno = ctx->seqno;

   if (pfb->zsbuf)
      key_surf(key, idx++, 0, pfb->zsbuf);

   for (unsigned i = 0; i < pfb->nr_cbufs; i++)
      if (pfb->cbufs[i])
         key_surf(key, idx++, i + 1, pfb->cbufs[i]);

   key->num_surfs = idx;

   fd_screen_lock(ctx->screen);
   struct fd_batch *batch = batch_from_key(ctx, key);
   fd_screen_unlock(ctx->screen);

   alloc_query_buf(ctx, batch);

   fd_batch_set_fb(batch, pfb);

   return batch;
}