/*
 * Copyright © 2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/libsync.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t  flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

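/* Track a suballocated bo in the submit's suballoc_bos table.  The bo->idx
 * cache is used as a fast path, falling back to the hash table lookup.  When
 * 'check' is true the bo is only looked up (never appended), and false is
 * returned if it is not already tracked.
 */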
static inline bool
check_append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_suballoc_bos) ||
       (submit->suballoc_bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(
            submit->suballoc_bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return false;
      } else {
         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(
               submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return true;
}

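/* Track a bo in the submit's bos table and return its index.  Suballocated
 * bos are recorded in the suballoc table and then resolved to their backing
 * heap-block bo.  When 'check' is true nothing is appended, and ~0 is
 * returned if the bo is not already attached to the submit.
 */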
static inline uint32_t
check_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   if (suballoc_bo(bo)) {
      if (check) {
         if (!check_append_suballoc_bo(submit, bo, true)) {
            return ~0;
         }
         bo = fd_bo_heap_block(bo);
      } else {
         check_append_suballoc_bo(submit, bo, false);
         bo = fd_bo_heap_block(bo);
      }
   }

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return ~0;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   return check_append_bo(submit, bo, false);
}

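/* Allocate backing store for a streaming ringbuffer by suballocating from
 * the submit's current suballoc ring bo when there is room at the next
 * aligned offset, otherwise by starting a new SUBALLOC_SIZE bo.  The new
 * ring becomes the submit's suballoc_ring for later suballocations.
 */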
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, SUBALLOC_ALIGNMENT);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

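/* Create a ringbuffer for the submit: streaming rings are suballocated,
 * anything else gets its own bo (growable rings start at SUBALLOC_SIZE and
 * grow on demand).
 */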
static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = SUBALLOC_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   out_fence->ufence = submit->fence;

   simple_mtx_lock(&fence_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], out_fence);
      has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
   }
   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
   }
   simple_mtx_unlock(&fence_lock);

   fd_submit->out_fence   = fd_fence_ref(out_fence);
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
         -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

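/* Called on the submit-queue thread (or inline if not threaded) to push the
 * merged submit list to the kernel, then publish last_submit_fence and wake
 * any fd_pipe_sp_flush() waiters.
 */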
static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

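/* Move the device's deferred submits onto the last submit's submit_list,
 * merge any in-fence-fd's into it, and enqueue the actual flush (async on
 * the submit queue if threaded submit is enabled, otherwise synchronously).
 * Caller must hold dev->submit_lock.
 */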
static void
flush_deferred_submits(struct fd_device *dev)
{
   MESA_TRACE_FUNC();

   simple_mtx_assert_locked(&dev->submit_lock);

   if (list_is_empty(&dev->deferred_submits))
      return;

   struct fd_submit *submit = last_submit(&dev->deferred_submits);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   list_replace(&dev->deferred_submits, &fd_submit->submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   /* If we have multiple submits with in-fence-fd's then merge them: */
   foreach_submit (submit, &fd_submit->submit_list) {
      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);

      if (fd_deferred_submit == fd_submit)
         break;

      if (fd_deferred_submit->in_fence_fd != -1) {
         sync_accumulate("freedreno",
                         &fd_submit->in_fence_fd,
                         fd_deferred_submit->in_fence_fd);
         close(fd_deferred_submit->in_fence_fd);
         fd_deferred_submit->in_fence_fd = -1;
      }
   }

   fd_fence_del(dev->deferred_submits_fence);
   dev->deferred_submits_fence = NULL;

   struct util_queue_fence *fence = &fd_submit->out_fence->ready;

   DEBUG_MSG("enqueue: %u", submit->fence);

   if (fd_device_threaded_submit(submit->pipe->dev)) {
      util_queue_add_job(&submit->pipe->dev->submit_queue,
                         submit, fence,
                         fd_submit_sp_flush_execute,
                         fd_submit_sp_flush_cleanup,
                         0);
   } else {
      fd_submit_sp_flush_execute(submit, NULL, 0);
      fd_submit_sp_flush_cleanup(submit, NULL, 0);
   }
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

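/* Flush a submit: it is appended to the device's deferred-submit list and a
 * fence covering the deferred submits is returned.  The list is flushed to
 * the kernel immediately unless the submit can be deferred (no explicit
 * out-fence fd needed, no implicitly-synced shared buffers, and
 * should_defer() agrees).
 */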
static struct fd_fence *
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   MESA_TRACE_FUNC();

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueue's (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      flush_deferred_submits(dev);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   if (!dev->deferred_submits_fence)
      dev->deferred_submits_fence = fd_fence_new(submit->pipe, use_fence_fd);

   struct fd_fence *out_fence = fd_fence_ref(dev->deferred_submits_fence);

   /* upgrade the out_fence for the deferred submits, if needed: */
   if (use_fence_fd)
      out_fence->use_fence_fd = true;

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   if ((in_fence_fd != -1) || out_fence->use_fence_fd)
      pipe->no_implicit_sync = true;

   /* The rule about skipping submit merging with shared buffers is only
    * needed for implicit-sync.
    */
   if (pipe->no_implicit_sync)
      has_shared = false;

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if (!use_fence_fd && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return out_fence;
   }

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   return out_fence;
}

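/* Make sure all deferred submits up to 'fence' have been flushed.  With
 * threaded submit we also need to wait for the submit queue to hand them
 * off to the kernel before returning.
 */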
void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;

   if (!fd_fence_before(pipe->last_submit_fence, fence))
      return;

   MESA_TRACE_FUNC();

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   if (!fd_device_threaded_submit(pipe->dev))
      return;

   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
   free(fd_submit->bos);

   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
   free(fd_submit->suballoc_bos);

   if (fd_submit->out_fence)
      fd_fence_del(fd_submit->out_fence);

   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

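/* Create a softpin submit.  The flush_submit_list callback is the backend
 * hook used later (from fd_submit_sp_flush_execute()) to push the merged
 * list of deferred submits to the kernel.
 */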
struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;
   fd_submit->seqno = seqno_next(&pipe->submit_seqno);

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

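/* Record the ring's current write position as a finished cmd entry (ring_bo
 * plus the number of bytes written so far).  For growable rings this can
 * happen multiple times, once per backing bo.
 */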
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

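/* Grow callback for FD_RINGBUFFER_GROWABLE rings: finalize the current bo as
 * a cmd entry and continue writing into a freshly allocated bo of the
 * requested size.
 */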
static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

static void
fd_ringbuffer_sp_emit_bo_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

   fd_submit_append_bo(fd_submit, bo);
}

static void
fd_ringbuffer_sp_assert_attached_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);
   assert(check_append_bo(fd_submit, bo, true) != ~0);
#endif
}

static void
fd_ringbuffer_sp_emit_bo_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(ring->flags & _FD_RINGBUFFER_OBJECT);

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   /* Avoid emitting duplicate BO references into the list.  Ringbuffer
    * objects are long-lived, so this saves ongoing work at draw time in
    * exchange for a bit at context setup/first draw.  And the number of
    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
    * hurt much.
    */
   if (!fd_ringbuffer_references_bo(ring, bo)) {
      APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(bo));
   }
}

static void
fd_ringbuffer_sp_assert_attached_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   /* If the stateobj already references the bo, nothing more to do: */
   if (fd_ringbuffer_references_bo(ring, bo))
      return;

   /* If not, we need to defer the assert.. because the batch resource
    * tracking may have attached the bo to the submit that the stateobj
    * will eventually be referenced by:
    */
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   for (int i = 0; i < fd_ring->u.nr_assert_bos; i++)
      if (fd_ring->u.assert_bos[i] == bo)
         return;

   APPEND(&fd_ring->u, assert_bos, fd_bo_ref(bo));
#endif
}

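/* Instantiate the 32-bit and 64-bit variants of the reloc emit helpers
 * (fd_ringbuffer_sp_emit_reloc_*_32/_64) from the shared template:
 */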
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

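/* Returns false once either of the submit's bo tables has grown past
 * MAX_ARRAY_SIZE/2, ie. the submit is getting too large and should be
 * flushed before more cmdstream is emitted.
 */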
static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   if (to_fd_submit_sp(submit)->nr_suballoc_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      fd_bo_del_array(fd_ring->u.reloc_bos, fd_ring->u.nr_reloc_bos);
      free(fd_ring->u.reloc_bos);
#ifndef NDEBUG
      fd_bo_del_array(fd_ring->u.assert_bos, fd_ring->u.nr_assert_bos);
      free(fd_ring->u.assert_bos);
#endif
      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      // TODO re-arrange the data structures so we can use fd_bo_del_array()
      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

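/* Common init for submit ringbuffers and long-lived ringbuffer objects: map
 * the backing bo at the ring's offset and pick the funcs table based on
 * object vs. non-object and 32- vs. 64-bit generation.
 */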
static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_ring->u.pipe->is_64bit) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_ring->u.submit->pipe->is_64bit) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for the FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;
#ifndef NDEBUG
   fd_ring->u.assert_bos = NULL;
   fd_ring->u.nr_assert_bos = fd_ring->u.max_assert_bos = 0;
#endif

   return ring;
}

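/* Allocate a long-lived ringbuffer object (stateobj), suballocated from the
 * device-wide suballoc_bo (which is replaced with a new bo once it fills up).
 */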
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the fd_device->suballoc_* since ringbuffer object allocation
    * can happen both on the frontend (most CSOs) and the driver thread (a6xx
    * cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   fd_ring->offset = align(dev->suballoc_offset, SUBALLOC_ALIGNMENT);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, os_page_size)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;
   fd_ring->u.last_submit_seqno = 0;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}