/*
 * Copyright © 2018 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include <assert.h>
#include <inttypes.h>
#include <pthread.h>

#include "util/hash_table.h"
#include "util/libsync.h"
#include "util/os_file.h"
#include "util/slab.h"

#include "freedreno_ringbuffer_sp.h"

/* A "softpin" implementation of submit/ringbuffer, which lowers CPU overhead
 * by avoiding the additional tracking necessary to build cmds/relocs tables
 * (but still builds a bos table)
 */

/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
 * instead use a condition-variable.  Note that pipe->flush() is not expected
 * to be a common/hot path.
 */
static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;

static void finalize_current_cmd(struct fd_ringbuffer *ring);
static struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags);

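/* Fast path: bo->idx caches the index the bo was assigned in the submit it
 * was most recently added to.  If the cached index doesn't match this
 * submit, fall back to the hash table (and, unless we are only checking,
 * append the bo).
 */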
static inline bool
check_append_suballoc_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_suballoc_bos) ||
                (submit->suballoc_bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(
            submit->suballoc_bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return false;
      } else {
         idx = APPEND(submit, suballoc_bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(
               submit->suballoc_bo_table, hash, bo, (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return true;
}

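/* For a suballocated bo, the bo itself is tracked in the suballoc_bos table
 * while its backing heap block is what gets added to the submit's bos table.
 */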
static inline uint32_t
check_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo, bool check)
{
   if (suballoc_bo(bo)) {
      if (check) {
         if (!check_append_suballoc_bo(submit, bo, true)) {
            return ~0;
         }
         bo = fd_bo_heap_block(bo);
      } else {
         check_append_suballoc_bo(submit, bo, false);
         bo = fd_bo_heap_block(bo);
      }
   }

   /* NOTE: it is legal to use the same bo on different threads for
    * different submits.  But it is not legal to use the same submit
    * from different threads.
    */
   uint32_t idx = READ_ONCE(bo->idx);

   if (unlikely((idx >= submit->nr_bos) || (submit->bos[idx] != bo))) {
      uint32_t hash = _mesa_hash_pointer(bo);
      struct hash_entry *entry;

      entry = _mesa_hash_table_search_pre_hashed(submit->bo_table, hash, bo);
      if (entry) {
         /* found */
         idx = (uint32_t)(uintptr_t)entry->data;
      } else if (unlikely(check)) {
         return ~0;
      } else {
         idx = APPEND(submit, bos, fd_bo_ref(bo));

         _mesa_hash_table_insert_pre_hashed(submit->bo_table, hash, bo,
                                            (void *)(uintptr_t)idx);
      }
      bo->idx = idx;
   }

   return idx;
}

/* add (if needed) bo to submit and return index: */
uint32_t
fd_submit_append_bo(struct fd_submit_sp *submit, struct fd_bo *bo)
{
   return check_append_bo(submit, bo, false);
}

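/* Suballocate a ringbuffer bo for a FD_RINGBUFFER_STREAMING rb from the tail
 * of the previous streaming ring's bo, allocating a fresh SUBALLOC_SIZE bo
 * only when the current one is exhausted:
 */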
static void
fd_submit_suballoc_ring_bo(struct fd_submit *submit,
                           struct fd_ringbuffer_sp *fd_ring, uint32_t size)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   unsigned suballoc_offset = 0;
   struct fd_bo *suballoc_bo = NULL;

   if (fd_submit->suballoc_ring) {
      struct fd_ringbuffer_sp *suballoc_ring =
         to_fd_ringbuffer_sp(fd_submit->suballoc_ring);

      suballoc_bo = suballoc_ring->ring_bo;
      suballoc_offset =
         fd_ringbuffer_size(fd_submit->suballoc_ring) + suballoc_ring->offset;

      suballoc_offset = align(suballoc_offset, SUBALLOC_ALIGNMENT);

      if ((size + suballoc_offset) > suballoc_bo->size) {
         suballoc_bo = NULL;
      }
   }

   if (!suballoc_bo) {
      // TODO possibly larger size for streaming bo?
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, SUBALLOC_SIZE);
      fd_ring->offset = 0;
   } else {
      fd_ring->ring_bo = fd_bo_ref(suballoc_bo);
      fd_ring->offset = suballoc_offset;
   }

   struct fd_ringbuffer *old_suballoc_ring = fd_submit->suballoc_ring;

   fd_submit->suballoc_ring = fd_ringbuffer_ref(&fd_ring->base);

   if (old_suballoc_ring)
      fd_ringbuffer_del(old_suballoc_ring);
}

static struct fd_ringbuffer *
fd_submit_sp_new_ringbuffer(struct fd_submit *submit, uint32_t size,
                            enum fd_ringbuffer_flags flags)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_ringbuffer_sp *fd_ring;

   fd_ring = slab_alloc(&fd_submit->ring_pool);

   fd_ring->u.submit = submit;

   /* NOTE: needs to be before _suballoc_ring_bo() since it could
    * increment the refcnt of the current ring
    */
   fd_ring->base.refcnt = 1;

   if (flags & FD_RINGBUFFER_STREAMING) {
      fd_submit_suballoc_ring_bo(submit, fd_ring, size);
   } else {
      if (flags & FD_RINGBUFFER_GROWABLE)
         size = SUBALLOC_SIZE;

      fd_ring->offset = 0;
      fd_ring->ring_bo = fd_bo_new_ring(submit->pipe->dev, size);
   }

   if (!fd_ringbuffer_sp_init(fd_ring, size, flags))
      return NULL;

   return &fd_ring->base;
}

/**
 * Prepare submit for flush, always done synchronously.
 *
 * 1) Finalize primary ringbuffer, at this point no more cmdstream may
 *    be written into it, since from the PoV of the upper level driver
 *    the submit is flushed, even if deferred
 * 2) Add cmdstream bos to bos table
 * 3) Update bo fences
 */
static bool
fd_submit_sp_flush_prep(struct fd_submit *submit, int in_fence_fd,
                        struct fd_fence *out_fence)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   bool has_shared = false;

   finalize_current_cmd(submit->primary);

   struct fd_ringbuffer_sp *primary =
      to_fd_ringbuffer_sp(submit->primary);

   for (unsigned i = 0; i < primary->u.nr_cmds; i++)
      fd_submit_append_bo(fd_submit, primary->u.cmds[i].ring_bo);

   out_fence->ufence = submit->fence;

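   /* Hold fence_lock while updating per-bo fence state, since bos can be
    * shared between submits on different threads:
    */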
   simple_mtx_lock(&fence_lock);
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      fd_bo_add_fence(fd_submit->bos[i], out_fence);
      has_shared |= fd_submit->bos[i]->alloc_flags & FD_BO_SHARED;
   }
   for (unsigned i = 0; i < fd_submit->nr_suballoc_bos; i++) {
      fd_bo_add_fence(fd_submit->suballoc_bos[i], out_fence);
   }
   simple_mtx_unlock(&fence_lock);

   fd_submit->out_fence = fd_fence_ref(out_fence);
   fd_submit->in_fence_fd = (in_fence_fd == -1) ?
      -1 : os_dupfd_cloexec(in_fence_fd);

   return has_shared;
}

static void
fd_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   struct fd_pipe *pipe = submit->pipe;

   fd_submit->flush_submit_list(&fd_submit->submit_list);

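   /* Advance last_submit_fence and wake anyone blocked in fd_pipe_sp_flush()
    * waiting for this submit to reach the kernel:
    */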
   pthread_mutex_lock(&flush_mtx);
   assert(fd_fence_before(pipe->last_submit_fence, fd_submit->base.fence));
   pipe->last_submit_fence = fd_submit->base.fence;
   pthread_cond_broadcast(&flush_cnd);
   pthread_mutex_unlock(&flush_mtx);

   DEBUG_MSG("finish: %u", submit->fence);
}

static void
fd_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit *submit = job;
   fd_submit_del(submit);
}

static void
flush_deferred_submits(struct fd_device *dev)
{
   MESA_TRACE_FUNC();

   simple_mtx_assert_locked(&dev->submit_lock);

   if (list_is_empty(&dev->deferred_submits))
      return;

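   /* The most recently deferred submit takes ownership of the entire
    * deferred list, which gets flushed as a single merged submit:
    */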
   struct fd_submit *submit = last_submit(&dev->deferred_submits);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);
   list_replace(&dev->deferred_submits, &fd_submit->submit_list);
   list_inithead(&dev->deferred_submits);
   dev->deferred_cmds = 0;

   /* If we have multiple submits with in-fence-fd's then merge them: */
   foreach_submit (submit, &fd_submit->submit_list) {
      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);

      if (fd_deferred_submit == fd_submit)
         break;

      if (fd_deferred_submit->in_fence_fd != -1) {
         sync_accumulate("freedreno",
                         &fd_submit->in_fence_fd,
                         fd_deferred_submit->in_fence_fd);
         close(fd_deferred_submit->in_fence_fd);
         fd_deferred_submit->in_fence_fd = -1;
      }
   }

   fd_fence_del(dev->deferred_submits_fence);
   dev->deferred_submits_fence = NULL;

   struct util_queue_fence *fence = &fd_submit->out_fence->ready;

   DEBUG_MSG("enqueue: %u", submit->fence);

   if (fd_device_threaded_submit(submit->pipe->dev)) {
      util_queue_add_job(&submit->pipe->dev->submit_queue,
                         submit, fence,
                         fd_submit_sp_flush_execute,
                         fd_submit_sp_flush_cleanup,
                         0);
   } else {
      fd_submit_sp_flush_execute(submit, NULL, 0);
      fd_submit_sp_flush_cleanup(submit, NULL, 0);
   }
}

static bool
should_defer(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   /* if too many bo's, it may not be worth the CPU cost of submit merging: */
   if (fd_submit->nr_bos > 30)
      return false;

   /* On the kernel side, with 32K ringbuffer, we have an upper limit of 2k
    * cmds before we exceed the size of the ringbuffer, which results in
    * deadlock writing into the RB (ie. kernel doesn't finish writing into
    * the RB so it doesn't kick the GPU to start consuming from the RB)
    */
   if (submit->pipe->dev->deferred_cmds > 128)
      return false;

   return true;
}

static struct fd_fence *
fd_submit_sp_flush(struct fd_submit *submit, int in_fence_fd, bool use_fence_fd)
{
   struct fd_device *dev = submit->pipe->dev;
   struct fd_pipe *pipe = submit->pipe;

   MESA_TRACE_FUNC();

   /* Acquire lock before flush_prep() because it is possible to race between
    * this and pipe->flush():
    */
   simple_mtx_lock(&dev->submit_lock);

   /* If there are deferred submits from another fd_pipe, flush them now,
    * since we can't merge submits from different submitqueue's (ie. they
    * could have different priority, etc)
    */
   if (!list_is_empty(&dev->deferred_submits) &&
       (last_submit(&dev->deferred_submits)->pipe != submit->pipe)) {
      flush_deferred_submits(dev);
   }

   list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);

   if (!dev->deferred_submits_fence)
      dev->deferred_submits_fence = fd_fence_new(submit->pipe, use_fence_fd);

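   /* All submits deferred since the last flush share the same out_fence: */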
   struct fd_fence *out_fence = fd_fence_ref(dev->deferred_submits_fence);

   /* upgrade the out_fence for the deferred submits, if needed: */
   if (use_fence_fd)
      out_fence->use_fence_fd = true;

   bool has_shared = fd_submit_sp_flush_prep(submit, in_fence_fd, out_fence);

   if ((in_fence_fd != -1) || out_fence->use_fence_fd)
      pipe->no_implicit_sync = true;

   /* The rule about skipping submit merging with shared buffers is only
    * needed for implicit-sync.
    */
   if (pipe->no_implicit_sync)
      has_shared = false;

   assert(fd_fence_before(pipe->last_enqueue_fence, submit->fence));
   pipe->last_enqueue_fence = submit->fence;

   /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
    * reference to the fd, and merged all the in-fence-fd's when we flush the
    * deferred submits
    */
   if (!use_fence_fd && !has_shared && should_defer(submit)) {
      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);

      return out_fence;
   }

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   return out_fence;
}

void
fd_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
{
   struct fd_device *dev = pipe->dev;

   if (!fd_fence_before(pipe->last_submit_fence, fence))
      return;

   MESA_TRACE_FUNC();

   simple_mtx_lock(&dev->submit_lock);

   assert(!fd_fence_after(fence, pipe->last_enqueue_fence));

   flush_deferred_submits(dev);

   simple_mtx_unlock(&dev->submit_lock);

   if (!fd_device_threaded_submit(pipe->dev))
      return;

   /* Once we are sure that we've enqueued at least up to the requested
    * submit, we need to be sure that submitq has caught up and flushed
    * them to the kernel
    */
   pthread_mutex_lock(&flush_mtx);
   while (fd_fence_before(pipe->last_submit_fence, fence)) {
      pthread_cond_wait(&flush_cnd, &flush_mtx);
   }
   pthread_mutex_unlock(&flush_mtx);
}

static void
fd_submit_sp_destroy(struct fd_submit *submit)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(submit);

   if (fd_submit->suballoc_ring)
      fd_ringbuffer_del(fd_submit->suballoc_ring);

   _mesa_hash_table_destroy(fd_submit->bo_table, NULL);
   _mesa_hash_table_destroy(fd_submit->suballoc_bo_table, NULL);

   // TODO it would be nice to have a way to assert() if all
   // rb's haven't been free'd back to the slab, because that is
   // an indication that we are leaking bo's
   slab_destroy_child(&fd_submit->ring_pool);

   fd_bo_del_array(fd_submit->bos, fd_submit->nr_bos);
   free(fd_submit->bos);

   fd_bo_del_array(fd_submit->suballoc_bos, fd_submit->nr_suballoc_bos);
   free(fd_submit->suballoc_bos);

   if (fd_submit->out_fence)
      fd_fence_del(fd_submit->out_fence);

   free(fd_submit);
}

static const struct fd_submit_funcs submit_funcs = {
   .new_ringbuffer = fd_submit_sp_new_ringbuffer,
   .flush = fd_submit_sp_flush,
   .destroy = fd_submit_sp_destroy,
};

struct fd_submit *
fd_submit_sp_new(struct fd_pipe *pipe, flush_submit_list_fn flush_submit_list)
{
   struct fd_submit_sp *fd_submit = calloc(1, sizeof(*fd_submit));
   struct fd_submit *submit;

   fd_submit->bo_table = _mesa_pointer_hash_table_create(NULL);
   fd_submit->suballoc_bo_table = _mesa_pointer_hash_table_create(NULL);

   slab_create_child(&fd_submit->ring_pool, &pipe->ring_pool);

   fd_submit->flush_submit_list = flush_submit_list;
   fd_submit->seqno = seqno_next(&pipe->submit_seqno);

   submit = &fd_submit->base;
   submit->funcs = &submit_funcs;

   return submit;
}

void
fd_pipe_sp_ringpool_init(struct fd_pipe *pipe)
{
   // TODO tune size:
   slab_create_parent(&pipe->ring_pool, sizeof(struct fd_ringbuffer_sp), 16);
}

void
fd_pipe_sp_ringpool_fini(struct fd_pipe *pipe)
{
   if (pipe->ring_pool.num_elements)
      slab_destroy_parent(&pipe->ring_pool);
}

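/* Append the current segment (from ring start to the current write pointer)
 * to the cmds table, taking a reference on the backing bo:
 */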
static void
finalize_current_cmd(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   APPEND(&fd_ring->u, cmds,
          (struct fd_cmd_sp){
             .ring_bo = fd_bo_ref(fd_ring->ring_bo),
             .size = offset_bytes(ring->cur, ring->start),
          });
}

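/* Growing a GROWABLE ring finalizes the current segment and switches to a
 * freshly allocated bo; the old bo stays referenced from the cmds table:
 */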
static void
fd_ringbuffer_sp_grow(struct fd_ringbuffer *ring, uint32_t size)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_pipe *pipe = fd_ring->u.submit->pipe;

   assert(ring->flags & FD_RINGBUFFER_GROWABLE);

   finalize_current_cmd(ring);

   fd_bo_del(fd_ring->ring_bo);
   fd_ring->ring_bo = fd_bo_new_ring(pipe->dev, size);

   ring->start = fd_bo_map(fd_ring->ring_bo);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;
   ring->size = size;
}

static inline bool
fd_ringbuffer_references_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   for (int i = 0; i < fd_ring->u.nr_reloc_bos; i++) {
      if (fd_ring->u.reloc_bos[i] == bo)
         return true;
   }
   return false;
}

static void
fd_ringbuffer_sp_emit_bo_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);

   fd_submit_append_bo(fd_submit, bo);
}

static void
fd_ringbuffer_sp_assert_attached_nonobj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(fd_ring->u.submit);
   assert(check_append_bo(fd_submit, bo, true) != ~0);
#endif
}

static void
fd_ringbuffer_sp_emit_bo_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
   assert(ring->flags & _FD_RINGBUFFER_OBJECT);

   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   /* Avoid emitting duplicate BO references into the list.  Ringbuffer
    * objects are long-lived, so this saves ongoing work at draw time in
    * exchange for a bit at context setup/first draw.  And the number of
    * relocs per ringbuffer object is fairly small, so the O(n^2) doesn't
    * hurt much.
    */
   if (!fd_ringbuffer_references_bo(ring, bo)) {
      APPEND(&fd_ring->u, reloc_bos, fd_bo_ref(bo));
   }
}

static void
fd_ringbuffer_sp_assert_attached_obj(struct fd_ringbuffer *ring, struct fd_bo *bo)
{
#ifndef NDEBUG
   /* If the stateobj already references the bo, nothing more to do: */
   if (fd_ringbuffer_references_bo(ring, bo))
      return;

   /* If not, we need to defer the assert.. because the batch resource
    * tracking may have attached the bo to the submit that the stateobj
    * will eventually be referenced by:
    */
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   for (int i = 0; i < fd_ring->u.nr_assert_bos; i++)
      if (fd_ring->u.assert_bos[i] == bo)
         return;

   APPEND(&fd_ring->u, assert_bos, fd_bo_ref(bo));
#endif
}

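/* Generate 32-bit and 64-bit variants of the reloc emission helpers: */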
#define PTRSZ 64
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ
#define PTRSZ 32
#include "freedreno_ringbuffer_sp_reloc.h"
#undef PTRSZ

static uint32_t
fd_ringbuffer_sp_cmd_count(struct fd_ringbuffer *ring)
{
   if (ring->flags & FD_RINGBUFFER_GROWABLE)
      return to_fd_ringbuffer_sp(ring)->u.nr_cmds + 1;
   return 1;
}

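/* Let the caller know when the submit's bo tables are getting too large: */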
static bool
fd_ringbuffer_sp_check_size(struct fd_ringbuffer *ring)
{
   assert(!(ring->flags & _FD_RINGBUFFER_OBJECT));
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);
   struct fd_submit *submit = fd_ring->u.submit;

   if (to_fd_submit_sp(submit)->nr_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   if (to_fd_submit_sp(submit)->nr_suballoc_bos > MAX_ARRAY_SIZE/2) {
      return false;
   }

   return true;
}

static void
fd_ringbuffer_sp_destroy(struct fd_ringbuffer *ring)
{
   struct fd_ringbuffer_sp *fd_ring = to_fd_ringbuffer_sp(ring);

   fd_bo_del(fd_ring->ring_bo);

   if (ring->flags & _FD_RINGBUFFER_OBJECT) {
      fd_bo_del_array(fd_ring->u.reloc_bos, fd_ring->u.nr_reloc_bos);
      free(fd_ring->u.reloc_bos);
#ifndef NDEBUG
      fd_bo_del_array(fd_ring->u.assert_bos, fd_ring->u.nr_assert_bos);
      free(fd_ring->u.assert_bos);
#endif
      free(fd_ring);
   } else {
      struct fd_submit *submit = fd_ring->u.submit;

      // TODO re-arrange the data structures so we can use fd_bo_del_array()
      for (unsigned i = 0; i < fd_ring->u.nr_cmds; i++) {
         fd_bo_del(fd_ring->u.cmds[i].ring_bo);
      }
      free(fd_ring->u.cmds);

      slab_free(&to_fd_submit_sp(submit)->ring_pool, fd_ring);
   }
}

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_32 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_32,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_32,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_nonobj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_nonobj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_nonobj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_nonobj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .check_size = fd_ringbuffer_sp_check_size,
   .destroy = fd_ringbuffer_sp_destroy,
};

static const struct fd_ringbuffer_funcs ring_funcs_obj_64 = {
   .grow = fd_ringbuffer_sp_grow,
   .emit_bo = fd_ringbuffer_sp_emit_bo_obj,
   .assert_attached = fd_ringbuffer_sp_assert_attached_obj,
   .emit_reloc = fd_ringbuffer_sp_emit_reloc_obj_64,
   .emit_reloc_ring = fd_ringbuffer_sp_emit_reloc_ring_64,
   .cmd_count = fd_ringbuffer_sp_cmd_count,
   .destroy = fd_ringbuffer_sp_destroy,
};

static inline struct fd_ringbuffer *
fd_ringbuffer_sp_init(struct fd_ringbuffer_sp *fd_ring, uint32_t size,
                      enum fd_ringbuffer_flags flags)
{
   struct fd_ringbuffer *ring = &fd_ring->base;

   assert(fd_ring->ring_bo);

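   /* Map the bo; start/cur/end are dword pointers, while size is in bytes: */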
   uint8_t *base = fd_bo_map(fd_ring->ring_bo);
   ring->start = (void *)(base + fd_ring->offset);
   ring->end = &(ring->start[size / 4]);
   ring->cur = ring->start;

   ring->size = size;
   ring->flags = flags;

   if (flags & _FD_RINGBUFFER_OBJECT) {
      if (fd_ring->u.pipe->is_64bit) {
         ring->funcs = &ring_funcs_obj_64;
      } else {
         ring->funcs = &ring_funcs_obj_32;
      }
   } else {
      if (fd_ring->u.submit->pipe->is_64bit) {
         ring->funcs = &ring_funcs_nonobj_64;
      } else {
         ring->funcs = &ring_funcs_nonobj_32;
      }
   }

   // TODO initializing these could probably be conditional on flags
   // since unneeded for the FD_RINGBUFFER_STAGING case..
   fd_ring->u.cmds = NULL;
   fd_ring->u.nr_cmds = fd_ring->u.max_cmds = 0;

   fd_ring->u.reloc_bos = NULL;
   fd_ring->u.nr_reloc_bos = fd_ring->u.max_reloc_bos = 0;
#ifndef NDEBUG
   fd_ring->u.assert_bos = NULL;
   fd_ring->u.nr_assert_bos = fd_ring->u.max_assert_bos = 0;
#endif

   return ring;
}

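/* Ringbuffer objects (stateobjs) are suballocated from a device-wide bo
 * rather than from a submit, since they are long-lived and can outlive any
 * single submit:
 */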
struct fd_ringbuffer *
fd_ringbuffer_sp_new_object(struct fd_pipe *pipe, uint32_t size)
{
   struct fd_device *dev = pipe->dev;
   struct fd_ringbuffer_sp *fd_ring = malloc(sizeof(*fd_ring));

   /* Lock access to the dev->suballoc_* state since ringbuffer object
    * allocation can happen both on the frontend (most CSOs) and the driver
    * thread (a6xx cached tex state, for example)
    */
   simple_mtx_lock(&dev->suballoc_lock);

   fd_ring->offset = align(dev->suballoc_offset, SUBALLOC_ALIGNMENT);
   if (!dev->suballoc_bo ||
       fd_ring->offset + size > fd_bo_size(dev->suballoc_bo)) {
      if (dev->suballoc_bo)
         fd_bo_del(dev->suballoc_bo);
      dev->suballoc_bo =
         fd_bo_new_ring(dev, MAX2(SUBALLOC_SIZE, align(size, os_page_size)));
      fd_ring->offset = 0;
   }

   fd_ring->u.pipe = pipe;
   fd_ring->ring_bo = fd_bo_ref(dev->suballoc_bo);
   fd_ring->base.refcnt = 1;
   fd_ring->u.last_submit_seqno = 0;

   dev->suballoc_offset = fd_ring->offset + size;

   simple_mtx_unlock(&dev->suballoc_lock);

   return fd_ringbuffer_sp_init(fd_ring, size, _FD_RINGBUFFER_OBJECT);
}