xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a6xx/fd6_barrier.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2023 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#define FD_BO_NO_HARDPIN 1

#include "freedreno_batch.h"

#include "fd6_barrier.h"
#include "fd6_emit.h"

template <chip CHIP>
void
fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
                 unsigned flushes)
{
   /* Experiments show that invalidating the CCU while it still has data in
    * it doesn't work, so make sure to always flush before invalidating, in
    * case any data remains that hasn't yet been made available through a
    * barrier.  However, invalidating while dirty does seem to work for UCHE.
    */
   if (flushes & (FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR))
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_COLOR);

   if (flushes & (FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH))
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);

   if (flushes & FD6_INVALIDATE_CCU_COLOR)
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);

   if (flushes & FD6_INVALIDATE_CCU_DEPTH)
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);

   if (flushes & FD6_FLUSH_CACHE)
      fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);

   if (flushes & FD6_INVALIDATE_CACHE)
      fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);

   if (flushes & FD6_WAIT_MEM_WRITES)
      OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   if (flushes & FD6_WAIT_FOR_IDLE)
      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);

   if (flushes & FD6_WAIT_FOR_ME)
      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
}
FD_GENX(fd6_emit_flushes);
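
/* Usage sketch (not from this file; CHIP, ctx, and ring stand in for
 * whatever the caller has in scope): a caller wanting to clean and
 * invalidate all caches and then stall until the GPU is idle could
 * combine the flush bits like so:
 *
 *    fd6_emit_flushes<CHIP>(ctx, ring,
 *                           FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE |
 *                           FD6_WAIT_FOR_IDLE);
 *
 * Because cleans are always emitted before invalidates above, combining
 * a clean bit with the matching invalidate bit is safe.
 */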

template <chip CHIP>
void
fd6_barrier_flush(struct fd_batch *batch)
{
   fd6_emit_flushes<CHIP>(batch->ctx, batch->draw, batch->barrier);
   batch->barrier = 0;
}
FD_GENX(fd6_barrier_flush);

static void
add_flushes(struct pipe_context *pctx, unsigned flushes)
   assert_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_batch *batch = NULL;

   /* If there is an active compute/nondraw batch, that is the one we want
    * to add the flushes to.  Ie. if the last op was a launch_grid and the
    * next one is also a launch_grid, the barriers should come between
    * them.  If the next op is instead a draw_vbo, the batch switch is a
    * sufficient barrier, so it doesn't really matter.
    */
   fd_batch_reference(&batch, ctx->batch_nondraw);
   if (!batch)
      fd_batch_reference(&batch, ctx->batch);

   /* A batch flush is already a sufficient barrier: */
   if (!batch)
      return;

   batch->barrier |= flushes;

   fd_batch_reference(&batch, NULL);
}
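
/* Illustrative sequence for the case described above (a sketch; the
 * pipe_context callbacks are standard gallium, "grid" is hypothetical
 * caller state): two back-to-back dispatches with an SSBO barrier
 * between them accumulate the flush bits on the nondraw batch rather
 * than forcing a batch flush:
 *
 *    pctx->launch_grid(pctx, &grid);
 *    pctx->memory_barrier(pctx, PIPE_BARRIER_SHADER_BUFFER);
 *    pctx->launch_grid(pctx, &grid);   // deferred flushes land before this
 */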

static void
fd6_texture_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & PIPE_TEXTURE_BARRIER_SAMPLER) {
      /* If we are sampling from the fb, we could get away with treating
       * this as a PIPE_TEXTURE_BARRIER_FRAMEBUFFER in sysmem mode, but
       * that won't work out in gmem mode because we don't patch the tex
       * state outside of the case that the frag shader tells us it is
       * an fb-read.  And in particular, the fb-read case guarantees us
       * that the read will be from the same texel, but the fb-bound-as-
       * tex case does not.
       *
       * We could try to be clever here and detect if zsbuf/cbuf[n] is
       * bound as a texture, but that doesn't really help if it is bound
       * as a texture after the barrier without a lot of extra book-
       * keeping.  So hopefully no one calls glTextureBarrierNV() just
       * for lolz.
       */
      pctx->flush(pctx, NULL, 0);
      return;
   }

   if (flags & PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
      flushes |= FD6_WAIT_FOR_IDLE | FD6_WAIT_FOR_ME |
            FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH |
            FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE;
   }

   add_flushes(pctx, flushes);
}
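
/* Usage sketch (frontend side; hedged, not code from this driver): a
 * glTextureBarrier()-style framebuffer barrier reaches the driver as
 *
 *    pctx->texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
 *
 * which queues CCU cleans plus a UCHE clean/invalidate on the batch,
 * instead of the full pctx->flush() needed for the sampler case.
 */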

static void
fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & (PIPE_BARRIER_SHADER_BUFFER |
                PIPE_BARRIER_CONSTANT_BUFFER |
                PIPE_BARRIER_VERTEX_BUFFER |
                PIPE_BARRIER_INDEX_BUFFER |
                PIPE_BARRIER_STREAMOUT_BUFFER)) {
      flushes |= FD6_WAIT_FOR_IDLE;
   }

   if (flags & (PIPE_BARRIER_TEXTURE |
                PIPE_BARRIER_IMAGE |
                PIPE_BARRIER_UPDATE_BUFFER |
                PIPE_BARRIER_UPDATE_TEXTURE)) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
   }

   if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;

      /* Various firmware bugs/inconsistencies mean that some indirect draw
       * opcodes do not wait for WFIs to complete before executing, so emit
       * a WAIT_FOR_ME before these opcodes.  This may result in a few
       * extra WAIT_FOR_MEs with these opcodes, but the alternative would
       * be adding unnecessary WAIT_FOR_MEs before draw opcodes that don't
       * need them.
       */
      if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
         flushes |= FD6_WAIT_FOR_ME;
      }
   }

   if (flags & PIPE_BARRIER_FRAMEBUFFER) {
      fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
   }

   add_flushes(pctx, flushes);
}
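
/* Sketch of the indirect-draw case above (hypothetical caller state:
 * "indirect" args written by a prior compute dispatch; draw_vbo arguments
 * per the gallium interface):
 *
 *    pctx->memory_barrier(pctx, PIPE_BARRIER_INDIRECT_BUFFER);
 *    pctx->draw_vbo(pctx, &info, 0, &indirect, &draw, 1);
 *
 * On parts with indirect_draw_wfm_quirk this adds FD6_WAIT_FOR_ME, so
 * the CP does not launch the draw before the WFI has actually completed.
 */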

void
fd6_barrier_init(struct pipe_context *pctx)
{
   pctx->texture_barrier = fd6_texture_barrier;
   pctx->memory_barrier = fd6_memory_barrier;
}
169