/*
 * Copyright © 2023 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#define FD_BO_NO_HARDPIN 1

#include "freedreno_batch.h"

#include "fd6_barrier.h"
#include "fd6_emit.h"

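/*
 * Emit the requested cache clean/invalidate events and CP wait packets
 * into the ring. Note that clean ("flush") events are emitted before any
 * invalidates, see the CCU comment below.
 */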
template <chip CHIP>
void
fd6_emit_flushes(struct fd_context *ctx, struct fd_ringbuffer *ring,
                 unsigned flushes)
{
   /* Experiments show that invalidating CCU while it still has data in it
    * doesn't work, so make sure to always flush before invalidating in case
    * any data remains that hasn't yet been made available through a barrier.
    * However it does seem to work for UCHE.
    */
   if (flushes & (FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR))
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_COLOR);

   if (flushes & (FD6_FLUSH_CCU_DEPTH | FD6_INVALIDATE_CCU_DEPTH))
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);

   if (flushes & FD6_INVALIDATE_CCU_COLOR)
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);

   if (flushes & FD6_INVALIDATE_CCU_DEPTH)
      fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);

   if (flushes & FD6_FLUSH_CACHE)
      fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);

   if (flushes & FD6_INVALIDATE_CACHE)
      fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);

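   /* CP-level waits: WAIT_MEM_WRITES waits for pending memory writes from
    * the CP to land, WAIT_FOR_IDLE waits for prior GPU work to drain, and
    * WAIT_FOR_ME stalls the PFP until the ME has caught up.
    */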
   if (flushes & FD6_WAIT_MEM_WRITES)
      OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   if (flushes & FD6_WAIT_FOR_IDLE)
      OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);

   if (flushes & FD6_WAIT_FOR_ME)
      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
}
FD_GENX(fd6_emit_flushes);

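/*
 * Emit the batch's accumulated barrier flushes into its draw cmdstream
 * and clear them.
 */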
template <chip CHIP>
void
fd6_barrier_flush(struct fd_batch *batch)
{
   fd6_emit_flushes<CHIP>(batch->ctx, batch->draw, batch->barrier);
   batch->barrier = 0;
}
FD_GENX(fd6_barrier_flush);

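/*
 * Accumulate barrier flushes on the current batch (preferring an active
 * compute/nondraw batch), to be emitted later via fd6_barrier_flush().
 */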
static void
add_flushes(struct pipe_context *pctx, unsigned flushes)
   assert_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_batch *batch = NULL;

   /* If there is an active compute/nondraw batch, that is the one we want
    * to add the flushes to. I.e. if the last op was a launch_grid and the
    * next op is also a launch_grid, the barriers should come between them.
    * If the next op is instead a draw_vbo, the batch switch is already a
    * sufficient barrier, so it doesn't really matter.
    */
   fd_batch_reference(&batch, ctx->batch_nondraw);
   if (!batch)
      fd_batch_reference(&batch, ctx->batch);

   /* A batch flush is already a sufficient barrier: */
   if (!batch)
      return;

   batch->barrier |= flushes;

   fd_batch_reference(&batch, NULL);
}

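/*
 * Implements pipe_context::texture_barrier, i.e. glTextureBarrier().
 */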
static void
fd6_texture_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & PIPE_TEXTURE_BARRIER_SAMPLER) {
      /* If we are sampling from the fb, we could get away with treating
       * this as a PIPE_TEXTURE_BARRIER_FRAMEBUFFER in sysmem mode, but
       * that won't work out in gmem mode because we don't patch the tex
       * state outside of the case that the frag shader tells us it is
       * an fb-read. And in particular, the fb-read case guarantees us
       * that the read will be from the same texel, but the fb-bound-as-
       * tex case does not.
       *
       * We could try to be clever here and detect if zsbuf/cbuf[n] is
       * bound as a texture, but that doesn't really help if it is bound
       * as a texture after the barrier without a lot of extra book-
       * keeping. So hopefully no one calls glTextureBarrierNV() just
       * for lolz.
       */
      pctx->flush(pctx, NULL, 0);
      return;
   }

   if (flags & PIPE_TEXTURE_BARRIER_FRAMEBUFFER) {
      flushes |= FD6_WAIT_FOR_IDLE | FD6_WAIT_FOR_ME |
                 FD6_FLUSH_CCU_COLOR | FD6_FLUSH_CCU_DEPTH |
                 FD6_FLUSH_CACHE | FD6_INVALIDATE_CACHE;
   }

   add_flushes(pctx, flushes);
}

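/*
 * Implements pipe_context::memory_barrier, translating PIPE_BARRIER_x
 * flags into the cache flushes and CP waits needed on a6xx+.
 */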
static void
fd6_memory_barrier(struct pipe_context *pctx, unsigned flags)
   in_dt
{
   unsigned flushes = 0;

   if (flags & (PIPE_BARRIER_SHADER_BUFFER |
                PIPE_BARRIER_CONSTANT_BUFFER |
                PIPE_BARRIER_VERTEX_BUFFER |
                PIPE_BARRIER_INDEX_BUFFER |
                PIPE_BARRIER_STREAMOUT_BUFFER)) {
      flushes |= FD6_WAIT_FOR_IDLE;
   }

   if (flags & (PIPE_BARRIER_TEXTURE |
                PIPE_BARRIER_IMAGE |
                PIPE_BARRIER_UPDATE_BUFFER |
                PIPE_BARRIER_UPDATE_TEXTURE)) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;
   }

   if (flags & PIPE_BARRIER_INDIRECT_BUFFER) {
      flushes |= FD6_FLUSH_CACHE | FD6_WAIT_FOR_IDLE;

      /* Various firmware bugs/inconsistencies mean that some indirect draw
       * opcodes do not wait for WFIs to complete before executing, so add a
       * WAIT_FOR_ME before these opcodes when one is pending. This may
       * result in a few extra WAIT_FOR_ME's with these opcodes, but the
       * alternative would add unnecessary WAIT_FOR_ME's before draw opcodes
       * that don't need it.
       */
      if (fd_context(pctx)->screen->info->a6xx.indirect_draw_wfm_quirk) {
         flushes |= FD6_WAIT_FOR_ME;
      }
   }

   if (flags & PIPE_BARRIER_FRAMEBUFFER) {
      fd6_texture_barrier(pctx, PIPE_TEXTURE_BARRIER_FRAMEBUFFER);
   }

   add_flushes(pctx, flushes);
}

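/*
 * Install the barrier related context hooks.
 */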
void
fd6_barrier_init(struct pipe_context *pctx)
{
   pctx->texture_barrier = fd6_texture_barrier;
   pctx->memory_barrier = fd6_memory_barrier;
}