/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FD6_EMIT_H
#define FD6_EMIT_H

#include "pipe/p_context.h"

#include "fd6_context.h"
#include "fd6_program.h"
#include "fdl/fd6_format_table.h"
#include "freedreno_context.h"
#include "freedreno_gpu_event.h"
#include "ir3_gallium.h"

struct fd_ringbuffer;

/* To collect all the state objects to emit in a single CP_SET_DRAW_STATE
 * packet, the emit tracks however many state_groups need to be emitted.
 */
enum fd6_state_id {
   FD6_GROUP_PROG_CONFIG,
   FD6_GROUP_PROG,
   FD6_GROUP_PROG_BINNING,
   FD6_GROUP_PROG_INTERP,
   FD6_GROUP_PROG_FB_RAST,
   FD6_GROUP_LRZ,
   FD6_GROUP_VTXSTATE,
   FD6_GROUP_VBO,
   FD6_GROUP_CONST,
   FD6_GROUP_DRIVER_PARAMS,
   FD6_GROUP_PRIMITIVE_PARAMS,
   FD6_GROUP_VS_TEX,
   FD6_GROUP_HS_TEX,
   FD6_GROUP_DS_TEX,
   FD6_GROUP_GS_TEX,
   FD6_GROUP_FS_TEX,
   FD6_GROUP_RASTERIZER,
   FD6_GROUP_ZSA,
   FD6_GROUP_BLEND,
   FD6_GROUP_SCISSOR,
   FD6_GROUP_BLEND_COLOR,
   FD6_GROUP_SAMPLE_LOCATIONS,
   FD6_GROUP_SO,
   FD6_GROUP_VS_BINDLESS,
   FD6_GROUP_HS_BINDLESS,
   FD6_GROUP_DS_BINDLESS,
   FD6_GROUP_GS_BINDLESS,
   FD6_GROUP_FS_BINDLESS,
   FD6_GROUP_PRIM_MODE_SYSMEM,
   FD6_GROUP_PRIM_MODE_GMEM,

   /*
    * Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group
    */

   FD6_GROUP_PROG_KEY,  /* Set for any state which could change shader key */
   FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */

   /*
    * Note that since we don't interleave draws and grids in the same batch,
    * the compute vs draw state groups can overlap:
    */
   FD6_GROUP_CS_TEX = FD6_GROUP_VS_TEX,
   FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS,
};

/**
 * Pipeline type, i.e. is it just plain old VS+FS (which can be high draw
 * rate and should be a fast-path), or is it a pipeline that uses GS and/or
 * tess to amplify geometry.
 *
 * TODO split GS and TESS?
 */
enum fd6_pipeline_type {
   NO_TESS_GS,   /* Only has VS+FS */
   HAS_TESS_GS,  /* Has tess and/or GS */
};

#define ENABLE_ALL                                                             \
   (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM |                 \
    CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)

struct fd6_state_group {
   struct fd_ringbuffer *stateobj;
   enum fd6_state_id group_id;
   /* enable_mask controls which passes the stateobj is evaluated in:
    * b0 is the binning pass, b1 and/or b2 are the draw (GMEM and/or
    * sysmem) passes.
    */
   uint32_t enable_mask;
};

struct fd6_state {
   struct fd6_state_group groups[32];
   unsigned num_groups;
};
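
/* A minimal sketch of the intended flow (illustrative only; the real
 * callers live in the draw and compute paths, and the stateobj names
 * here are made up), using the helpers defined below:
 *
 *    struct fd6_state state = {};
 *    fd6_state_take_group(&state, rast_stateobj, FD6_GROUP_RASTERIZER);
 *    fd6_state_take_group(&state, blend_stateobj, FD6_GROUP_BLEND);
 *    fd6_state_emit(&state, ring);  // one CP_SET_DRAW_STATE for all groups
 */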

static inline void
fd6_state_emit(struct fd6_state *state, struct fd_ringbuffer *ring)
{
   if (!state->num_groups)
      return;

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups);
   for (unsigned i = 0; i < state->num_groups; i++) {
      struct fd6_state_group *g = &state->groups[i];
      unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;

      assert((g->enable_mask & ~ENABLE_ALL) == 0);

      if (n == 0) {
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                        CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
                        CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      } else {
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
                        CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RB(ring, g->stateobj);
      }

      if (g->stateobj)
         fd_ringbuffer_del(g->stateobj);
   }
}
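
/* Each group contributes exactly three dwords to the packet built above
 * (hence the 3 * num_groups sizing): a header dword with the count,
 * enable bits, and group id, followed by the 64-bit stateobj address
 * (or two zero dwords for a disable entry).  Roughly:
 *
 *    [0] CP_SET_DRAW_STATE__0_COUNT(n) | enable_mask | GROUP_ID(id)
 *    [1] stateobj iova, low 32 bits
 *    [2] stateobj iova, high 32 bits
 */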

static inline unsigned
enable_mask(enum fd6_state_id group_id)
{
   switch (group_id) {
   case FD6_GROUP_PROG: return ENABLE_DRAW;
   case FD6_GROUP_PROG_BINNING: return CP_SET_DRAW_STATE__0_BINNING;
   case FD6_GROUP_PROG_INTERP: return ENABLE_DRAW;
   case FD6_GROUP_FS_TEX: return ENABLE_DRAW;
   case FD6_GROUP_FS_BINDLESS: return ENABLE_DRAW;
   case FD6_GROUP_PRIM_MODE_SYSMEM:
      return CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
   case FD6_GROUP_PRIM_MODE_GMEM: return CP_SET_DRAW_STATE__0_GMEM;
   default: return ENABLE_ALL;
   }
}

static inline void
fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
                     enum fd6_state_id group_id)
{
   assert(state->num_groups < ARRAY_SIZE(state->groups));
   struct fd6_state_group *g = &state->groups[state->num_groups++];
   g->stateobj = stateobj;
   g->group_id = group_id;
   g->enable_mask = enable_mask(group_id);
}

static inline void
fd6_state_add_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
                    enum fd6_state_id group_id)
{
   fd6_state_take_group(state, fd_ringbuffer_ref(stateobj), group_id);
}
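
/* Note the ownership difference between the two helpers above:
 * fd6_state_take_group() consumes the caller's reference (it is dropped
 * by fd6_state_emit() via fd_ringbuffer_del()), while fd6_state_add_group()
 * grabs its own reference so the caller's stateobj survives the emit.
 * For example, with a long-lived cached stateobj (name illustrative):
 *
 *    fd6_state_add_group(&state, cached_stateobj, FD6_GROUP_ZSA);
 *    // cached_stateobj remains valid after fd6_state_emit()
 */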

/* grouped together emit-state for prog/vertex/state emit: */
struct fd6_emit {
   struct fd_context *ctx;
   const struct pipe_draw_info *info;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count_bias *draw;
   uint32_t dirty_groups;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode : 1;
   bool rasterflat : 1;
   bool primitive_restart : 1;
   uint8_t streamout_mask;
   uint32_t draw_id;

   /* cached to avoid repeated lookups: */
   const struct fd6_program_state *prog;

   const struct ir3_shader_variant *vs;
   const struct ir3_shader_variant *hs;
   const struct ir3_shader_variant *ds;
   const struct ir3_shader_variant *gs;
   const struct ir3_shader_variant *fs;

   struct fd6_state state;
};

static inline const struct fd6_program_state *
fd6_emit_get_prog(struct fd6_emit *emit)
{
   return emit->prog;
}

template <chip CHIP>
static inline void
__event_write(struct fd_ringbuffer *ring, enum fd_gpu_event event,
              enum event_write_src esrc, enum event_write_dst edst, uint32_t val,
              struct fd_bo *bo, uint32_t offset, uint64_t orval, int32_t shift)
{
   struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
   unsigned len = info.needs_seqno ? 4 : 1;

   if (CHIP == A6XX) {
      OUT_PKT7(ring, CP_EVENT_WRITE, len);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(info.raw_event) |
               COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP));
   } else if (CHIP == A7XX) {
      if (event == FD_RB_DONE)
         len--;
      OUT_PKT7(ring, CP_EVENT_WRITE, len);
      OUT_RING(ring, CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
               CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
               CP_EVENT_WRITE7_0_WRITE_DST(edst) |
               COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED));
   }

   if (info.needs_seqno) {
      OUT_RELOC(ring, bo, offset, orval, shift); /* ADDR_LO/HI */
      if (len == 4)
         OUT_RING(ring, val);
   }
}

template <chip CHIP>
static inline void
fd6_record_ts(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
              uint64_t orval, int32_t shift)
{
   __event_write<CHIP>(ring, FD_RB_DONE, EV_WRITE_ALWAYSON, EV_DST_RAM, 0,
                       bo, offset, orval, shift);
}

template <chip CHIP>
static inline void
fd6_fence_write(struct fd_ringbuffer *ring, uint32_t val, struct fd_bo *bo,
                uint32_t offset, uint64_t orval, int32_t shift)
{
   __event_write<CHIP>(ring, FD_CACHE_CLEAN, EV_WRITE_USER_32B, EV_DST_RAM, val,
                       bo, offset, orval, shift);
}
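
/* A hypothetical fence-signal sketch (names and the zero or/shift values
 * are illustrative): once the FD_CACHE_CLEAN event retires, the CP writes
 * 'fence_value' into the fence BO at 'offset', where the CPU can poll it:
 *
 *    fd6_fence_write<CHIP>(ring, fence_value, fence_bo, offset, 0, 0);
 */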

template <chip CHIP>
static inline unsigned
fd6_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_gpu_event event)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
   unsigned seqno = 0;

   if (info.needs_seqno)
      seqno = ++fd6_ctx->seqno;

   __event_write<CHIP>(ring, event, EV_WRITE_USER_32B, EV_DST_RAM, seqno,
                       control_ptr(fd6_ctx, seqno));

   return seqno;
}

template <chip CHIP>
static inline void
fd6_cache_inv(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);
   fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);
   fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);
}

template <chip CHIP>
static inline void
fd6_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned seqno;

   /* Emit RB_DONE and wait for its seqno to land in the control buffer: */
   seqno = fd6_event_write<CHIP>(ctx, ring, FD_RB_DONE);

   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
                     CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
   OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

   /* Then clean the caches and wait for that seqno as well: */
   seqno = fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);

   OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
   OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
}

template <chip CHIP>
static inline void
fd6_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   emit_marker6(ring, 7);
   fd6_event_write<CHIP>(ctx, ring, FD_BLIT);
   emit_marker6(ring, 7);
}

static inline bool
fd6_geom_stage(gl_shader_stage type)
{
   switch (type) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
      return true;
   case MESA_SHADER_FRAGMENT:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_KERNEL:
      return false;
   default:
      unreachable("bad shader type");
   }
}

static inline uint32_t
fd6_stage2opcode(gl_shader_stage type)
{
   return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
}

static inline enum a6xx_state_block
fd6_stage2shadersb(gl_shader_stage type)
{
   switch (type) {
   case MESA_SHADER_VERTEX:
      return SB6_VS_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return SB6_HS_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return SB6_DS_SHADER;
   case MESA_SHADER_GEOMETRY:
      return SB6_GS_SHADER;
   case MESA_SHADER_FRAGMENT:
      return SB6_FS_SHADER;
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_KERNEL:
      return SB6_CS_SHADER;
   default:
      unreachable("bad shader type");
      return (enum a6xx_state_block)~0;
   }
}

static inline enum a6xx_tess_spacing
fd6_gl2spacing(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   case TESS_SPACING_EQUAL:
      return TESS_EQUAL;
   case TESS_SPACING_FRACTIONAL_ODD:
      return TESS_FRACTIONAL_ODD;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return TESS_FRACTIONAL_EVEN;
   case TESS_SPACING_UNSPECIFIED:
   default:
      unreachable("spacing must be specified");
   }
}

template <chip CHIP, fd6_pipeline_type PIPELINE>
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
                       struct fd6_emit *emit) assert_dt;
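
/* A rough sketch of a draw-time caller (illustrative only; the real
 * caller also fills in the shader variants, dirty_groups, etc., and
 * 'has_tess_or_gs' is a placeholder condition):
 *
 *    struct fd6_emit emit = {};
 *    emit.ctx = ctx;
 *    emit.info = info;
 *    emit.prog = prog;
 *    if (has_tess_or_gs)
 *       fd6_emit_3d_state<CHIP, HAS_TESS_GS>(ring, &emit);
 *    else
 *       fd6_emit_3d_state<CHIP, NO_TESS_GS>(ring, &emit);
 */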

struct fd6_compute_state;
template <chip CHIP>
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                       struct fd6_compute_state *cs) assert_dt;

template <chip CHIP>
void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);

template <chip CHIP>
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);

void fd6_emit_init_screen(struct pipe_screen *pscreen);

static inline void
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   emit_marker6(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker6(ring, 6);
}

#define WRITE(reg, val)                                                        \
   do {                                                                        \
      OUT_PKT4(ring, reg, 1);                                                  \
      OUT_RING(ring, val);                                                     \
   } while (0)
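
/* WRITE() assumes a 'ring' variable is in scope at the expansion site.
 * A hypothetical use (register and value names are illustrative):
 *
 *    WRITE(REG_A6XX_GRAS_CL_CNTL, cl_cntl_value);
 */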

#endif /* FD6_EMIT_H */