/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FD6_EMIT_H
#define FD6_EMIT_H

#include "pipe/p_context.h"

#include "fd6_context.h"
#include "fd6_program.h"
#include "fdl/fd6_format_table.h"
#include "freedreno_context.h"
#include "freedreno_gpu_event.h"
#include "ir3_gallium.h"

struct fd_ringbuffer;
/* To collect all the state objects to emit in a single CP_SET_DRAW_STATE
 * packet, the emit tracks a collection of however many state_groups need
 * to be emitted.
 */
enum fd6_state_id {
   FD6_GROUP_PROG_CONFIG,
   FD6_GROUP_PROG,
   FD6_GROUP_PROG_BINNING,
   FD6_GROUP_PROG_INTERP,
   FD6_GROUP_PROG_FB_RAST,
   FD6_GROUP_LRZ,
   FD6_GROUP_VTXSTATE,
   FD6_GROUP_VBO,
   FD6_GROUP_CONST,
   FD6_GROUP_DRIVER_PARAMS,
   FD6_GROUP_PRIMITIVE_PARAMS,
   FD6_GROUP_VS_TEX,
   FD6_GROUP_HS_TEX,
   FD6_GROUP_DS_TEX,
   FD6_GROUP_GS_TEX,
   FD6_GROUP_FS_TEX,
   FD6_GROUP_RASTERIZER,
   FD6_GROUP_ZSA,
   FD6_GROUP_BLEND,
   FD6_GROUP_SCISSOR,
   FD6_GROUP_BLEND_COLOR,
   FD6_GROUP_SAMPLE_LOCATIONS,
   FD6_GROUP_SO,
   FD6_GROUP_VS_BINDLESS,
   FD6_GROUP_HS_BINDLESS,
   FD6_GROUP_DS_BINDLESS,
   FD6_GROUP_GS_BINDLESS,
   FD6_GROUP_FS_BINDLESS,
   FD6_GROUP_PRIM_MODE_SYSMEM,
   FD6_GROUP_PRIM_MODE_GMEM,

   /*
    * Virtual state-groups, which don't turn into a CP_SET_DRAW_STATE group
    */

   FD6_GROUP_PROG_KEY,  /* Set for any state which could change shader key */
   FD6_GROUP_NON_GROUP, /* placeholder group for state emit in IB2, keep last */

   /*
    * Note that since we don't interleave draws and grids in the same batch,
    * the compute vs draw state groups can overlap:
    */
   FD6_GROUP_CS_TEX = FD6_GROUP_VS_TEX,
   FD6_GROUP_CS_BINDLESS = FD6_GROUP_VS_BINDLESS,
};

/**
 * Pipeline type, i.e. is it just plain old VS+FS (which can be high draw
 * rate and should be a fast path) or a pipeline that uses GS and/or tess
 * to amplify geometry.
 *
 * TODO split GS and TESS?
 */
enum fd6_pipeline_type {
   NO_TESS_GS,  /* Only has VS+FS */
   HAS_TESS_GS, /* Has tess and/or GS */
};

#define ENABLE_ALL                                                             \
   (CP_SET_DRAW_STATE__0_BINNING | CP_SET_DRAW_STATE__0_GMEM |                 \
    CP_SET_DRAW_STATE__0_SYSMEM)
#define ENABLE_DRAW (CP_SET_DRAW_STATE__0_GMEM | CP_SET_DRAW_STATE__0_SYSMEM)

struct fd6_state_group {
   struct fd_ringbuffer *stateobj;
   enum fd6_state_id group_id;
   /* enable_mask controls which passes the stateobj is evaluated in:
    * the BINNING bit selects the binning pass, and the GMEM and/or
    * SYSMEM bits select the draw pass.
    */
   uint32_t enable_mask;
};

struct fd6_state {
   struct fd6_state_group groups[32];
   unsigned num_groups;
};

static inline void
fd6_state_emit(struct fd6_state *state, struct fd_ringbuffer *ring)
{
   if (!state->num_groups)
      return;

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3 * state->num_groups);
   for (unsigned i = 0; i < state->num_groups; i++) {
      struct fd6_state_group *g = &state->groups[i];
      unsigned n = g->stateobj ? fd_ringbuffer_size(g->stateobj) / 4 : 0;

      assert((g->enable_mask & ~ENABLE_ALL) == 0);

      if (n == 0) {
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                           CP_SET_DRAW_STATE__0_DISABLE | g->enable_mask |
                           CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      } else {
         OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(n) | g->enable_mask |
                           CP_SET_DRAW_STATE__0_GROUP_ID(g->group_id));
         OUT_RB(ring, g->stateobj);
      }

      if (g->stateobj)
         fd_ringbuffer_del(g->stateobj);
   }
}

static inline unsigned
enable_mask(enum fd6_state_id group_id)
{
   switch (group_id) {
   case FD6_GROUP_PROG: return ENABLE_DRAW;
   case FD6_GROUP_PROG_BINNING: return CP_SET_DRAW_STATE__0_BINNING;
   case FD6_GROUP_PROG_INTERP: return ENABLE_DRAW;
   case FD6_GROUP_FS_TEX: return ENABLE_DRAW;
   case FD6_GROUP_FS_BINDLESS: return ENABLE_DRAW;
   case FD6_GROUP_PRIM_MODE_SYSMEM: return CP_SET_DRAW_STATE__0_SYSMEM | CP_SET_DRAW_STATE__0_BINNING;
   case FD6_GROUP_PRIM_MODE_GMEM: return CP_SET_DRAW_STATE__0_GMEM;
   default: return ENABLE_ALL;
   }
}

static inline void
fd6_state_take_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
                     enum fd6_state_id group_id)
{
   assert(state->num_groups < ARRAY_SIZE(state->groups));
   struct fd6_state_group *g = &state->groups[state->num_groups++];
   g->stateobj = stateobj;
   g->group_id = group_id;
   g->enable_mask = enable_mask(group_id);
}

static inline void
fd6_state_add_group(struct fd6_state *state, struct fd_ringbuffer *stateobj,
                    enum fd6_state_id group_id)
{
   fd6_state_take_group(state, fd_ringbuffer_ref(stateobj), group_id);
}
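
/* Typical usage, as a rough sketch (fd_submit_new_ringbuffer() and the
 * FD_RINGBUFFER_STREAMING flag come from the freedreno drm layer, not this
 * header): a state object is built once into its own ring, then handed off
 * (take) or referenced (add) into the per-draw group list, e.g.:
 *
 *    struct fd_ringbuffer *obj = fd_submit_new_ringbuffer(
 *          ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
 *    ...emit the state into obj with OUT_PKT4()/OUT_RING()...
 *    fd6_state_take_group(&emit->state, obj, FD6_GROUP_BLEND);
 *
 * fd6_state_emit() then folds every collected group into a single
 * CP_SET_DRAW_STATE packet and drops the references it holds.  Note that
 * fd6_state_take_group() consumes the caller's reference, while
 * fd6_state_add_group() takes its own via fd_ringbuffer_ref().
 */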

/* grouped together emit-state for prog/vertex/state emit: */
struct fd6_emit {
   struct fd_context *ctx;
   const struct pipe_draw_info *info;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count_bias *draw;
   uint32_t dirty_groups;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode : 1;
   bool rasterflat : 1;
   bool primitive_restart : 1;
   uint8_t streamout_mask;
   uint32_t draw_id;

   /* cached to avoid repeated lookups: */
   const struct fd6_program_state *prog;

   const struct ir3_shader_variant *vs;
   const struct ir3_shader_variant *hs;
   const struct ir3_shader_variant *ds;
   const struct ir3_shader_variant *gs;
   const struct ir3_shader_variant *fs;

   struct fd6_state state;
};

static inline const struct fd6_program_state *
fd6_emit_get_prog(struct fd6_emit *emit)
{
   return emit->prog;
}

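/* Low-level helper behind the fd6_*_write() wrappers below: emits a
 * CP_EVENT_WRITE (a6xx) or CP_EVENT_WRITE7 (a7xx) packet, and if the event
 * needs a seqno/timestamp, writes it to bo+offset.
 */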
template <chip CHIP>
static inline void
__event_write(struct fd_ringbuffer *ring, enum fd_gpu_event event,
              enum event_write_src esrc, enum event_write_dst edst, uint32_t val,
              struct fd_bo *bo, uint32_t offset, uint64_t orval, int32_t shift)
{
   struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
   unsigned len = info.needs_seqno ? 4 : 1;

   if (CHIP == A6XX) {
      OUT_PKT7(ring, CP_EVENT_WRITE, len);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(info.raw_event) |
                        COND(info.needs_seqno, CP_EVENT_WRITE_0_TIMESTAMP));
   } else if (CHIP == A7XX) {
      if (event == FD_RB_DONE)
         len--;
      OUT_PKT7(ring, CP_EVENT_WRITE, len);
      OUT_RING(ring, CP_EVENT_WRITE7_0_EVENT(info.raw_event) |
                        CP_EVENT_WRITE7_0_WRITE_SRC(esrc) |
                        CP_EVENT_WRITE7_0_WRITE_DST(edst) |
                        COND(info.needs_seqno, CP_EVENT_WRITE7_0_WRITE_ENABLED));
   }

   if (info.needs_seqno) {
      OUT_RELOC(ring, bo, offset, orval, shift); /* ADDR_LO/HI */
      if (len == 4)
         OUT_RING(ring, val);
   }
}

template <chip CHIP>
static inline void
fd6_record_ts(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
              uint64_t orval, int32_t shift)
{
   __event_write<CHIP>(ring, FD_RB_DONE, EV_WRITE_ALWAYSON, EV_DST_RAM, 0,
                       bo, offset, orval, shift);
}

template <chip CHIP>
static inline void
fd6_fence_write(struct fd_ringbuffer *ring, uint32_t val, struct fd_bo *bo,
                uint32_t offset, uint64_t orval, int32_t shift)
{
   __event_write<CHIP>(ring, FD_CACHE_CLEAN, EV_WRITE_USER_32B, EV_DST_RAM, val,
                       bo, offset, orval, shift);
}

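/* Emit an event, returning the seqno (if any) that the GPU will write to
 * the per-context control buffer once the event completes.  For events
 * that need a seqno, the fd6_context counter is incremented, so the
 * returned value can later be waited on, as fd6_cache_flush() does below.
 */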
template <chip CHIP>
static inline unsigned
fd6_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring, enum fd_gpu_event event)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_gpu_event_info info = fd_gpu_events<CHIP>[event];
   unsigned seqno = 0;

   if (info.needs_seqno)
      seqno = ++fd6_ctx->seqno;

   __event_write<CHIP>(ring, event, EV_WRITE_USER_32B, EV_DST_RAM, seqno,
                       control_ptr(fd6_ctx, seqno));

   return seqno;
}

template <chip CHIP>
static inline void
fd6_cache_inv(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_COLOR);
   fd6_event_write<CHIP>(ctx, ring, FD_CCU_INVALIDATE_DEPTH);
   fd6_event_write<CHIP>(ctx, ring, FD_CACHE_INVALIDATE);
}

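/* Full cache flush: wait for prior rendering to finish (FD_RB_DONE), then
 * clean the caches and wait for the clean's seqno to land before continuing.
 */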
template <chip CHIP>
static inline void
fd6_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   unsigned seqno;

   seqno = fd6_event_write<CHIP>(ctx, ring, FD_RB_DONE);

   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
                     CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
   OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
   OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));

   seqno = fd6_event_write<CHIP>(ctx, ring, FD_CACHE_CLEAN);

   OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
   OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
   OUT_RELOC(ring, control_ptr(fd6_ctx, seqno));
   OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
}

template <chip CHIP>
static inline void
fd6_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   emit_marker6(ring, 7);
   fd6_event_write<CHIP>(ctx, ring, FD_BLIT);
   emit_marker6(ring, 7);
}

static inline bool
fd6_geom_stage(gl_shader_stage type)
{
   switch (type) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY:
      return true;
   case MESA_SHADER_FRAGMENT:
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_KERNEL:
      return false;
   default:
      unreachable("bad shader type");
   }
}

static inline uint32_t
fd6_stage2opcode(gl_shader_stage type)
{
   return fd6_geom_stage(type) ? CP_LOAD_STATE6_GEOM : CP_LOAD_STATE6_FRAG;
}

static inline enum a6xx_state_block
fd6_stage2shadersb(gl_shader_stage type)
{
   switch (type) {
   case MESA_SHADER_VERTEX:
      return SB6_VS_SHADER;
   case MESA_SHADER_TESS_CTRL:
      return SB6_HS_SHADER;
   case MESA_SHADER_TESS_EVAL:
      return SB6_DS_SHADER;
   case MESA_SHADER_GEOMETRY:
      return SB6_GS_SHADER;
   case MESA_SHADER_FRAGMENT:
      return SB6_FS_SHADER;
   case MESA_SHADER_COMPUTE:
   case MESA_SHADER_KERNEL:
      return SB6_CS_SHADER;
   default:
      unreachable("bad shader type");
      return (enum a6xx_state_block)~0;
   }
}

static inline enum a6xx_tess_spacing
fd6_gl2spacing(enum gl_tess_spacing spacing)
{
   switch (spacing) {
   case TESS_SPACING_EQUAL:
      return TESS_EQUAL;
   case TESS_SPACING_FRACTIONAL_ODD:
      return TESS_FRACTIONAL_ODD;
   case TESS_SPACING_FRACTIONAL_EVEN:
      return TESS_FRACTIONAL_EVEN;
   case TESS_SPACING_UNSPECIFIED:
   default:
      unreachable("spacing must be specified");
   }
}

template <chip CHIP, fd6_pipeline_type PIPELINE>
void fd6_emit_3d_state(struct fd_ringbuffer *ring,
                       struct fd6_emit *emit) assert_dt;

struct fd6_compute_state;
template <chip CHIP>
void fd6_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                       struct fd6_compute_state *cs) assert_dt;

template <chip CHIP>
void fd6_emit_ccu_cntl(struct fd_ringbuffer *ring, struct fd_screen *screen, bool gmem);

template <chip CHIP>
void fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring);

void fd6_emit_init_screen(struct pipe_screen *pscreen);

static inline void
fd6_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   emit_marker6(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker6(ring, 6);
}

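/* Shorthand for a single-register PKT4 write.  Note that it relies on a
 * `ring` variable being in scope at the use site.
 */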
#define WRITE(reg, val)                                                        \
   do {                                                                        \
      OUT_PKT4(ring, reg, 1);                                                  \
      OUT_RING(ring, val);                                                     \
   } while (0)

#endif /* FD6_EMIT_H */