/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#define FD_BO_NO_HARDPIN 1

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_string.h"

#include "freedreno_blitter.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd6_barrier.h"
#include "fd6_blend.h"
#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_program.h"
#include "fd6_vsc.h"
#include "fd6_zsa.h"

#include "fd6_pack.h"

enum draw_type {
   DRAW_DIRECT_OP_NORMAL,
   DRAW_DIRECT_OP_INDEXED,
   DRAW_INDIRECT_OP_XFB,
   DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED,
   DRAW_INDIRECT_OP_INDIRECT_COUNT,
   DRAW_INDIRECT_OP_NORMAL,
   DRAW_INDIRECT_OP_INDEXED,
};

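/* Note: is_indirect() relies on this enum ordering, with all the indirect
 * draw types grouped after the direct ones.
 */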
static inline bool
is_indirect(enum draw_type type)
{
   return type >= DRAW_INDIRECT_OP_XFB;
}

static inline bool
is_indexed(enum draw_type type)
{
   switch (type) {
   case DRAW_DIRECT_OP_INDEXED:
   case DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED:
   case DRAW_INDIRECT_OP_INDEXED:
      return true;
   default:
      return false;
   }
}

static void
draw_emit_xfb(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
              const struct pipe_draw_info *info,
              const struct pipe_draw_indirect_info *indirect)
{
   struct fd_stream_output_target *target =
      fd_stream_output_target(indirect->count_from_stream_output);
   struct fd_resource *offset = fd_resource(target->offset_buf);

   OUT_PKT7(ring, CP_DRAW_AUTO, 6);
   OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
   OUT_RING(ring, info->instance_count);
   OUT_RELOC(ring, offset->bo, 0, 0, 0);
   OUT_RING(ring, 0); /* byte counter offset subtracted from the value read above */
   OUT_RING(ring, target->stride);
}

static inline unsigned
max_indices(const struct pipe_draw_info *info, unsigned index_offset)
{
   struct pipe_resource *idx = info->index.resource;

   assert((info->index_size == 1) ||
          (info->index_size == 2) ||
          (info->index_size == 4));

   /* Conceptually we divide by index_size, but with log2(index_size) we
    * could use a right-shift instead.  Conveniently index_size can only
    * be 1, 2, or 4, and for those values index_size >> 1 happens to equal
    * log2(index_size).  So instead of a divide we can do two right-shifts.
    */
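   /* A walk-through of the two shifts: for index_size == 4,
    * index_size >> 1 == 2 == log2(4), so (width0 - index_offset) >> 2 is
    * (width0 - index_offset) / 4.  The same holds for sizes 1 and 2.
    */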
   unsigned index_size_shift = info->index_size >> 1;
   return (idx->width0 - index_offset) >> index_size_shift;
}

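/* Emit one CP_DRAW_INDIRECT_MULTI packet.  The layout varies with the
 * indirect op: the indexed variants add the index buffer address and max
 * index count, and the indirect-count variants add a reloc for the buffer
 * holding the actual draw count.
 */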
template <draw_type DRAW>
static void
draw_emit_indirect(struct fd_context *ctx,
                   struct fd_ringbuffer *ring,
                   struct CP_DRAW_INDX_OFFSET_0 *draw0,
                   const struct pipe_draw_info *info,
                   const struct pipe_draw_indirect_info *indirect,
                   unsigned index_offset, uint32_t driver_param)
{
   struct fd_resource *ind = fd_resource(indirect->buffer);

   if (DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED) {
      OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 11);
      OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
      OUT_RING(ring,
               (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT_INDEXED)
                | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
      struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
      struct pipe_resource *idx = info->index.resource;
      OUT_RING(ring, indirect->draw_count);
      OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
      OUT_RING(ring, max_indices(info, index_offset));
      OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
      OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
      OUT_RING(ring, indirect->stride);
   } else if (DRAW == DRAW_INDIRECT_OP_INDEXED) {
      OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 9);
      OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
      OUT_RING(ring,
               (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDEXED)
                | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
      struct pipe_resource *idx = info->index.resource;
      OUT_RING(ring, indirect->draw_count);
      /* index buffer address: */
      OUT_RELOC(ring, fd_resource(idx)->bo, index_offset, 0, 0);
      /* max indices: */
      OUT_RING(ring, max_indices(info, index_offset));
      OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
      OUT_RING(ring, indirect->stride);
   } else if (DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT) {
      OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 8);
      OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
      OUT_RING(ring,
               (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_INDIRECT_COUNT)
                | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
      struct fd_resource *count_buf = fd_resource(indirect->indirect_draw_count);
      OUT_RING(ring, indirect->draw_count);
      OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
      OUT_RELOC(ring, count_buf->bo, indirect->indirect_draw_count_offset, 0, 0);
      OUT_RING(ring, indirect->stride);
   } else if (DRAW == DRAW_INDIRECT_OP_NORMAL) {
      OUT_PKT7(ring, CP_DRAW_INDIRECT_MULTI, 6);
      OUT_RING(ring, pack_CP_DRAW_INDX_OFFSET_0(*draw0).value);
      OUT_RING(ring,
               (A6XX_CP_DRAW_INDIRECT_MULTI_1_OPCODE(INDIRECT_OP_NORMAL)
                | A6XX_CP_DRAW_INDIRECT_MULTI_1_DST_OFF(driver_param)));
      OUT_RING(ring, indirect->draw_count);
      OUT_RELOC(ring, ind->bo, indirect->offset, 0, 0);
      OUT_RING(ring, indirect->stride);
   }
}

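/* Emit a direct (non-indirect) draw via CP_DRAW_INDX_OFFSET: */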
template <draw_type DRAW>
static void
draw_emit(struct fd_ringbuffer *ring, struct CP_DRAW_INDX_OFFSET_0 *draw0,
          const struct pipe_draw_info *info,
          const struct pipe_draw_start_count_bias *draw, unsigned index_offset)
{
   if (DRAW == DRAW_DIRECT_OP_INDEXED) {
      assert(!info->has_user_indices);

      struct pipe_resource *idx_buffer = info->index.resource;

      OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
              CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
              CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count),
              CP_DRAW_INDX_OFFSET_3(.first_indx = draw->start),
              A5XX_CP_DRAW_INDX_OFFSET_INDX_BASE(fd_resource(idx_buffer)->bo,
                                                 index_offset),
              A5XX_CP_DRAW_INDX_OFFSET_6(.max_indices = max_indices(info, index_offset)));
   } else if (DRAW == DRAW_DIRECT_OP_NORMAL) {
      OUT_PKT(ring, CP_DRAW_INDX_OFFSET, pack_CP_DRAW_INDX_OFFSET_0(*draw0),
              CP_DRAW_INDX_OFFSET_1(.num_instances = info->instance_count),
              CP_DRAW_INDX_OFFSET_2(.num_indices = draw->count));
   }
}

static void
fixup_draw_state(struct fd_context *ctx, struct fd6_emit *emit) assert_dt
{
   if (ctx->last.dirty ||
       (ctx->last.primitive_restart != emit->primitive_restart)) {
      /* rasterizer state is affected by primitive-restart: */
      fd_context_dirty(ctx, FD_DIRTY_RASTERIZER);
      ctx->last.primitive_restart = emit->primitive_restart;
   }
}

template <fd6_pipeline_type PIPELINE>
static const struct fd6_program_state *
get_program_state(struct fd_context *ctx, const struct pipe_draw_info *info)
   assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct ir3_cache_key key = {
      .vs = (struct ir3_shader_state *)ctx->prog.vs,
      .gs = (struct ir3_shader_state *)ctx->prog.gs,
      .fs = (struct ir3_shader_state *)ctx->prog.fs,
      .clip_plane_enable = ctx->rasterizer->clip_plane_enable,
      .patch_vertices = HAS_TESS_GS ? ctx->patch_vertices : 0,
   };

   /* Some gcc versions get confused about designated initializer order, so
    * work around that by not initializing these inline:
    */
   key.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
   key.key.sample_shading = (ctx->min_samples > 1);
   key.key.msaa = (ctx->framebuffer.samples > 1);
   key.key.rasterflat = ctx->rasterizer->flatshade;

   if (unlikely(ctx->screen->driconf.dual_color_blend_by_location)) {
      struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
      key.key.force_dual_color_blend = blend->use_dual_src_blend;
   }

   if (PIPELINE == HAS_TESS_GS) {
      if (info->mode == MESA_PRIM_PATCHES) {
         struct shader_info *gs_info =
            ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.gs);

         key.hs = (struct ir3_shader_state *)ctx->prog.hs;
         key.ds = (struct ir3_shader_state *)ctx->prog.ds;

         struct shader_info *ds_info = ir3_get_shader_info(key.ds);
         key.key.tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);

         struct shader_info *fs_info = ir3_get_shader_info(key.fs);
         key.key.tcs_store_primid =
            BITSET_TEST(ds_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID) ||
            (gs_info && BITSET_TEST(gs_info->system_values_read, SYSTEM_VALUE_PRIMITIVE_ID)) ||
            (fs_info && (fs_info->inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)));
      }

      if (key.gs) {
         key.key.has_gs = true;
      }
   }

   ir3_fixup_shader_state(&ctx->base, &key.key);

   if (ctx->gen_dirty & BIT(FD6_GROUP_PROG)) {
      struct ir3_program_state *s = ir3_cache_lookup(
         ctx->shader_cache, &key, &ctx->debug);
      fd6_ctx->prog = fd6_program_state(s);
   }

   return fd6_ctx->prog;
}

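/* Emit a FLUSH_SO_n event for each streamout buffer written by the draw: */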
template <chip CHIP>
static void
flush_streamout(struct fd_context *ctx, struct fd6_emit *emit)
   assert_dt
{
   if (!emit->streamout_mask)
      return;

   struct fd_ringbuffer *ring = ctx->batch->draw;

   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      if (emit->streamout_mask & (1 << i)) {
         enum fd_gpu_event evt = (enum fd_gpu_event)(FD_FLUSH_SO_0 + i);
         fd6_event_write<CHIP>(ctx, ring, evt);
      }
   }
}

template <chip CHIP, fd6_pipeline_type PIPELINE, draw_type DRAW>
static void
draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *indirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          unsigned index_offset)
   assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_emit emit;

   emit.ctx = ctx;
   emit.info = info;
   emit.indirect = indirect;
   emit.draw = NULL;
   emit.rasterflat = ctx->rasterizer->flatshade;
   emit.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable;
   emit.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode;
   emit.primitive_restart = info->primitive_restart && is_indexed(DRAW);
   emit.state.num_groups = 0;
   emit.streamout_mask = 0;
   emit.prog = NULL;
   emit.draw_id = 0;

   if (!(ctx->prog.vs && ctx->prog.fs))
      return;

   if (PIPELINE == HAS_TESS_GS) {
      if ((info->mode == MESA_PRIM_PATCHES) || ctx->prog.gs) {
         ctx->gen_dirty |= BIT(FD6_GROUP_PRIMITIVE_PARAMS);
      }
   }

   if ((PIPELINE == NO_TESS_GS) && !is_indirect(DRAW)) {
      fd6_vsc_update_sizes(ctx->batch, info, &draws[0]);
   }

   /* If PROG state (which will mark PROG_KEY dirty) or any state that the
    * key depends on is dirty, then we actually need to construct the shader
    * key, figure out if we need a new variant, and look up the PROG state.
    * Otherwise we can just use the previous prog state.
    */
   if (unlikely(ctx->gen_dirty & BIT(FD6_GROUP_PROG_KEY))) {
      emit.prog = get_program_state<PIPELINE>(ctx, info);
   } else {
      emit.prog = fd6_ctx->prog;
   }

   /* bail if compile failed: */
   if (!emit.prog)
      return;

   fixup_draw_state(ctx, &emit);

   /* *after* fixup_shader_state(): */
   emit.dirty_groups = ctx->gen_dirty;

   emit.vs = fd6_emit_get_prog(&emit)->vs;
   if (PIPELINE == HAS_TESS_GS) {
      emit.hs = fd6_emit_get_prog(&emit)->hs;
      emit.ds = fd6_emit_get_prog(&emit)->ds;
      emit.gs = fd6_emit_get_prog(&emit)->gs;
   }
   emit.fs = fd6_emit_get_prog(&emit)->fs;

   if (emit.prog->num_driver_params || fd6_ctx->has_dp_state) {
      emit.draw = &draws[0];
      emit.dirty_groups |= BIT(FD6_GROUP_DRIVER_PARAMS);
   }

   /* If we are doing xfb, we need to emit the xfb state on every draw: */
   if (emit.prog->stream_output)
      emit.dirty_groups |= BIT(FD6_GROUP_SO);

   if (unlikely(ctx->stats_users > 0)) {
      ctx->stats.vs_regs += ir3_shader_halfregs(emit.vs);
      if (PIPELINE == HAS_TESS_GS) {
         ctx->stats.hs_regs += COND(emit.hs, ir3_shader_halfregs(emit.hs));
         ctx->stats.ds_regs += COND(emit.ds, ir3_shader_halfregs(emit.ds));
         ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
      }
      ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
   }

   struct fd_ringbuffer *ring = ctx->batch->draw;

   struct CP_DRAW_INDX_OFFSET_0 draw0 = {
      .prim_type = ctx->screen->primtypes[info->mode],
      .vis_cull = USE_VISIBILITY,
      .gs_enable = !!ctx->prog.gs,
   };

   if (DRAW == DRAW_INDIRECT_OP_XFB) {
      draw0.source_select = DI_SRC_SEL_AUTO_XFB;
   } else if (DRAW == DRAW_DIRECT_OP_INDEXED ||
              DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED ||
              DRAW == DRAW_INDIRECT_OP_INDEXED) {
      draw0.source_select = DI_SRC_SEL_DMA;
      draw0.index_size = fd4_size2indextype(info->index_size);
   } else {
      draw0.source_select = DI_SRC_SEL_AUTO_INDEX;
   }

   if ((PIPELINE == HAS_TESS_GS) && (info->mode == MESA_PRIM_PATCHES)) {
      struct shader_info *ds_info =
         ir3_get_shader_info((struct ir3_shader_state *)ctx->prog.ds);
      unsigned tessellation = ir3_tess_mode(ds_info->tess._primitive_mode);

      uint32_t factor_stride = ir3_tess_factor_stride(tessellation);

      STATIC_ASSERT(IR3_TESS_ISOLINES == TESS_ISOLINES + 1);
      STATIC_ASSERT(IR3_TESS_TRIANGLES == TESS_TRIANGLES + 1);
      STATIC_ASSERT(IR3_TESS_QUADS == TESS_QUADS + 1);
      draw0.patch_type = (enum a6xx_patch_type)(tessellation - 1);

      draw0.prim_type = (enum pc_di_primtype)(DI_PT_PATCHES0 + ctx->patch_vertices);
      draw0.tess_enable = true;

      /* maximum number of patches that can fit in tess factor/param buffers */
      uint32_t subdraw_size = MIN2(FD6_TESS_FACTOR_SIZE / factor_stride,
                                   FD6_TESS_PARAM_SIZE / (emit.hs->output_size * 4));
      /* convert from # of patches to draw count (in vertices) */
      subdraw_size *= ctx->patch_vertices;
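      /* For illustration (hypothetical numbers, not hardware limits): if the
       * factor buffer had room for 1024 patches and the param buffer for 512,
       * and patch_vertices is 3, subdraw_size = MIN2(1024, 512) * 3 = 1536.
       */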

      OUT_PKT7(ring, CP_SET_SUBDRAW_SIZE, 1);
      OUT_RING(ring, subdraw_size);

      ctx->batch->tessellation = true;
   }

   uint32_t index_start = is_indexed(DRAW) ? draws[0].index_bias : draws[0].start;
   if (ctx->last.dirty || (ctx->last.index_start != index_start)) {
      OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
      OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
      ctx->last.index_start = index_start;
   }

   if (ctx->last.dirty || (ctx->last.instance_start != info->start_instance)) {
      OUT_PKT4(ring, REG_A6XX_VFD_INSTANCE_START_OFFSET, 1);
      OUT_RING(ring, info->start_instance); /* VFD_INSTANCE_START_OFFSET */
      ctx->last.instance_start = info->start_instance;
   }

   uint32_t restart_index =
      info->primitive_restart ? info->restart_index : 0xffffffff;
   if (ctx->last.dirty || (ctx->last.restart_index != restart_index)) {
      OUT_PKT4(ring, REG_A6XX_PC_RESTART_INDEX, 1);
      OUT_RING(ring, restart_index); /* PC_RESTART_INDEX */
      ctx->last.restart_index = restart_index;
   }

   if (emit.dirty_groups)
      fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);

   /* All known firmware versions do not wait for WFIs with CP_DRAW_AUTO.
    * Plus, for the common case where the counter buffer is written by
    * vkCmdEndTransformFeedback, we need to wait for the CP_WAIT_MEM_WRITES
    * to complete, which means we need a WAIT_FOR_ME anyway.
    *
    * Also, on some firmwares CP_DRAW_INDIRECT_MULTI waits for WFIs before
    * reading the draw parameters but after reading the count, so commands
    * that use indirect draw count need a WFM anyway.
    */
   if (DRAW == DRAW_INDIRECT_OP_XFB ||
       DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED ||
       DRAW == DRAW_INDIRECT_OP_INDIRECT_COUNT)
      ctx->batch->barrier |= FD6_WAIT_FOR_ME;

   if (ctx->batch->barrier)
      fd6_barrier_flush<CHIP>(ctx->batch);

   /* For debugging after a lockup, write a unique counter value to scratch7
    * for each draw, to make it easier to match up register dumps to
    * cmdstream.  The combination of IB (scratch6) and DRAW is enough to
    * "triangulate" the particular draw that caused the lockup.
    */
   emit_marker6(ring, 7);

   if (is_indirect(DRAW)) {
      assert(num_draws == 1); /* only >1 for direct draws */
      if (DRAW == DRAW_INDIRECT_OP_XFB) {
         draw_emit_xfb(ring, &draw0, info, indirect);
      } else {
         const struct ir3_const_state *const_state = ir3_const_state(emit.vs);
         uint32_t dst_offset_dp = const_state->offsets.driver_param;

         /* If unused, pass 0 for DST_OFF: */
         if (dst_offset_dp > emit.vs->constlen)
            dst_offset_dp = 0;

         draw_emit_indirect<DRAW>(ctx, ring, &draw0, info, indirect, index_offset, dst_offset_dp);
      }
   } else {
      draw_emit<DRAW>(ring, &draw0, info, &draws[0], index_offset);

      if (unlikely(num_draws > 1)) {

         /* Most state won't need to be re-emitted, other than xfb and
          * driver-params:
          */
         emit.dirty_groups = 0;

         if (emit.prog->num_driver_params)
            emit.dirty_groups |= BIT(FD6_GROUP_DRIVER_PARAMS);

         if (emit.prog->stream_output)
            emit.dirty_groups |= BIT(FD6_GROUP_SO);

         uint32_t last_index_start = ctx->last.index_start;

         for (unsigned i = 1; i < num_draws; i++) {
            flush_streamout<CHIP>(ctx, &emit);

            fd6_vsc_update_sizes(ctx->batch, info, &draws[i]);

            uint32_t index_start = is_indexed(DRAW) ? draws[i].index_bias : draws[i].start;
            if (last_index_start != index_start) {
               OUT_PKT4(ring, REG_A6XX_VFD_INDEX_OFFSET, 1);
               OUT_RING(ring, index_start); /* VFD_INDEX_OFFSET */
               last_index_start = index_start;
            }

            if (emit.dirty_groups) {
               emit.state.num_groups = 0;
               emit.draw = &draws[i];
               emit.draw_id = info->increment_draw_id ? i : 0;
               fd6_emit_3d_state<CHIP, PIPELINE>(ring, &emit);
            }

            assert(!index_offset); /* handled by util_draw_multi() */

            draw_emit<DRAW>(ring, &draw0, info, &draws[i], 0);
         }

         ctx->last.index_start = last_index_start;
      }
   }

   emit_marker6(ring, 7);

   flush_streamout<CHIP>(ctx, &emit);

   fd_context_all_clean(ctx);
}

template <chip CHIP, fd6_pipeline_type PIPELINE>
static void
fd6_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws,
              unsigned index_offset)
   assert_dt
{
   /* The non-indirect case is where we are more likely to see a high draw rate: */
   if (likely(!indirect)) {
      if (info->index_size) {
         draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_INDEXED>(
            ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
      } else {
         draw_vbos<CHIP, PIPELINE, DRAW_DIRECT_OP_NORMAL>(
            ctx, info, drawid_offset, NULL, draws, num_draws, index_offset);
      }
   } else if (indirect->count_from_stream_output) {
      draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_XFB>(
         ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
   } else if (indirect->indirect_draw_count && info->index_size) {
      draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT_INDEXED>(
         ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
   } else if (indirect->indirect_draw_count) {
      draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDIRECT_COUNT>(
         ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
   } else if (info->index_size) {
      draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_INDEXED>(
         ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
   } else {
      draw_vbos<CHIP, PIPELINE, DRAW_INDIRECT_OP_NORMAL>(
         ctx, info, drawid_offset, indirect, draws, num_draws, index_offset);
   }
}

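/* Select the draw_vbos variant once, based on whether any tess/geometry
 * stages are bound, rather than checking on every draw:
 */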
template <chip CHIP>
static void
fd6_update_draw(struct fd_context *ctx)
{
   const uint32_t gs_tess_stages = BIT(MESA_SHADER_TESS_CTRL) |
      BIT(MESA_SHADER_TESS_EVAL) | BIT(MESA_SHADER_GEOMETRY);

   if (ctx->bound_shader_stages & gs_tess_stages) {
      ctx->draw_vbos = fd6_draw_vbos<CHIP, HAS_TESS_GS>;
   } else {
      ctx->draw_vbos = fd6_draw_vbos<CHIP, NO_TESS_GS>;
   }
}

static bool
do_lrz_clear(struct fd_context *ctx, enum fd_buffer_mask buffers)
{
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;

   if (!pfb->zsbuf)
      return false;

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

   return (buffers & FD_BUFFER_DEPTH) && zsbuf->lrz;
}

static bool
fd6_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
          const union pipe_color_union *color, double depth,
          unsigned stencil) assert_dt
{
   struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
   struct fd_batch_subpass *subpass = ctx->batch->subpass;
   /* shift past the depth/stencil bits to get the per-MRT color bits: */
   unsigned color_buffers = buffers >> 2;

   if (pfb->samples > 1) {
      /* We need to do the multisample clear on the 3d pipe, so fall back to
       * u_blitter.  But we do this ourselves so that we can still benefit
       * from LRZ, as normally zfunc==ALWAYS would invalidate LRZ.  So we
       * want to mark the LRZ state as valid *after* the fallback clear.
       */
      fd_blitter_clear(&ctx->base, (unsigned)buffers, color, depth, stencil);
   }

   /* If we are clearing after draws, split out a new subpass:
    */
   if (subpass->num_draws > 0) {
      /* If we won't be able to do any fast-clears, avoid pointlessly
       * splitting out a new subpass:
       */
      if (pfb->samples > 1 && !do_lrz_clear(ctx, buffers))
         return true;

      subpass = fd_batch_create_subpass(ctx->batch);

      /* If doing an LRZ clear, replace the existing LRZ buffer with a
       * freshly allocated one so that we have valid LRZ state for the
       * new pass.  Otherwise unconditional writes to the depth buffer
       * would cause LRZ state to be invalid.
       */
      if (do_lrz_clear(ctx, buffers)) {
         struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

         fd_bo_del(subpass->lrz);
         subpass->lrz = fd_bo_new(ctx->screen->dev, fd_bo_size(zsbuf->lrz),
                                  FD_BO_NOMAP, "lrz");
         fd_bo_del(zsbuf->lrz);
         zsbuf->lrz = fd_bo_ref(subpass->lrz);
      }
   }

   if (do_lrz_clear(ctx, buffers)) {
      struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

      zsbuf->lrz_valid = true;
      zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
      subpass->clear_depth = depth;
      subpass->fast_cleared |= FD_BUFFER_LRZ;

      STATIC_ASSERT((FD_BUFFER_LRZ & FD_BUFFER_ALL) == 0);
   }

   /* We've already done the fallback 3d clear: */
   if (pfb->samples > 1)
      return true;

   u_foreach_bit (i, color_buffers)
      subpass->clear_color[i] = *color;
   if (buffers & FD_BUFFER_DEPTH)
      subpass->clear_depth = depth;
   if (buffers & FD_BUFFER_STENCIL)
      subpass->clear_stencil = stencil;

   subpass->fast_cleared |= buffers;

   return true;
}

template <chip CHIP>
void
fd6_draw_init(struct pipe_context *pctx)
   disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);
   ctx->clear = fd6_clear;
   ctx->update_draw = fd6_update_draw<CHIP>;
   fd6_update_draw<CHIP>(ctx);
}
FD_GENX(fd6_draw_init);