1 /*
2 * Copyright © 2016 Rob Clark <[email protected]>
3 * Copyright © 2018 Google, Inc.
4 * SPDX-License-Identifier: MIT
5 *
6 * Authors:
7 * Rob Clark <[email protected]>
8 */
9
10 #define FD_BO_NO_HARDPIN 1
11
12 #include <stdio.h>
13
14 #include "pipe/p_state.h"
15 #include "util/format/u_format.h"
16 #include "util/u_inlines.h"
17 #include "util/u_memory.h"
18 #include "util/u_string.h"
19
20 #include "freedreno_draw.h"
21 #include "freedreno_resource.h"
22 #include "freedreno_state.h"
23 #include "freedreno_tracepoints.h"
24
25 #include "fd6_barrier.h"
26 #include "fd6_blitter.h"
27 #include "fd6_context.h"
28 #include "fd6_draw.h"
29 #include "fd6_emit.h"
30 #include "fd6_gmem.h"
31 #include "fd6_pack.h"
32 #include "fd6_program.h"
33 #include "fd6_resource.h"
34 #include "fd6_zsa.h"
35
36 /**
37 * Emits the UBWC flag buffer registers, suitable for RB_MRT_FLAG_BUFFER,
38 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
39 */
40 void
41 fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
42 int level, int layer)
43 {
44 if (fd_resource_ubwc_enabled(rsc, level)) {
45 OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
46 0);
47 OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
48 fdl_ubwc_pitch(&rsc->layout, level)) |
49 A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
50 rsc->layout.ubwc_layer_size >> 2));
51 } else {
52 OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
53 OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
54 OUT_RING(ring, 0x00000000);
55 }
56 }
57
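/* Emit per-MRT color attachment state (format, tiling, pitch, and the
 * buffer base address both in system memory and in GMEM):
 */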
58 template <chip CHIP>
59 static void
60 emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
61 const struct fd_gmem_stateobj *gmem)
62 {
63 unsigned srgb_cntl = 0;
64 unsigned i;
65
66 /* Note, GLES 3.2 says "If the fragment’s layer number is negative, or
67 * greater than or equal to the minimum number of layers of any attachment,
68 * the effects of the fragment on the framebuffer contents are undefined."
69 */
70 unsigned max_layer_index = 0;
71 enum a6xx_format mrt0_format = FMT6_NONE;
72
73 for (i = 0; i < pfb->nr_cbufs; i++) {
74 enum a3xx_color_swap swap = WZYX;
75 bool sint = false, uint = false;
76 struct fd_resource *rsc = NULL;
77 ASSERTED struct fdl_slice *slice = NULL;
78 uint32_t stride = 0;
79 uint32_t array_stride = 0;
80 uint32_t offset;
81
82 if (!pfb->cbufs[i])
83 continue;
84
85 struct pipe_surface *psurf = pfb->cbufs[i];
86 enum pipe_format pformat = psurf->format;
87 rsc = fd_resource(psurf->texture);
88
89 uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
90 slice = fd_resource_slice(rsc, psurf->u.tex.level);
91 enum a6xx_tile_mode tile_mode = (enum a6xx_tile_mode)
92 fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
93 enum a6xx_format format = fd6_color_format(pformat, tile_mode);
94 sint = util_format_is_pure_sint(pformat);
95 uint = util_format_is_pure_uint(pformat);
96
97 if (util_format_is_srgb(pformat))
98 srgb_cntl |= (1 << i);
99
100 offset =
101 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
102
103 stride = fd_resource_pitch(rsc, psurf->u.tex.level);
104 array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
105 swap = fd6_color_swap(pformat, (enum a6xx_tile_mode)rsc->layout.tile_mode);
106
107 max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;
108
109 assert((offset + slice->size0) <= fd_bo_size(rsc->bo));
110
111 /* Explicitly attach the bo, in case this is a batch with no draws (so it was never added via the batch's resource tracking): */
112 fd_ringbuffer_attach_bo(ring, rsc->bo);
113
114 OUT_REG(ring,
115 RB_MRT_BUF_INFO(CHIP, i,
116 .color_format = format,
117 .color_tile_mode = tile_mode,
118 .color_swap = swap,
119 .losslesscompen = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level),
120 ),
121 A6XX_RB_MRT_PITCH(i, stride),
122 A6XX_RB_MRT_ARRAY_PITCH(i, array_stride),
123 A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
124 A6XX_RB_MRT_BASE_GMEM(i, base));
125
126 OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
127 .color_sint = sint, .color_uint = uint));
128
129 OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
130 fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
131 psurf->u.tex.first_layer);
132
133 if (i == 0)
134 mrt0_format = format;
135 }
136 if (pfb->zsbuf)
137 max_layer_index = pfb->zsbuf->u.tex.last_layer - pfb->zsbuf->u.tex.first_layer;
138
139 OUT_REG(ring, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = mrt0_format));
140
141 OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
142 OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));
143
144 OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
145 }
146
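/* Emit depth/stencil attachment state, including the separate-stencil
 * (or stencil-only S8) cases:
 */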
147 template <chip CHIP>
148 static void
149 emit_zs(struct fd_context *ctx, struct fd_ringbuffer *ring,
150 struct pipe_surface *zsbuf, const struct fd_gmem_stateobj *gmem)
151 {
152 if (zsbuf) {
153 struct fd_resource *rsc = fd_resource(zsbuf->texture);
154 struct fd_resource *stencil = rsc->stencil;
155 uint32_t stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
156 uint32_t array_stride = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
157 uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
158 uint32_t offset =
159 fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
160
161 /* We could have a depth buffer, but no draws with depth write/test
162 * enabled, in which case it wouldn't have been part of the batch
163 * resource tracking
164 */
165 fd_ringbuffer_attach_bo(ring, rsc->bo);
166
167 if (zsbuf->format == PIPE_FORMAT_S8_UINT) {
168 /* S8 is implemented as Z32_S8 minus the Z32 plane: */
169 enum a6xx_depth_format fmt = DEPTH6_32;
170
171 OUT_REG(ring,
172 RB_DEPTH_BUFFER_INFO(CHIP,
173 .depth_format = fmt,
174 .tilemode = TILE6_3,
175 .losslesscompen = fd_resource_ubwc_enabled(rsc, zsbuf->u.tex.level),
176 ),
177 A6XX_RB_DEPTH_BUFFER_PITCH(0),
178 A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(0),
179 A6XX_RB_DEPTH_BUFFER_BASE(.qword = 0),
180 A6XX_RB_DEPTH_BUFFER_BASE_GMEM(base));
181
182 OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
183
184 stencil = rsc;
185 } else {
186 enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
187
188 OUT_REG(ring,
189 RB_DEPTH_BUFFER_INFO(CHIP,
190 .depth_format = fmt,
191 .tilemode = TILE6_3,
192 .losslesscompen = fd_resource_ubwc_enabled(rsc, zsbuf->u.tex.level),
193 ),
194 A6XX_RB_DEPTH_BUFFER_PITCH(stride),
195 A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(array_stride),
196 A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
197 A6XX_RB_DEPTH_BUFFER_BASE_GMEM(base));
198
199 OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));
200
201 OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
202 fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
203 zsbuf->u.tex.first_layer);
204 }
205
206 if (stencil) {
207 stride = fd_resource_pitch(stencil, zsbuf->u.tex.level);
208 array_stride = fd_resource_layer_stride(stencil, zsbuf->u.tex.level);
209 uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
210 uint32_t offset =
211 fd_resource_offset(stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);
212
213 fd_ringbuffer_attach_bo(ring, stencil->bo);
214
215 OUT_REG(ring,
216 RB_STENCIL_INFO(
217 CHIP,
218 .separate_stencil = true,
219 .tilemode = TILE6_3,
220 ),
221 A6XX_RB_STENCIL_BUFFER_PITCH(stride),
222 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(array_stride),
223 A6XX_RB_STENCIL_BUFFER_BASE(.bo = stencil->bo, .bo_offset = offset),
224 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(base)
225 );
226 } else {
227 OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
228 }
229 } else {
230 OUT_REG(ring,
231 RB_DEPTH_BUFFER_INFO(
232 CHIP,
233 .depth_format = DEPTH6_NONE,
234 ),
235 A6XX_RB_DEPTH_BUFFER_PITCH(),
236 A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(),
237 A6XX_RB_DEPTH_BUFFER_BASE(),
238 A6XX_RB_DEPTH_BUFFER_BASE_GMEM(),
239 );
240
241 OUT_REG(ring,
242 A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));
243
244 OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
245 }
246 }
247
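/* Emit LRZ buffer state for a subpass, or disable LRZ if the subpass
 * has no LRZ buffer:
 */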
248 template <chip CHIP>
249 static void
250 emit_lrz(struct fd_batch *batch, struct fd_batch_subpass *subpass)
251 {
252 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
253 struct fd_ringbuffer *ring = batch->gmem;
254
255 if (!subpass->lrz) {
256 OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(),
257 A6XX_GRAS_LRZ_BUFFER_PITCH(),
258 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
259 if (CHIP >= A7XX)
260 OUT_REG(ring, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO());
261 return;
262 }
263
264 /* When swapping LRZ buffers we need to flush the LRZ cache.
265 * We possibly don't need this during the binning pass; it
266 * appears that the corruption happens on the read-side, i.e.
267 * we change the LRZ buffer after a sub-pass, but get a
268 * cache-hit on stale data from the previous LRZ buffer.
269 */
270 fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);
271
272 struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
273 OUT_REG(ring,
274 A6XX_GRAS_LRZ_BUFFER_BASE(.bo = subpass->lrz),
275 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_pitch),
276 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(
277 .bo = zsbuf->lrz_fc_offset ? subpass->lrz : NULL,
278 .bo_offset = zsbuf->lrz_fc_offset
279 ),
280 );
281 fd_ringbuffer_attach_bo(ring, subpass->lrz);
282
283 if (CHIP >= A7XX) {
284 OUT_REG(ring,
285 A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(
286 .depth_format = fd6_pipe2depth(pfb->zsbuf->format),
287 )
288 );
289 }
290 }
291
292 /* Emit any needed lrz clears to the prologue cmds
293 */
294 template <chip CHIP>
295 static void
296 emit_lrz_clears(struct fd_batch *batch)
297 {
298 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
299 struct fd_context *ctx = batch->ctx;
300 unsigned count = 0;
301
302 if (!pfb->zsbuf)
303 return;
304
305 struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
306
307 foreach_subpass (subpass, batch) {
308 /* The lrz buffer isn't explicitly tracked by the batch resource
309 * tracking (tracking the zsbuf is sufficient), but it still needs
310 * to be attached to the ring
311 */
312 if (subpass->lrz)
313 fd_ringbuffer_attach_bo(batch->gmem, subpass->lrz);
314
315 if (!(subpass->fast_cleared & FD_BUFFER_LRZ))
316 continue;
317
318 subpass->fast_cleared &= ~FD_BUFFER_LRZ;
319
320 /* prep before first clear: */
321 if (count == 0) {
322 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
323
324 fd6_emit_ccu_cntl<CHIP>(ring, ctx->screen, false);
325
326 OUT_PKT7(ring, CP_SET_MARKER, 1);
327 OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));
328
329 fd6_emit_flushes<CHIP>(ctx, ring, FD6_FLUSH_CACHE);
330
331 if (ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit !=
332 ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
333 /* This a non-context register, so we have to WFI before changing. */
334 OUT_WFI5(ring);
335 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
336 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
337 }
338 }
339
340 fd6_clear_lrz<CHIP>(batch, zsbuf, subpass->lrz, subpass->clear_depth);
341
342 count++;
343 }
344
345 /* cleanup after last clear: */
346 if (count > 0) {
347 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
348
349 if (ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit !=
350 ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
351 OUT_WFI5(ring);
352 OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
353 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL);
354 }
355
356 /* Clearing writes via CCU color in the PS stage, and LRZ is read via
357 * UCHE in the earlier GRAS stage.
358 *
359 * Note tu also asks for WFI but maybe that is only needed if
360 * has_ccu_flush_bug (and it is added by fd6_emit_flushes() already
361 * in that case)
362 */
363 fd6_emit_flushes<CHIP>(batch->ctx, ring,
364 FD6_FLUSH_CCU_COLOR |
365 FD6_INVALIDATE_CACHE);
366 }
367 }
368
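/* Decide whether to use the hw binning pass.  Each VSC pipe reports
 * per-tile visibility via a 32-bit VSC_STATE register (one bit per tile,
 * see emit_conditional_ib()), so binning is only usable if a pipe covers
 * at most 32 tiles, and only worthwhile with more than one bin and at
 * least one draw.
 */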
369 static bool
370 use_hw_binning(struct fd_batch *batch)
371 {
372 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
373
374 if ((gmem->maxpw * gmem->maxph) > 32)
375 return false;
376
377 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
378 (batch->num_draws > 0);
379 }
380
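/* Patch the texture descriptors used for framebuffer-read (feedback
 * loop) so they sample directly from the attachment's location in GMEM:
 */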
381 static void
382 patch_fb_read_gmem(struct fd_batch *batch)
383 {
384 struct fd_screen *screen = batch->ctx->screen;
385 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
386 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
387
388 unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
389 if (!num_patches)
390 return;
391
392 for (unsigned i = 0; i < num_patches; i++) {
393 struct fd_cs_patch *patch =
394 fd_patch_element(&batch->fb_read_patches, i);
395 int buf = patch->val;
396 struct pipe_surface *psurf = pfb->cbufs[buf];
397 struct pipe_resource *prsc = psurf->texture;
398 struct fd_resource *rsc = fd_resource(prsc);
399 enum pipe_format format = psurf->format;
400
401 uint8_t swiz[4];
402 fdl6_format_swiz(psurf->format, false, swiz);
403
404 uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
405 /* always TILE6_2 mode in GMEM, which also means no swap: */
406 uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
407 A6XX_TEX_CONST_0_FMT(fd6_texture_format(
408 format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
409 A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
410 A6XX_TEX_CONST_0_SWAP(WZYX) |
411 A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
412 COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
413 A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
414 A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
415 A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
416 A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),
417
418 A6XX_TEX_CONST_1_WIDTH(pfb->width) |
419 A6XX_TEX_CONST_1_HEIGHT(pfb->height),
420
421 A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) |
422 A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),
423
424 A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size),
425 A6XX_TEX_CONST_4_BASE_LO(base),
426
427 A6XX_TEX_CONST_5_BASE_HI(base >> 32) |
428 A6XX_TEX_CONST_5_DEPTH(prsc->array_size)
429 };
430
431 memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
432 }
433
434 util_dynarray_clear(&batch->fb_read_patches);
435 }
436
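/* Sysmem-rendering counterpart of patch_fb_read_gmem(): point the
 * framebuffer-read descriptors at the resource in system memory:
 */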
437 template <chip CHIP>
438 static void
439 patch_fb_read_sysmem(struct fd_batch *batch)
440 {
441 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
442
443 unsigned num_patches =
444 fd_patch_num_elements(&batch->fb_read_patches);
445 if (!num_patches)
446 return;
447 for (unsigned i = 0; i < num_patches; i++) {
448 struct fd_cs_patch *patch =
449 fd_patch_element(&batch->fb_read_patches, i);
450 int buf = patch->val;
451
452 struct pipe_surface *psurf = pfb->cbufs[buf];
453 if (!psurf)
454 return;
455
456 struct pipe_resource *prsc = psurf->texture;
457 struct fd_resource *rsc = fd_resource(prsc);
458
459 uint32_t block_width, block_height;
460 fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
461
462 struct fdl_view_args args = {
463 .chip = CHIP,
464
465 .iova = fd_bo_get_iova(rsc->bo),
466
467 .base_miplevel = psurf->u.tex.level,
468 .level_count = 1,
469
470 .base_array_layer = psurf->u.tex.first_layer,
471 .layer_count = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1,
472
473 .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
474 PIPE_SWIZZLE_W},
475 .format = psurf->format,
476
477 .type = FDL_VIEW_TYPE_2D,
478 .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
479 FDL_CHROMA_LOCATION_COSITED_EVEN},
480 };
481 const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
482 struct fdl6_view view;
483 fdl6_view_init(&view, layouts, &args,
484 batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
485 memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
486 }
487
488 util_dynarray_clear(&batch->fb_read_patches);
489 }
490
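/* Emit RB_RENDER_CNTL (plus GRAS_SU_RENDER_CNTL on a7xx).  On a6xx this
 * also carries the depth/MRT UBWC flag enables, and on GPUs with
 * has_cp_reg_write it is written via CP_REG_WRITE, apparently so the CP
 * can track it between binning and rendering passes:
 */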
491 template <chip CHIP>
492 static void
493 update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
494 bool binning)
495 {
496 struct fd_ringbuffer *ring = batch->gmem;
497
498 if (CHIP >= A7XX) {
499 OUT_REG(ring,
500 RB_RENDER_CNTL(
501 CHIP,
502 .binning = binning,
503 .raster_mode = TYPE_TILED,
504 .raster_direction = LR_TB
505 )
506 );
507 OUT_REG(ring,
508 A7XX_GRAS_SU_RENDER_CNTL(
509 .binning = binning,
510 )
511 );
512 return;
513 }
514
515 struct fd_screen *screen = batch->ctx->screen;
516 bool depth_ubwc_enable = false;
517 uint32_t mrts_ubwc_enable = 0;
518 int i;
519
520 if (pfb->zsbuf) {
521 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
522 depth_ubwc_enable =
523 fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
524 }
525
526 for (i = 0; i < pfb->nr_cbufs; i++) {
527 if (!pfb->cbufs[i])
528 continue;
529
530 struct pipe_surface *psurf = pfb->cbufs[i];
531 struct fd_resource *rsc = fd_resource(psurf->texture);
532
533 if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
534 mrts_ubwc_enable |= 1 << i;
535 }
536
537 struct fd_reg_pair rb_render_cntl = RB_RENDER_CNTL(
538 CHIP,
539 .ccusinglecachelinesize = 2,
540 .binning = binning,
541 .flag_depth = depth_ubwc_enable,
542 .flag_mrts = mrts_ubwc_enable,
543 );
544
545 if (screen->info->a6xx.has_cp_reg_write) {
546 OUT_PKT(ring, CP_REG_WRITE,
547 CP_REG_WRITE_0(TRACK_RENDER_CNTL),
548 CP_REG_WRITE_1(rb_render_cntl.reg),
549 CP_REG_WRITE_2(rb_render_cntl.value),
550 );
551 } else {
552 OUT_REG(ring, rb_render_cntl);
553 }
554 }
555
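/* (Re)allocate the VSC draw/prim stream buffers if the estimated stream
 * sizes for this batch exceed the current pitches, then program the bin
 * size and per-pipe config registers:
 */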
556 static void
557 update_vsc_pipe(struct fd_batch *batch)
558 {
559 struct fd_context *ctx = batch->ctx;
560 struct fd6_context *fd6_ctx = fd6_context(ctx);
561 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
562 struct fd_ringbuffer *ring = batch->gmem;
563 unsigned max_vsc_pipes = batch->ctx->screen->info->num_vsc_pipes;
564 int i;
565
566 if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
567 if (fd6_ctx->vsc_draw_strm)
568 fd_bo_del(fd6_ctx->vsc_draw_strm);
569 fd6_ctx->vsc_draw_strm = NULL;
570 /* Note: probably only need to align to 0x40, but aligning stronger
571 * reduces the odds that we will have to realloc again on the next
572 * frame:
573 */
574 fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
575 mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
576 fd6_ctx->vsc_draw_strm_pitch);
577 }
578
579 if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
580 if (fd6_ctx->vsc_prim_strm)
581 fd_bo_del(fd6_ctx->vsc_prim_strm);
582 fd6_ctx->vsc_prim_strm = NULL;
583 fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
584 mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
585 fd6_ctx->vsc_prim_strm_pitch);
586 }
587
588 if (!fd6_ctx->vsc_draw_strm) {
589 /* We also use four bytes per vsc pipe at the end of the draw
590 * stream buffer for VSC_DRAW_STRM_SIZE written back by hw
591 * (see VSC_DRAW_STRM_SIZE_ADDRESS)
592 */
593 unsigned sz = (max_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch) +
594 (max_vsc_pipes * 4);
595 fd6_ctx->vsc_draw_strm =
596 fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_draw_strm");
597 }
598
599 if (!fd6_ctx->vsc_prim_strm) {
600 unsigned sz = max_vsc_pipes * fd6_ctx->vsc_prim_strm_pitch;
601 fd6_ctx->vsc_prim_strm =
602 fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_prim_strm");
603 }
604
605 fd_ringbuffer_attach_bo(ring, fd6_ctx->vsc_draw_strm);
606 fd_ringbuffer_attach_bo(ring, fd6_ctx->vsc_prim_strm);
607
608 OUT_REG(ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
609 A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
610 .bo_offset = max_vsc_pipes *
611 fd6_ctx->vsc_draw_strm_pitch));
612
613 OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));
614
615 OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), max_vsc_pipes);
616 for (i = 0; i < max_vsc_pipes; i++) {
617 const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
618 OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
619 A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
620 A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
621 A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
622 }
623
624 OUT_REG(
625 ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
626 A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
627 A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));
628
629 OUT_REG(
630 ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
631 A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
632 A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
633 }
634
635 /*
636 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
637 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
638 * written to control->vsc_overflow. This allows the CPU to
639 * detect which buffer overflowed (and, since the current size is
640 * encoded as well, this protects against already-submitted but
641 * not executed batches from fooling the CPU into increasing the
642 * size again unnecessarily).
643 */
644 static void
645 emit_vsc_overflow_test(struct fd_batch *batch)
646 {
647 struct fd_ringbuffer *ring = batch->gmem;
648 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
649 struct fd6_context *fd6_ctx = fd6_context(batch->ctx);
650
651 assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
652 assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);
653
654 /* Check for overflow, write vsc_scratch if detected: */
655 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
656 OUT_PKT7(ring, CP_COND_WRITE5, 8);
657 OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
658 CP_COND_WRITE5_0_WRITE_MEMORY);
659 OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
660 REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
661 OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
662 OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
663 OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
664 OUT_RELOC(ring,
665 control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
666 OUT_RING(ring,
667 CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));
668
669 OUT_PKT7(ring, CP_COND_WRITE5, 8);
670 OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
671 CP_COND_WRITE5_0_WRITE_MEMORY);
672 OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
673 REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
674 OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
675 OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
676 OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
677 OUT_RELOC(ring,
678 control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
679 OUT_RING(ring,
680 CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
681 }
682
683 OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
684 }
685
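/* Read back the overflow value written by the CP_COND_WRITE5 packets in
 * emit_vsc_overflow_test(), and if an overflow was recorded grow the
 * corresponding stream buffer for subsequent batches:
 */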
686 static void
687 check_vsc_overflow(struct fd_context *ctx)
688 {
689 struct fd6_context *fd6_ctx = fd6_context(ctx);
690 struct fd6_control *control =
691 (struct fd6_control *)fd_bo_map(fd6_ctx->control_mem);
692 uint32_t vsc_overflow = control->vsc_overflow;
693
694 if (!vsc_overflow)
695 return;
696
697 /* clear overflow flag: */
698 control->vsc_overflow = 0;
699
700 unsigned buffer = vsc_overflow & 0x3;
701 unsigned size = vsc_overflow & ~0x3;
702
703 if (buffer == 0x1) {
704 /* VSC_DRAW_STRM overflow: */
705
706 if (size < fd6_ctx->vsc_draw_strm_pitch) {
707 /* we've already increased the size, this overflow is
708 * from a batch submitted before resize, but executed
709 * after
710 */
711 return;
712 }
713
714 fd_bo_del(fd6_ctx->vsc_draw_strm);
715 fd6_ctx->vsc_draw_strm = NULL;
716 fd6_ctx->vsc_draw_strm_pitch *= 2;
717
718 mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
719 fd6_ctx->vsc_draw_strm_pitch);
720
721 } else if (buffer == 0x3) {
722 /* VSC_PRIM_STRM overflow: */
723
724 if (size < fd6_ctx->vsc_prim_strm_pitch) {
725 /* we've already increased the size */
726 return;
727 }
728
729 fd_bo_del(fd6_ctx->vsc_prim_strm);
730 fd6_ctx->vsc_prim_strm = NULL;
731 fd6_ctx->vsc_prim_strm_pitch *= 2;
732
733 mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
734 fd6_ctx->vsc_prim_strm_pitch);
735
736 } else {
737 /* NOTE: it's possible, for example, for overflow to corrupt the
738 * control page. I mostly just see this hit if I set initial VSC
739 * buffer size extremely small. Things still seem to recover,
740 * but maybe we should pre-emptively realloc vsc_data/vsc_data2
741 * and hope for different memory placement?
742 */
743 mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
744 }
745 }
746
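/* Emit the start-of-pass ZPASS_DONE sample count used by the autotuner
 * (which uses samples-passed results to choose between gmem and sysmem
 * rendering for future batches):
 */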
747 template <chip CHIP>
748 static void
749 emit_common_init(struct fd_batch *batch)
750 {
751 struct fd_context *ctx = batch->ctx;
752 struct fd_ringbuffer *ring = batch->gmem;
753 struct fd_autotune *at = &batch->ctx->autotune;
754 struct fd_batch_result *result = batch->autotune_result;
755
756 if (!result)
757 return;
758
759 fd_ringbuffer_attach_bo(ring, at->results_mem);
760
761 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
762 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
763
764 if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
765 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
766 OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));
767
768 fd6_event_write<CHIP>(ctx, ring, FD_ZPASS_DONE);
769
770 /* Copied from blob's cmdstream, not sure why it is done. */
771 if (CHIP == A7XX) {
772 fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
773 }
774 } else {
775 OUT_PKT(ring, CP_EVENT_WRITE7,
776 CP_EVENT_WRITE7_0(
777 .event = ZPASS_DONE,
778 .write_sample_count = true,
779 ),
780 EV_DST_RAM_CP_EVENT_WRITE7_1(
781 results_ptr(at, result[result->idx].samples_start)
782 ),
783 );
784 }
785 }
786
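/* Emit any deferred flushes plus the end-of-pass sample count and fence
 * for the autotune results:
 */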
787 template <chip CHIP>
788 static void
789 emit_common_fini(struct fd_batch *batch)
790 {
791 struct fd_context *ctx = batch->ctx;
792 struct fd_ringbuffer *ring = batch->gmem;
793 struct fd_autotune *at = &batch->ctx->autotune;
794 struct fd_batch_result *result = batch->autotune_result;
795
796 fd6_emit_flushes<CHIP>(batch->ctx, ring, batch->barrier);
797
798 if (!result)
799 return;
800
801 fd_ringbuffer_attach_bo(ring, at->results_mem);
802
803 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
804 OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
805
806 if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
807 OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
808 OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));
809
810 fd6_event_write<CHIP>(batch->ctx, ring, FD_ZPASS_DONE);
811 } else {
812 OUT_PKT(ring, CP_EVENT_WRITE7,
813 CP_EVENT_WRITE7_0(
814 .event = ZPASS_DONE,
815 .write_sample_count = true,
816 .sample_count_end_offset = true,
817 .write_accum_sample_count_diff = true,
818 ),
819 EV_DST_RAM_CP_EVENT_WRITE7_1(
820 results_ptr(at, result[result->idx].samples_start)
821 ),
822 );
823 }
824
825 fd6_fence_write<CHIP>(ring, result->fence, results_ptr(at, fence));
826 }
827
828 /*
829 * Emit the IB conditionally (CP_REG_TEST + CP_COND_REG_EXEC) based on
830 * VSC_STATE[p], i.e. the IB is skipped for tiles that have no visible geometry.
831 *
832 * If we aren't using the binning pass, this just emits a normal IB.
833 */
834 static void
835 emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
836 struct fd_ringbuffer *target)
837 {
838 struct fd_ringbuffer *ring = batch->gmem;
839
840 /* If we have fast clear, that won't count in the VSC state, so it
841 * forces an unconditional IB (because we know there is something
842 * to do for this tile)
843 */
844 if (batch->cleared || !use_hw_binning(batch)) {
845 fd6_emit_ib(batch->gmem, target);
846 return;
847 }
848
849 if (target->cur == target->start)
850 return;
851
852 emit_marker6(ring, 6);
853
854 unsigned count = fd_ringbuffer_cmd_count(target);
855
856 BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */
857
858 OUT_PKT7(ring, CP_REG_TEST, 1);
859 OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
860 A6XX_CP_REG_TEST_0_BIT(tile->n) |
861 A6XX_CP_REG_TEST_0_SKIP_WAIT_FOR_ME);
862
863 OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
864 OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
865 OUT_RING(ring, PRED_TEST_CP_COND_REG_EXEC_1_DWORDS(4 * count));
866
867 for (unsigned i = 0; i < count; i++) {
868 uint32_t dwords;
869 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
870 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
871 assert(dwords > 0);
872 OUT_RING(ring, dwords);
873 }
874
875 emit_marker6(ring, 6);
876 }
877
878 static void
879 set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
880 uint32_t y2)
881 {
882 OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
883 A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));
884
885 OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
886 A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
887 }
888
889 struct bin_size_params {
890 enum a6xx_render_mode render_mode;
891 bool force_lrz_write_dis;
892 enum a6xx_buffers_location buffers_location;
893 enum a6xx_lrz_feedback_mask lrz_feedback_zmode_mask;
894 };
895
896 template <chip CHIP>
897 static void
898 set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem,
899 struct bin_size_params p)
900 {
901 unsigned w = gmem ? gmem->bin_w : 0;
902 unsigned h = gmem ? gmem->bin_h : 0;
903
904 if (CHIP == A6XX) {
905 OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(
906 .binw = w, .binh = h,
907 .render_mode = p.render_mode,
908 .force_lrz_write_dis = p.force_lrz_write_dis,
909 .buffers_location = p.buffers_location,
910 .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
911 ));
912 } else {
913 OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(
914 .binw = w, .binh = h,
915 .render_mode = p.render_mode,
916 .force_lrz_write_dis = p.force_lrz_write_dis,
917 .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
918 ));
919 }
920 OUT_REG(ring, RB_BIN_CONTROL(
921 CHIP,
922 .binw = w, .binh = h,
923 .render_mode = p.render_mode,
924 .force_lrz_write_dis = p.force_lrz_write_dis,
925 .buffers_location = p.buffers_location,
926 .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
927 ));
928 /* no flag for RB_BIN_CONTROL2... */
929 OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
930 }
931
932 template <chip CHIP>
933 static void
934 emit_binning_pass(struct fd_batch *batch) assert_dt
935 {
936 struct fd_ringbuffer *ring = batch->gmem;
937 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
938 struct fd_screen *screen = batch->ctx->screen;
939
940 assert(!batch->tessellation);
941
942 set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);
943
944 emit_marker6(ring, 7);
945 OUT_PKT7(ring, CP_SET_MARKER, 1);
946 OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
947 emit_marker6(ring, 7);
948
949 OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
950 OUT_RING(ring, 0x1);
951
952 OUT_PKT7(ring, CP_SET_MODE, 1);
953 OUT_RING(ring, 0x1);
954
955 OUT_WFI5(ring);
956
957 OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));
958
959 update_vsc_pipe(batch);
960
961 OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
962 OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);
963
964 OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
965 OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);
966
967 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
968 OUT_RING(ring, UNK_2C);
969
970 OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
971 OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));
972
973 OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
974 OUT_RING(ring,
975 A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));
976
977 /* emit IB to binning drawcmds: */
978 trace_start_binning_ib(&batch->trace, ring);
979 foreach_subpass (subpass, batch) {
980 emit_lrz<CHIP>(batch, subpass);
981 fd6_emit_ib(ring, subpass->draw);
982 }
983 trace_end_binning_ib(&batch->trace, ring);
984
985 OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
986 OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
987 CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
988 CP_SET_DRAW_STATE__0_GROUP_ID(0));
989 OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
990 OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));
991
992 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
993 OUT_RING(ring, UNK_2D);
994
995 /* This flush is probably required because the VSC, which produces the
996 * visibility stream, is a client of UCHE, whereas the CP needs to read
997 * the visibility stream (without caching) to do draw skipping. The
998 * WFI+WAIT_FOR_ME combination guarantees that the binning commands
999 * submitted are finished before reading the VSC regs (in
1000 * emit_vsc_overflow_test) or the VSC_DATA buffer directly (implicitly
1001 * as part of draws).
1002 */
1003 fd6_emit_flushes<CHIP>(batch->ctx, ring,
1004 FD6_FLUSH_CACHE |
1005 FD6_WAIT_FOR_IDLE |
1006 FD6_WAIT_FOR_ME);
1007
1008 trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
1009 emit_vsc_overflow_test(batch);
1010 trace_end_vsc_overflow_test(&batch->trace, batch->gmem);
1011
1012 OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
1013 OUT_RING(ring, 0x0);
1014
1015 OUT_PKT7(ring, CP_SET_MODE, 1);
1016 OUT_RING(ring, 0x0);
1017
1018 OUT_WFI5(ring);
1019
1020 fd6_emit_ccu_cntl<CHIP>(ring, screen, true);
1021 }
1022
1023 static void
1024 emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
1025 {
1026 enum a3xx_msaa_samples samples = fd_msaa_samples(nr);
1027
1028 OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
1029 OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
1030 OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
1031 COND(samples == MSAA_ONE,
1032 A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
1033
1034 OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
1035 OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
1036 OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
1037 COND(samples == MSAA_ONE,
1038 A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));
1039
1040 OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
1041 OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
1042 OUT_RING(ring,
1043 A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
1044 COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
1045
1046 OUT_PKT4(ring, REG_A6XX_RB_BLIT_GMEM_MSAA_CNTL, 1);
1047 OUT_RING(ring, A6XX_RB_BLIT_GMEM_MSAA_CNTL_SAMPLES(samples));
1048 }
1049
1050 template <chip CHIP>
1051 static void prepare_tile_setup(struct fd_batch *batch);
1052 template <chip CHIP>
1053 static void prepare_tile_fini(struct fd_batch *batch);
1054
1055 /* before first tile */
1056 template <chip CHIP>
1057 static void
1058 fd6_emit_tile_init(struct fd_batch *batch) assert_dt
1059 {
1060 struct fd_ringbuffer *ring = batch->gmem;
1061 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1062 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1063 struct fd_screen *screen = batch->ctx->screen;
1064
1065 emit_lrz_clears<CHIP>(batch);
1066
1067 fd6_emit_restore<CHIP>(batch, ring);
1068
1069 fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);
1070
1071 if (batch->prologue) {
1072 trace_start_prologue(&batch->trace, ring);
1073 fd6_emit_ib(ring, batch->prologue);
1074 trace_end_prologue(&batch->trace, ring);
1075 }
1076
1077 fd6_cache_inv<CHIP>(batch->ctx, ring);
1078
1079 prepare_tile_setup<CHIP>(batch);
1080 prepare_tile_fini<CHIP>(batch);
1081
1082 OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1083 OUT_RING(ring, 0x0);
1084
1085 /* blob controls "local" in IB2, but I think that is not required */
1086 OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
1087 OUT_RING(ring, 0x1);
1088
1089 OUT_WFI5(ring);
1090 fd6_emit_ccu_cntl<CHIP>(ring, screen, true);
1091
1092 emit_zs<CHIP>(batch->ctx, ring, pfb->zsbuf, batch->gmem_state);
1093 emit_mrt<CHIP>(ring, pfb, batch->gmem_state);
1094 emit_msaa(ring, pfb->samples);
1095 patch_fb_read_gmem(batch);
1096
1097 if (CHIP >= A7XX) {
1098 OUT_REG(ring, A7XX_RB_UNKNOWN_8812(0x0));
1099 OUT_REG(ring, A7XX_RB_UNKNOWN_8E06(0x0));
1100 OUT_REG(ring, A7XX_GRAS_UNKNOWN_8007(0x0));
1101 OUT_REG(ring, A6XX_GRAS_UNKNOWN_8110(0x2));
1102 OUT_REG(ring, A7XX_RB_UNKNOWN_8E09(0x4));
1103 }
1104
1105 if (use_hw_binning(batch)) {
1106 /* enable stream-out during binning pass: */
1107 OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
1108
1109 set_bin_size<CHIP>(ring, gmem, {
1110 .render_mode = BINNING_PASS,
1111 .buffers_location = BUFFERS_IN_GMEM,
1112 .lrz_feedback_zmode_mask = LRZ_FEEDBACK_NONE,
1113 });
1114 update_render_cntl<CHIP>(batch, pfb, true);
1115 emit_binning_pass<CHIP>(batch);
1116
1117 /* and disable stream-out for draw pass: */
1118 OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));
1119
1120 /*
1121 * NOTE: even if we detect VSC overflow and disable use of the
1122 * visibility stream in the draw pass, it is still safe to execute
1123 * the rest of these cmds:
1124 */
1125
1126 set_bin_size<CHIP>(ring, gmem, {
1127 .render_mode = RENDERING_PASS,
1128 .force_lrz_write_dis = !screen->info->a6xx.has_lrz_feedback,
1129 .buffers_location = BUFFERS_IN_GMEM,
1130 .lrz_feedback_zmode_mask = screen->info->a6xx.has_lrz_feedback
1131 ? LRZ_FEEDBACK_EARLY_LRZ_LATE_Z
1132 : LRZ_FEEDBACK_NONE,
1133 });
1134
1135 OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
1136 OUT_RING(ring, 0x0);
1137
1138 OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
1139 OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);
1140
1141 OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
1142 OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);
1143
1144 OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
1145 OUT_RING(ring, 0x1);
1146 } else {
1147 /* no binning pass, so enable stream-out for the draw pass: */
1148 OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));
1149
1150 set_bin_size<CHIP>(ring, gmem, {
1151 .render_mode = RENDERING_PASS,
1152 .force_lrz_write_dis = !screen->info->a6xx.has_lrz_feedback,
1153 .buffers_location = BUFFERS_IN_GMEM,
1154 .lrz_feedback_zmode_mask =
1155 screen->info->a6xx.has_lrz_feedback
1156 ? LRZ_FEEDBACK_EARLY_Z_OR_EARLY_LRZ_LATE_Z
1157 : LRZ_FEEDBACK_NONE,
1158 });
1159 }
1160
1161 update_render_cntl<CHIP>(batch, pfb, false);
1162
1163 emit_common_init<CHIP>(batch);
1164 }
1165
1166 template <chip CHIP>
1167 static void
1168 set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
1169 {
1170 OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
1171 OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));
1172
1173 OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
1174 OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));
1175
1176 OUT_REG(ring, SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1));
1177
1178 OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
1179 OUT_RING(ring,
1180 A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
1181 }
1182
1183 /* before mem2gmem */
1184 template <chip CHIP>
1185 static void
1186 fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
1187 {
1188 struct fd_context *ctx = batch->ctx;
1189 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1190 struct fd6_context *fd6_ctx = fd6_context(ctx);
1191 struct fd_ringbuffer *ring = batch->gmem;
1192
1193 emit_marker6(ring, 7);
1194 OUT_PKT7(ring, CP_SET_MARKER, 1);
1195 OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
1196 emit_marker6(ring, 7);
1197
1198 uint32_t x1 = tile->xoff;
1199 uint32_t y1 = tile->yoff;
1200 uint32_t x2 = tile->xoff + tile->bin_w - 1;
1201 uint32_t y2 = tile->yoff + tile->bin_h - 1;
1202
1203 set_scissor(ring, x1, y1, x2, y2);
1204
1205 if (use_hw_binning(batch)) {
1206 const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
1207 unsigned num_vsc_pipes = ctx->screen->info->num_vsc_pipes;
1208
1209 OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
1210
1211 OUT_PKT7(ring, CP_SET_MODE, 1);
1212 OUT_RING(ring, 0x0);
1213
1214 OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
1215 OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
1216 CP_SET_BIN_DATA5_0_VSC_N(tile->n));
1217 OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
1218 (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
1219 OUT_RELOC(
1220 ring, fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
1221 (tile->p * 4) + (num_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch),
1222 0, 0);
1223 OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
1224 (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
1225
1226 OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
1227 OUT_RING(ring, 0x0);
1228
1229 set_window_offset<CHIP>(ring, x1, y1);
1230
1231 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1232 set_bin_size<CHIP>(ring, gmem, {
1233 .render_mode = RENDERING_PASS,
1234 .force_lrz_write_dis = !ctx->screen->info->a6xx.has_lrz_feedback,
1235 .buffers_location = BUFFERS_IN_GMEM,
1236 .lrz_feedback_zmode_mask = ctx->screen->info->a6xx.has_lrz_feedback
1237 ? LRZ_FEEDBACK_EARLY_LRZ_LATE_Z
1238 : LRZ_FEEDBACK_NONE,
1239 });
1240
1241 OUT_PKT7(ring, CP_SET_MODE, 1);
1242 OUT_RING(ring, 0x0);
1243 } else {
1244 set_window_offset<CHIP>(ring, x1, y1);
1245
1246 OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
1247 OUT_RING(ring, 0x1);
1248
1249 OUT_PKT7(ring, CP_SET_MODE, 1);
1250 OUT_RING(ring, 0x0);
1251 }
1252 }
1253
1254 static void
1255 set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
1256 {
1257 const struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1258
1259 struct pipe_scissor_state blit_scissor;
1260
1261 blit_scissor.minx = 0;
1262 blit_scissor.miny = 0;
1263 blit_scissor.maxx = ALIGN(pfb->width, 16);
1264 blit_scissor.maxy = ALIGN(pfb->height, 4);
1265
1266 OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
1267 OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
1268 A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
1269 OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
1270 A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
1271 }
1272
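/* Emit destination state and a blit (resolve) event for a single surface.
 * The direction (load vs store) and related flags are set up in
 * RB_BLIT_INFO by the caller:
 */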
1273 template <chip CHIP>
1274 static void
1275 emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
1276 struct pipe_surface *psurf, bool stencil)
1277 {
1278 struct fd_resource *rsc = fd_resource(psurf->texture);
1279 enum pipe_format pfmt = psurf->format;
1280 uint32_t offset;
1281 bool ubwc_enabled;
1282
1283 assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1284
1285 /* separate stencil case: */
1286 if (stencil) {
1287 rsc = rsc->stencil;
1288 pfmt = rsc->b.b.format;
1289 }
1290
1291 offset =
1292 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
1293 ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);
1294
1295 assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1296
1297 enum a6xx_tile_mode tile_mode = (enum a6xx_tile_mode)
1298 fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
1299 enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
1300 uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
1301 uint32_t array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
1302 enum a3xx_color_swap swap =
1303 fd6_color_swap(pfmt, (enum a6xx_tile_mode)rsc->layout.tile_mode);
1304 enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);
1305
1306 OUT_REG(ring,
1307 A6XX_RB_BLIT_DST_INFO(
1308 .tile_mode = tile_mode,
1309 .flags = ubwc_enabled,
1310 .samples = samples,
1311 .color_swap = swap,
1312 .color_format = format,
1313 ),
1314 A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
1315 A6XX_RB_BLIT_DST_PITCH(stride),
1316 A6XX_RB_BLIT_DST_ARRAY_PITCH(array_stride));
1317
1318 OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));
1319
1320 if (ubwc_enabled) {
1321 OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
1322 fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
1323 psurf->u.tex.first_layer);
1324 }
1325
1326 if (CHIP >= A7XX)
1327 OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
1328
1329 fd6_emit_blit<CHIP>(batch->ctx, ring);
1330 }
1331
1332 template <chip CHIP>
1333 static void
1334 emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
1335 uint32_t base, struct pipe_surface *psurf, unsigned buffer)
1336 {
1337 bool stencil = (buffer == FD_BUFFER_STENCIL);
1338
1339 OUT_REG(ring,
1340 A6XX_RB_BLIT_INFO(
1341 .type = BLIT_EVENT_LOAD,
1342 .sample_0 = util_format_is_pure_integer(psurf->format),
1343 .depth = (buffer == FD_BUFFER_DEPTH),
1344 ),
1345 );
1346
1347 emit_blit<CHIP>(batch, ring, base, psurf, stencil);
1348 }
1349
1350 template <chip CHIP>
1351 static void
1352 emit_subpass_clears(struct fd_batch *batch, struct fd_batch_subpass *subpass)
1353 {
1354 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1355 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1356 struct fd_ringbuffer *ring = subpass->subpass_clears;
1357 enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
1358
1359 uint32_t buffers = subpass->fast_cleared;
1360
1361 if (buffers & PIPE_CLEAR_COLOR) {
1362
1363 for (int i = 0; i < pfb->nr_cbufs; i++) {
1364 union pipe_color_union *color = &subpass->clear_color[i];
1365 union util_color uc = {0};
1366
1367 if (!pfb->cbufs[i])
1368 continue;
1369
1370 if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
1371 continue;
1372
1373 enum pipe_format pfmt = pfb->cbufs[i]->format;
1374
1375 // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
1376 union pipe_color_union swapped;
1377 switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
1378 case WZYX:
1379 swapped.ui[0] = color->ui[0];
1380 swapped.ui[1] = color->ui[1];
1381 swapped.ui[2] = color->ui[2];
1382 swapped.ui[3] = color->ui[3];
1383 break;
1384 case WXYZ:
1385 swapped.ui[2] = color->ui[0];
1386 swapped.ui[1] = color->ui[1];
1387 swapped.ui[0] = color->ui[2];
1388 swapped.ui[3] = color->ui[3];
1389 break;
1390 case ZYXW:
1391 swapped.ui[3] = color->ui[0];
1392 swapped.ui[0] = color->ui[1];
1393 swapped.ui[1] = color->ui[2];
1394 swapped.ui[2] = color->ui[3];
1395 break;
1396 case XYZW:
1397 swapped.ui[3] = color->ui[0];
1398 swapped.ui[2] = color->ui[1];
1399 swapped.ui[1] = color->ui[2];
1400 swapped.ui[0] = color->ui[3];
1401 break;
1402 }
1403
1404 util_pack_color_union(pfmt, &uc, &swapped);
1405
1406 OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
1407 OUT_RING(ring,
1408 A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
1409 A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
1410 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));
1411
1412 OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
1413 OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
1414 A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));
1415
1416 OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
1417 OUT_RING(ring, gmem->cbuf_base[i]);
1418
1419 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
1420 OUT_RING(ring, 0);
1421
1422 OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
1423 OUT_RING(ring, uc.ui[0]);
1424 OUT_RING(ring, uc.ui[1]);
1425 OUT_RING(ring, uc.ui[2]);
1426 OUT_RING(ring, uc.ui[3]);
1427
1428 if (CHIP >= A7XX)
1429 OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
1430
1431 fd6_emit_blit<CHIP>(batch->ctx, ring);
1432 }
1433 }
1434
1435 const bool has_depth = pfb->zsbuf;
1436 const bool has_separate_stencil =
1437 has_depth && fd_resource(pfb->zsbuf->texture)->stencil;
1438
1439 /* First clear depth or combined depth/stencil. */
1440 if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
1441 (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
1442 enum pipe_format pfmt = pfb->zsbuf->format;
1443 uint32_t clear_value;
1444 uint32_t mask = 0;
1445
1446 if (has_separate_stencil) {
1447 pfmt = util_format_get_depth_only(pfb->zsbuf->format);
1448 clear_value = util_pack_z(pfmt, subpass->clear_depth);
1449 } else {
1450 pfmt = pfb->zsbuf->format;
1451 clear_value =
1452 util_pack_z_stencil(pfmt, subpass->clear_depth, subpass->clear_stencil);
1453 }
1454
1455 if (buffers & PIPE_CLEAR_DEPTH)
1456 mask |= 0x1;
1457
1458 if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
1459 mask |= 0x2;
1460
1461 OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
1462 OUT_RING(ring,
1463 A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
1464 A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
1465 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));
1466
1467 OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
1468 OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
1469 A6XX_RB_BLIT_INFO_DEPTH |
1470 A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));
1471
1472 OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
1473 OUT_RING(ring, gmem->zsbuf_base[0]);
1474
1475 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
1476 OUT_RING(ring, 0);
1477
1478 OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
1479 OUT_RING(ring, clear_value);
1480
1481 fd6_emit_blit<CHIP>(batch->ctx, ring);
1482 }
1483
1484 /* Then clear the separate stencil buffer in case of 32 bit depth
1485 * formats with separate stencil. */
1486 if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
1487 OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
1488 OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
1489 A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
1490 A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));
1491
1492 OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
1493 OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
1494 A6XX_RB_BLIT_INFO_DEPTH |
1495 A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));
1496
1497 OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
1498 OUT_RING(ring, gmem->zsbuf_base[1]);
1499
1500 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
1501 OUT_RING(ring, 0);
1502
1503 OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
1504 OUT_RING(ring, subpass->clear_stencil & 0xff);
1505
1506 fd6_emit_blit<CHIP>(batch->ctx, ring);
1507 }
1508 }
1509
1510 /*
1511 * transfer from system memory to gmem
1512 */
1513 template <chip CHIP>
1514 static void
1515 emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
1516 {
1517 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1518 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1519
1520 if (batch->restore & FD_BUFFER_COLOR) {
1521 unsigned i;
1522 for (i = 0; i < pfb->nr_cbufs; i++) {
1523 if (!pfb->cbufs[i])
1524 continue;
1525 if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
1526 continue;
1527 emit_restore_blit<CHIP>(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
1528 FD_BUFFER_COLOR);
1529 }
1530 }
1531
1532 if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
1533 struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
1534
1535 if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
1536 emit_restore_blit<CHIP>(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
1537 FD_BUFFER_DEPTH);
1538 }
1539 if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
1540 emit_restore_blit<CHIP>(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
1541 FD_BUFFER_STENCIL);
1542 }
1543 }
1544 }
1545
1546 template <chip CHIP>
1547 static void
1548 prepare_tile_setup(struct fd_batch *batch)
1549 {
1550 if (batch->restore) {
1551 batch->tile_loads =
1552 fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
1553
1554 set_blit_scissor(batch, batch->tile_loads);
1555 emit_restore_blits<CHIP>(batch, batch->tile_loads);
1556 }
1557
1558 foreach_subpass (subpass, batch) {
1559 if (!subpass->fast_cleared)
1560 continue;
1561
1562 subpass->subpass_clears =
1563 fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
1564
1565 set_blit_scissor(batch, subpass->subpass_clears);
1566 emit_subpass_clears<CHIP>(batch, subpass);
1567 }
1568 }
1569
1570 /*
1571 * transfer from system memory to gmem
1572 */
1573 static void
1574 fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
1575 {
1576 }
1577
1578 /* before IB to rendering cmds: */
1579 static void
1580 fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
1581 {
1582 if (batch->tile_loads) {
1583 trace_start_tile_loads(&batch->trace, batch->gmem, batch->restore);
1584 emit_conditional_ib(batch, tile, batch->tile_loads);
1585 trace_end_tile_loads(&batch->trace, batch->gmem);
1586 }
1587 }
1588
1589 static bool
1590 blit_can_resolve(enum pipe_format format)
1591 {
1592 const struct util_format_description *desc = util_format_description(format);
1593
1594 /* blit event can only do resolve for simple cases:
1595 * averaging samples as unsigned integers or choosing only one sample
1596 */
1597 if (util_format_is_snorm(format) || util_format_is_srgb(format))
1598 return false;
1599
1600 /* can't do formats with larger channel sizes
1601 * note: this includes all float formats
1602 * note2: single channel integer formats seem OK
1603 */
1604 if (desc->channel[0].size > 10)
1605 return false;
1606
1607 switch (format) {
1608 /* For unknown reasons the blit event can't msaa resolve these formats when
1609 * tiled, likely related to these formats having a different layout from
1610 * other cpp=2 formats.
1611 */
1612 case PIPE_FORMAT_R8G8_UNORM:
1613 case PIPE_FORMAT_R8G8_UINT:
1614 case PIPE_FORMAT_R8G8_SINT:
1615 case PIPE_FORMAT_R8G8_SRGB:
1616 /* TODO: this one should be able to work? */
1617 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1618 return false;
1619 default:
1620 break;
1621 }
1622
1623 return true;
1624 }
1625
1626 static bool
1627 needs_resolve(struct pipe_surface *psurf)
1628 {
1629 return psurf->nr_samples &&
1630 (psurf->nr_samples != psurf->texture->nr_samples);
1631 }
1632
1633 /**
1634 * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
1635 * clear/stores to Z24S8.
1636 */
1637 static uint32_t
1638 fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
1639 {
1640 buffers &= FD_BUFFER_DEPTH | FD_BUFFER_STENCIL;
1641 if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
1642 if (buffers == FD_BUFFER_DEPTH)
1643 return 0x08000041;
1644 else if (buffers == FD_BUFFER_STENCIL)
1645 return 0x00084001;
1646 }
1647 return 0;
1648 }
1649
1650 template <chip CHIP>
1651 static void
1652 emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
1653 uint32_t base, struct pipe_surface *psurf,
1654 unsigned buffer) assert_dt
1655 {
1656 uint32_t info = 0;
1657 bool stencil = false;
1658
1659 if (!fd_resource(psurf->texture)->valid)
1660 return;
1661
1662 /* if we need to resolve, but cannot with BLIT event, we instead need
1663 * to generate per-tile CP_BLIT (r2d) commands:
1664 *
1665 * Separate stencil is a special case: we might need to use CP_BLIT
1666 * for depth, but we can still resolve stencil with a BLIT event.
1667 */
1668 if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
1669 (buffer != FD_BUFFER_STENCIL)) {
1670 /* We could potentially use fd6_unknown_8c01() to handle partial z/s
1671 * resolve to packed z/s, but we would need a corresponding ability in the
1672 * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
1673 * just do a restore of the other channel for partial packed z/s writes.
1674 */
1675 fd6_resolve_tile<CHIP>(batch, ring, base, psurf, 0);
1676 return;
1677 }
1678
1679 switch (buffer) {
1680 case FD_BUFFER_COLOR:
1681 info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE);
1682 break;
1683 case FD_BUFFER_STENCIL:
1684 info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE_AND_CLEAR);
1685 stencil = true;
1686 break;
1687 case FD_BUFFER_DEPTH:
1688 info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE) | A6XX_RB_BLIT_INFO_DEPTH;
1689 break;
1690 }
1691
1692 if (util_format_is_pure_integer(psurf->format) ||
1693 util_format_is_depth_or_stencil(psurf->format))
1694 info |= A6XX_RB_BLIT_INFO_SAMPLE_0;
1695
1696 OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
1697 OUT_RING(ring, info);
1698
1699 emit_blit<CHIP>(batch, ring, base, psurf, stencil);
1700 }
1701
1702 /*
1703 * transfer from gmem to system memory (ie. normal RAM)
1704 */
1705
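/* Build the tile_store ringbuffer containing the store (resolve) blits for
 * all attachments that need to be written back to memory.  It is replayed
 * per-tile from fd6_emit_tile_gmem2mem() below.
 */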
template <chip CHIP>
static void
prepare_tile_fini(struct fd_batch *batch)
   assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_store =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_store;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[0],
                                 pfb->zsbuf, FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[1],
                                 pfb->zsbuf, FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit<CHIP>(batch, ring, gmem->cbuf_base[i],
                                 pfb->cbufs[i], FD_BUFFER_COLOR);
      }
   }
}

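/* Emit the rendering for a single tile: per-subpass clears, LRZ state, and
 * the subpass draw IBs, followed by any per-tile epilogue.
 */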
template <chip CHIP>
static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   foreach_subpass (subpass, batch) {
      if (subpass->subpass_clears) {
         trace_start_clears(&batch->trace, batch->gmem, subpass->fast_cleared);
         emit_conditional_ib(batch, tile, subpass->subpass_clears);
         trace_end_clears(&batch->trace, batch->gmem);
      }

      emit_lrz<CHIP>(batch, subpass);

      fd6_emit_ib(batch->gmem, subpass->draw);
   }

   if (batch->tile_epilogue)
      fd6_emit_ib(batch->gmem, batch->tile_epilogue);
}

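/* Emitted after the draws for each tile: end the visibility stream when HW
 * binning is used, reset draw state, switch to the RESOLVE marker, and
 * replay the tile_store IB to copy GMEM contents back to system memory.
 */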
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   if (batch->tile_store) {
      trace_start_tile_stores(&batch->trace, batch->gmem, batch->resolve);
      emit_conditional_ib(batch, tile, batch->tile_store);
      trace_end_tile_stores(&batch->trace, batch->gmem);
   }
}

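/* Emitted once at the end of the GMEM pass, after all tiles have been
 * rendered and stored.
 */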
template <chip CHIP>
static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini<CHIP>(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_CCU_CLEAN_BLIT_CACHE);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

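/* Fast clears in sysmem (bypass) mode are implemented as 2D blit clears
 * covering the full framebuffer, since there is no GMEM clear path to use.
 */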
template <chip CHIP>
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_batch_subpass *subpass)
   assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = subpass->fast_cleared;

   if (!buffers)
      return;

   struct pipe_box box2d;
   u_box_2d(0, 0, pfb->width, pfb->height, &box2d);

   trace_start_clears(&batch->trace, ring, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = subpass->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface<CHIP>(ctx, ring, pfb->cbufs[i], &box2d, &color, 0);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

      if ((buffers & PIPE_CLEAR_DEPTH) ||
          (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = subpass->clear_depth;
         value.ui[1] = subpass->clear_stencil;
         fd6_clear_surface<CHIP>(ctx, ring, pfb->zsbuf, &box2d, &value,
                                 fd6_unknown_8c01(pfb->zsbuf->format, buffers));
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = subpass->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface<CHIP>(ctx, ring, &stencil_surf, &box2d, &value, 0);
      }
   }

   fd6_emit_flushes<CHIP>(ctx, ring, FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR);

   trace_end_clears(&batch->trace, ring);
}

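/* Setup for rendering directly to system memory (bypass mode), skipping
 * the binning pass and per-tile load/store.
 */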
template <chip CHIP>
static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_lrz_clears<CHIP>(batch);

   fd6_emit_restore<CHIP>(batch, ring);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset<CHIP>(ring, 0, 0);

   set_bin_size<CHIP>(ring, NULL, {
      .render_mode = RENDERING_PASS,
      .buffers_location = BUFFERS_IN_SYSMEM,
   });

   if (CHIP >= A7XX) {
      OUT_REG(ring, A7XX_RB_UNKNOWN_8812(0x3ff)); // all buffers in sysmem
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E06(batch->ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E06));
      OUT_REG(ring, A7XX_GRAS_UNKNOWN_8007(0x0));
      OUT_REG(ring, A6XX_GRAS_UNKNOWN_8110(0x2));
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E09(0x4));
   }

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   /* enable stream-out, with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs<CHIP>(batch->ctx, ring, pfb->zsbuf, NULL);
   emit_mrt<CHIP>(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem<CHIP>(batch);

   emit_common_init<CHIP>(batch);
}

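/* Emit the draws for a sysmem (bypass) render pass, handling each
 * subpass's fast clears, CCU state, and LRZ state along the way.
 */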
template <chip CHIP>
static void
fd6_emit_sysmem(struct fd_batch *batch)
   assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   foreach_subpass (subpass, batch) {
      if (subpass->fast_cleared) {
         unsigned flushes = 0;
         if (subpass->fast_cleared & FD_BUFFER_COLOR)
            flushes |= FD6_INVALIDATE_CCU_COLOR;
         if (subpass->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
            flushes |= FD6_INVALIDATE_CCU_DEPTH;

         fd6_emit_flushes<CHIP>(batch->ctx, ring, flushes);
         emit_sysmem_clears<CHIP>(batch, subpass);
      }

      OUT_WFI5(ring);
      fd6_emit_ccu_cntl<CHIP>(ring, screen, false);

      struct pipe_framebuffer_state *pfb = &batch->framebuffer;
      update_render_cntl<CHIP>(batch, pfb, false);

      emit_lrz<CHIP>(batch, subpass);

      fd6_emit_ib(ring, subpass->draw);
   }
}

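/* Finish a sysmem render pass, flushing the CCU color/depth caches back to
 * memory.
 */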
template <chip CHIP>
static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini<CHIP>(batch);

   if (batch->tile_epilogue)
      fd6_emit_ib(batch->gmem, batch->tile_epilogue);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   fd6_emit_flushes<CHIP>(batch->ctx, ring,
                          FD6_FLUSH_CCU_COLOR |
                          FD6_FLUSH_CCU_DEPTH);
}

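/* Hook up the per-generation tiling (GMEM) and bypass (sysmem) entrypoints
 * for this context.
 */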
template <chip CHIP>
void
fd6_gmem_init(struct pipe_context *pctx)
   disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init<CHIP>;
   ctx->emit_tile_prep = fd6_emit_tile_prep<CHIP>;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile<CHIP>;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini<CHIP>;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep<CHIP>;
   ctx->emit_sysmem = fd6_emit_sysmem<CHIP>;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini<CHIP>;
}
FD_GENX(fd6_gmem_init);