xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a6xx/fd6_gmem.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Rob Clark <[email protected]>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <[email protected]>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 #include <stdio.h>
13 
14 #include "pipe/p_state.h"
15 #include "util/format/u_format.h"
16 #include "util/u_inlines.h"
17 #include "util/u_memory.h"
18 #include "util/u_string.h"
19 
20 #include "freedreno_draw.h"
21 #include "freedreno_resource.h"
22 #include "freedreno_state.h"
23 #include "freedreno_tracepoints.h"
24 
25 #include "fd6_barrier.h"
26 #include "fd6_blitter.h"
27 #include "fd6_context.h"
28 #include "fd6_draw.h"
29 #include "fd6_emit.h"
30 #include "fd6_gmem.h"
31 #include "fd6_pack.h"
32 #include "fd6_program.h"
33 #include "fd6_resource.h"
34 #include "fd6_zsa.h"
35 
36 /**
37  * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
38  * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
39  */
40 void
fd6_emit_flag_reference(struct fd_ringbuffer * ring,struct fd_resource * rsc,int level,int layer)41 fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
42                         int level, int layer)
43 {
44    if (fd_resource_ubwc_enabled(rsc, level)) {
45       OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
46                 0);
47       OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
48                         fdl_ubwc_pitch(&rsc->layout, level)) |
49                         A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
50                            rsc->layout.ubwc_layer_size >> 2));
51    } else {
52       OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
53       OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
54       OUT_RING(ring, 0x00000000);
55    }
56 }
57 
/**
 * Emit per-MRT color attachment state: RB_MRT buffer info/pitch/base
 * registers, the SP_FS mrt format register, and the per-MRT UBWC flags
 * buffer reference.  When @gmem is non-NULL we are rendering to GMEM
 * (tiled), and RB_MRT_BASE_GMEM gets the attachment's per-tile offset;
 * otherwise the GMEM base is 0 (sysmem / bypass).
 */
template <chip CHIP>
static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
         const struct fd_gmem_stateobj *gmem)
{
   unsigned srgb_cntl = 0;   /* bitmask of cbufs with an sRGB format */
   unsigned i;

   /* Note, GLES 3.2 says "If the fragment’s layer number is negative, or
    * greater than or equal to the minimum number of layers of any attachment,
    * the effects of the fragment on the framebuffer contents are undefined."
    */
   unsigned max_layer_index = 0;
   enum a6xx_format mrt0_format = FMT6_NONE;

   for (i = 0; i < pfb->nr_cbufs; i++) {
      enum a3xx_color_swap swap = WZYX;
      bool sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      ASSERTED struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t array_stride = 0;
      uint32_t offset;

      /* Unbound attachment slots are simply skipped: */
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      enum pipe_format pformat = psurf->format;
      rsc = fd_resource(psurf->texture);

      uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
      slice = fd_resource_slice(rsc, psurf->u.tex.level);
      enum a6xx_tile_mode tile_mode = (enum a6xx_tile_mode)
            fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
      enum a6xx_format format = fd6_color_format(pformat, tile_mode);
      sint = util_format_is_pure_sint(pformat);
      uint = util_format_is_pure_uint(pformat);

      if (util_format_is_srgb(pformat))
         srgb_cntl |= (1 << i);

      /* Byte offset of the bound miplevel/first-layer within the bo: */
      offset =
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

      stride = fd_resource_pitch(rsc, psurf->u.tex.level);
      array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
      swap = fd6_color_swap(pformat, (enum a6xx_tile_mode)rsc->layout.tile_mode);

      max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;

      /* Sanity-check that the bound level actually fits in the bo: */
      assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

      /* Batch with no draws? */
      fd_ringbuffer_attach_bo(ring, rsc->bo);

      OUT_REG(ring,
         RB_MRT_BUF_INFO(CHIP, i,
            .color_format = format,
            .color_tile_mode = tile_mode,
            .color_swap = swap,
            .losslesscompen = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level),
         ),
         A6XX_RB_MRT_PITCH(i, stride),
         A6XX_RB_MRT_ARRAY_PITCH(i, array_stride),
         A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_MRT_BASE_GMEM(i, base));

      OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
                                       .color_sint = sint, .color_uint = uint));

      /* UBWC flags buffer reference (or zeros if UBWC disabled): */
      OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);

      /* MRT0's format also feeds GRAS_LRZ_MRT_BUF_INFO_0 below: */
      if (i == 0)
         mrt0_format = format;
   }
   /* zsbuf layer count takes precedence for GRAS_MAX_LAYER_INDEX: */
   if (pfb->zsbuf)
      max_layer_index = pfb->zsbuf->u.tex.last_layer - pfb->zsbuf->u.tex.first_layer;

   OUT_REG(ring, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = mrt0_format));

   OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
   OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

   OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}
146 
/**
 * Emit depth/stencil buffer state.  Handles three cases: a regular
 * depth(+stencil) surface, a stencil-only (S8) surface (programmed as
 * the stencil plane of Z32_S8), and no zsbuf at all (DEPTH6_NONE).
 * When @gmem is non-NULL the *_BASE_GMEM registers get the per-tile
 * gmem offsets for depth (zsbuf_base[0]) and stencil (zsbuf_base[1]).
 */
template <chip CHIP>
static void
emit_zs(struct fd_context *ctx, struct fd_ringbuffer *ring,
        struct pipe_surface *zsbuf, const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      /* Separate stencil plane, if any (may be replaced below for S8): */
      struct fd_resource *stencil = rsc->stencil;
      uint32_t stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
      uint32_t array_stride = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      /* We could have a depth buffer, but no draws with depth write/test
       * enabled, in which case it wouldn't have been part of the batch
       * resource tracking
       */
      fd_ringbuffer_attach_bo(ring, rsc->bo);

      if (zsbuf->format == PIPE_FORMAT_S8_UINT) {
         /* S8 is implemented as Z32_S8 minus the Z32 plane: */
         enum a6xx_depth_format fmt = DEPTH6_32;

         /* Depth plane gets null base/pitch — only the stencil plane
          * (programmed below) is real:
          */
         OUT_REG(ring,
            RB_DEPTH_BUFFER_INFO(CHIP,
               .depth_format = fmt,
               .tilemode = TILE6_3,
               .losslesscompen = fd_resource_ubwc_enabled(rsc, zsbuf->u.tex.level),
            ),
            A6XX_RB_DEPTH_BUFFER_PITCH(0),
            A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(0),
            A6XX_RB_DEPTH_BUFFER_BASE(.qword = 0),
            A6XX_RB_DEPTH_BUFFER_BASE_GMEM(base));

         OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

         /* The resource itself *is* the stencil plane: */
         stencil = rsc;
      } else {
         enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);

         OUT_REG(ring,
            RB_DEPTH_BUFFER_INFO(CHIP,
               .depth_format = fmt,
               .tilemode = TILE6_3,
               .losslesscompen = fd_resource_ubwc_enabled(rsc, zsbuf->u.tex.level),
            ),
            A6XX_RB_DEPTH_BUFFER_PITCH(stride),
            A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(array_stride),
            A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
            A6XX_RB_DEPTH_BUFFER_BASE_GMEM(base));

         OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

         /* UBWC flags buffer for the depth plane: */
         OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
         fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                                 zsbuf->u.tex.first_layer);
      }

      if (stencil) {
         stride = fd_resource_pitch(stencil, zsbuf->u.tex.level);
         array_stride = fd_resource_layer_stride(stencil, zsbuf->u.tex.level);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
         uint32_t offset =
            fd_resource_offset(stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

         fd_ringbuffer_attach_bo(ring, stencil->bo);

         OUT_REG(ring,
            RB_STENCIL_INFO(
               CHIP,
               .separate_stencil = true,
               .tilemode = TILE6_3,
            ),
            A6XX_RB_STENCIL_BUFFER_PITCH(stride),
            A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(array_stride),
            A6XX_RB_STENCIL_BUFFER_BASE(.bo = stencil->bo, .bo_offset = offset),
            A6XX_RB_STENCIL_BUFFER_BASE_GMEM(base)
         );
      } else {
         OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
      }
   } else {
      /* No depth/stencil attachment — program DEPTH6_NONE and zeroed
       * buffer state so stale registers can't be used:
       */
      OUT_REG(ring,
              RB_DEPTH_BUFFER_INFO(
                    CHIP,
                    .depth_format = DEPTH6_NONE,
              ),
              A6XX_RB_DEPTH_BUFFER_PITCH(),
              A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(),
              A6XX_RB_DEPTH_BUFFER_BASE(),
              A6XX_RB_DEPTH_BUFFER_BASE_GMEM(),
      );

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_REG(ring, RB_STENCIL_INFO(CHIP, 0));
   }
}
247 
/**
 * Emit LRZ (low-resolution-Z) buffer state for a subpass, or null LRZ
 * state if the subpass has no LRZ buffer.
 */
template <chip CHIP>
static void
emit_lrz(struct fd_batch *batch, struct fd_batch_subpass *subpass)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring = batch->gmem;

   if (!subpass->lrz) {
      /* Disable LRZ by programming null base/pitch: */
      OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(),
              A6XX_GRAS_LRZ_BUFFER_PITCH(),
              A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      if (CHIP >= A7XX)
         OUT_REG(ring, A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO());
      return;
   }

   /* When swapping LRZ buffers we need to flush LRZ cache..
    * we possibly don't need this during the binning pass, it
    * appears that the corruption happens on the read-side, ie.
    * we change the LRZ buffer after a sub-pass, but get a
    * cache-hit on stale data from the previous LRZ buffer.
    */
   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
   OUT_REG(ring,
      A6XX_GRAS_LRZ_BUFFER_BASE(.bo = subpass->lrz),
      A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = zsbuf->lrz_pitch),
      /* Fast-clear buffer is only programmed when the resource has a
       * fast-clear region (nonzero offset), otherwise null:
       */
      A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(
         .bo = zsbuf->lrz_fc_offset ? subpass->lrz : NULL,
         .bo_offset = zsbuf->lrz_fc_offset
      ),
   );
   fd_ringbuffer_attach_bo(ring, subpass->lrz);

   if (CHIP >= A7XX) {
      OUT_REG(ring,
         A7XX_GRAS_LRZ_DEPTH_BUFFER_INFO(
            .depth_format = fd6_pipe2depth(pfb->zsbuf->format),
         )
      );
   }
}
291 
/* Emit any needed lrz clears to the prologue cmds.  Walks the batch's
 * subpasses; the first clear emits the one-time setup (CCU config, blit
 * marker, cache flush, blit RB_DBG_ECO_CNTL), and after the last clear
 * the register is restored and caches are flushed/invalidated.
 */
template <chip CHIP>
static void
emit_lrz_clears(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_context *ctx = batch->ctx;
   unsigned count = 0;   /* number of LRZ clears emitted so far */

   if (!pfb->zsbuf)
      return;

   struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);

   foreach_subpass (subpass, batch) {
      /* The lrz buffer isn't explicitly tracked by the batch resource
       * tracking (tracking the zsbuf is sufficient), but it still needs
       * to be attached to the ring
       */
      if (subpass->lrz)
         fd_ringbuffer_attach_bo(batch->gmem, subpass->lrz);

      if (!(subpass->fast_cleared & FD_BUFFER_LRZ))
         continue;

      /* Consume the pending-clear flag so it is only handled once: */
      subpass->fast_cleared &= ~FD_BUFFER_LRZ;

      /* prep before first clear: */
      if (count == 0) {
         struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);

         fd6_emit_ccu_cntl<CHIP>(ring, ctx->screen, false);

         OUT_PKT7(ring, CP_SET_MARKER, 1);
         OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BLIT2DSCALE));

         fd6_emit_flushes<CHIP>(ctx, ring, FD6_FLUSH_CACHE);

         /* Switch to the blit variant of RB_DBG_ECO_CNTL if it differs: */
         if (ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit !=
             ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
            /* This a non-context register, so we have to WFI before changing. */
            OUT_WFI5(ring);
            OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
            OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
         }
      }

      fd6_clear_lrz<CHIP>(batch, zsbuf, subpass->lrz, subpass->clear_depth);

      count++;
   }

   /* cleanup after last clear: */
   if (count > 0) {
      struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);

      /* Restore the non-blit RB_DBG_ECO_CNTL value: */
      if (ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit !=
          ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL) {
         OUT_WFI5(ring);
         OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
         OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL);
      }

      /* Clearing writes via CCU color in the PS stage, and LRZ is read via
       * UCHE in the earlier GRAS stage.
       *
       * Note tu also asks for WFI but maybe that is only needed if
       * has_ccu_flush_bug (and it is added by fd6_emit_flushes() already
       * in that case)
       */
      fd6_emit_flushes<CHIP>(batch->ctx, ring,
                             FD6_FLUSH_CCU_COLOR |
                             FD6_INVALIDATE_CACHE);
   }
}
368 
369 static bool
use_hw_binning(struct fd_batch * batch)370 use_hw_binning(struct fd_batch *batch)
371 {
372    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
373 
374    if ((gmem->maxpw * gmem->maxph) > 32)
375       return false;
376 
377    return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
378           (batch->num_draws > 0);
379 }
380 
/* Patch framebuffer-read texture descriptors for the GMEM path: each
 * recorded patch point gets a descriptor that samples the attachment
 * directly out of GMEM (gmem_base + per-cbuf offset, TILE6_2).
 */
static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   for (unsigned i = 0; i < num_patches; i++) {
     struct fd_cs_patch *patch =
        fd_patch_element(&batch->fb_read_patches, i);
      int buf = patch->val;   /* patch->val records the cbuf index */
      struct pipe_surface *psurf = pfb->cbufs[buf];
      struct pipe_resource *prsc = psurf->texture;
      struct fd_resource *rsc = fd_resource(prsc);
      enum pipe_format format = psurf->format;

      uint8_t swiz[4];
      fdl6_format_swiz(psurf->format, false, swiz);

      /* iova of this attachment's tile data within GMEM: */
      uint64_t base = screen->gmem_base + gmem->cbuf_base[buf];
      /* always TILE6_2 mode in GMEM, which also means no swap: */
      uint32_t descriptor[FDL6_TEX_CONST_DWORDS] = {
            A6XX_TEX_CONST_0_FMT(fd6_texture_format(
                  format, (enum a6xx_tile_mode)rsc->layout.tile_mode)) |
            A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
            A6XX_TEX_CONST_0_SWAP(WZYX) |
            A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
            COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
            A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
            A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
            A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
            A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3])),

         A6XX_TEX_CONST_1_WIDTH(pfb->width) |
            A6XX_TEX_CONST_1_HEIGHT(pfb->height),

         /* pitch in GMEM is just bin width times bytes-per-pixel: */
         A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[buf]) |
            A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D),

         A6XX_TEX_CONST_3_ARRAY_PITCH(rsc->layout.layer_size),
         A6XX_TEX_CONST_4_BASE_LO(base),

         A6XX_TEX_CONST_5_BASE_HI(base >> 32) |
            A6XX_TEX_CONST_5_DEPTH(prsc->array_size)
      };

      /* Overwrite the placeholder descriptor in the cmdstream: */
      memcpy(patch->cs, descriptor, FDL6_TEX_CONST_DWORDS * 4);
   }

   util_dynarray_clear(&batch->fb_read_patches);
}
436 
/* Patch framebuffer-read texture descriptors for the sysmem (bypass)
 * path: descriptors are built against the actual resource layout via
 * fdl6_view_init() rather than a GMEM address.
 */
template <chip CHIP>
static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   unsigned num_patches =
      fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;
   for (unsigned i = 0; i < num_patches; i++) {
     struct fd_cs_patch *patch =
        fd_patch_element(&batch->fb_read_patches, i);
      int buf = patch->val;   /* patch->val records the cbuf index */

      struct pipe_surface *psurf = pfb->cbufs[buf];
      /* NOTE(review): this returns (bailing out of the whole function,
       * leaving fb_read_patches uncleared and later patches unwritten)
       * where patch_fb_read_gmem has no such early exit — confirm
       * whether `continue` was intended here.
       */
      if (!psurf)
         return;

      struct pipe_resource *prsc = psurf->texture;
      struct fd_resource *rsc = fd_resource(prsc);

      uint32_t block_width, block_height;
      fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

      struct fdl_view_args args = {
         .chip = CHIP,

         .iova = fd_bo_get_iova(rsc->bo),

         .base_miplevel = psurf->u.tex.level,
         .level_count = 1,

         .base_array_layer = psurf->u.tex.first_layer,
         .layer_count = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1,

         /* identity swizzle: */
         .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
         .format = psurf->format,

         .type = FDL_VIEW_TYPE_2D,
         .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                            FDL_CHROMA_LOCATION_COSITED_EVEN},
      };
      const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
      struct fdl6_view view;
      fdl6_view_init(&view, layouts, &args,
                     batch->ctx->screen->info->a6xx.has_z24uint_s8uint);
      /* Overwrite the placeholder descriptor in the cmdstream: */
      memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
   }

   util_dynarray_clear(&batch->fb_read_patches);
}
490 
/**
 * Emit RB_RENDER_CNTL (and, on a7xx, GRAS_SU_RENDER_CNTL) for either the
 * binning or rendering pass.  On a6xx the register additionally encodes
 * per-attachment UBWC enables, and is written via CP_REG_WRITE where the
 * firmware supports it (so the CP can track/patch it).
 */
template <chip CHIP>
static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (CHIP >= A7XX) {
      /* a7xx variant carries no UBWC flag bits: */
      OUT_REG(ring,
         RB_RENDER_CNTL(
            CHIP,
            .binning = binning,
            .raster_mode = TYPE_TILED,
            .raster_direction = LR_TB
         )
      );
      OUT_REG(ring,
         A7XX_GRAS_SU_RENDER_CNTL(
            .binning = binning,
         )
      );
      return;
   }

   struct fd_screen *screen = batch->ctx->screen;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;   /* bitmask of UBWC-compressed MRTs */
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   struct fd_reg_pair rb_render_cntl = RB_RENDER_CNTL(
         CHIP,
         .ccusinglecachelinesize = 2,
         .binning = binning,
         .flag_depth = depth_ubwc_enable,
         .flag_mrts = mrts_ubwc_enable,
   );

   if (screen->info->a6xx.has_cp_reg_write) {
      /* Write through the CP so firmware can track the render-cntl value: */
      OUT_PKT(ring, CP_REG_WRITE,
              CP_REG_WRITE_0(TRACK_RENDER_CNTL),
              CP_REG_WRITE_1(rb_render_cntl.reg),
              CP_REG_WRITE_2(rb_render_cntl.value),
      );
   } else {
      OUT_REG(ring, rb_render_cntl);
   }
}
555 
/**
 * (Re)allocate the VSC draw/prim stream buffers if the previous frame's
 * stream sizes outgrew them, then emit the VSC pipe configuration and
 * stream buffer addresses/pitches for the binning pass.
 */
static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   unsigned max_vsc_pipes = batch->ctx->screen->info->num_vsc_pipes;
   int i;

   /* Grow the draw stream if last frame's feedback says it would overflow: */
   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: probably only need to align to 0x40, but aligning stronger
       * reduces the odds that we will have to realloc again on the next
       * frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   /* Same for the prim stream: */
   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      /* We also use four bytes per vsc pipe at the end of the draw
       * stream buffer for VSC_DRAW_STRM_SIZE written back by hw
       * (see VSC_DRAW_STRM_SIZE_ADDRESS)
       */
      unsigned sz = (max_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch) +
                    (max_vsc_pipes * 4);
      fd6_ctx->vsc_draw_strm =
         fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      unsigned sz = max_vsc_pipes * fd6_ctx->vsc_prim_strm_pitch;
      fd6_ctx->vsc_prim_strm =
         fd_bo_new(ctx->screen->dev, sz, FD_BO_NOMAP, "vsc_prim_strm");
   }

   fd_ringbuffer_attach_bo(ring, fd6_ctx->vsc_draw_strm);
   fd_ringbuffer_attach_bo(ring, fd6_ctx->vsc_prim_strm);

   /* Size writeback area lives after the per-pipe draw streams: */
   OUT_REG(ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
           A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                           .bo_offset = max_vsc_pipes *
                                              fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   /* Per-pipe screen region (position + extent, in bins): */
   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), max_vsc_pipes);
   for (i = 0; i < max_vsc_pipes; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   /* LIMIT is pitch minus 64, matching the overflow test threshold: */
   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}
634 
635 /*
636  * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
637  * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
638  * written to control->vsc_overflow.  This allows the CPU to
639  * detect which buffer overflowed (and, since the current size is
640  * encoded as well, this protects against already-submitted but
641  * not executed batches from fooling the CPU into increasing the
642  * size again unnecessarily).
643  */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   /* Pitches must be dword-aligned for the encoded size+flag scheme
    * (low two bits carry the buffer id):
    */
   assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_scratch if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      /* Draw stream: if size >= pitch-64 (the programmed LIMIT), write
       * 0x1 | pitch to control->vsc_overflow:
       */
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      /* Prim stream: same check, tag 0x3 instead of 0x1: */
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   /* Make the conditional writes visible before the CPU reads them back: */
   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}
685 
686 static void
check_vsc_overflow(struct fd_context * ctx)687 check_vsc_overflow(struct fd_context *ctx)
688 {
689    struct fd6_context *fd6_ctx = fd6_context(ctx);
690    struct fd6_control *control =
691          (struct fd6_control *)fd_bo_map(fd6_ctx->control_mem);
692    uint32_t vsc_overflow = control->vsc_overflow;
693 
694    if (!vsc_overflow)
695       return;
696 
697    /* clear overflow flag: */
698    control->vsc_overflow = 0;
699 
700    unsigned buffer = vsc_overflow & 0x3;
701    unsigned size = vsc_overflow & ~0x3;
702 
703    if (buffer == 0x1) {
704       /* VSC_DRAW_STRM overflow: */
705 
706       if (size < fd6_ctx->vsc_draw_strm_pitch) {
707          /* we've already increased the size, this overflow is
708           * from a batch submitted before resize, but executed
709           * after
710           */
711          return;
712       }
713 
714       fd_bo_del(fd6_ctx->vsc_draw_strm);
715       fd6_ctx->vsc_draw_strm = NULL;
716       fd6_ctx->vsc_draw_strm_pitch *= 2;
717 
718       mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
719                 fd6_ctx->vsc_draw_strm_pitch);
720 
721    } else if (buffer == 0x3) {
722       /* VSC_PRIM_STRM overflow: */
723 
724       if (size < fd6_ctx->vsc_prim_strm_pitch) {
725          /* we've already increased the size */
726          return;
727       }
728 
729       fd_bo_del(fd6_ctx->vsc_prim_strm);
730       fd6_ctx->vsc_prim_strm = NULL;
731       fd6_ctx->vsc_prim_strm_pitch *= 2;
732 
733       mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
734                 fd6_ctx->vsc_prim_strm_pitch);
735 
736    } else {
737       /* NOTE: it's possible, for example, for overflow to corrupt the
738        * control page.  I mostly just see this hit if I set initial VSC
739        * buffer size extremely small.  Things still seem to recover,
740        * but maybe we should pre-emptively realloc vsc_data/vsc_data2
741        * and hope for different memory placement?
742        */
743       mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
744    }
745 }
746 
/**
 * Emit the start-of-render sample-count capture for the autotuner, if
 * this batch has an autotune result slot.  The end counterpart is
 * emitted by emit_common_fini().
 */
template <chip CHIP>
static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   /* Nothing to do when autotune isn't tracking this batch: */
   if (!result)
      return;

   fd_ringbuffer_attach_bo(ring, at->results_mem);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
      /* Legacy path: program the destination address then use ZPASS_DONE
       * to trigger the sample-count copy:
       */
      OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
      OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

      fd6_event_write<CHIP>(ctx, ring, FD_ZPASS_DONE);

      /* Copied from blob's cmdstream, not sure why it is done. */
      if (CHIP == A7XX) {
         fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
      }
   } else {
      /* Newer fw: CP_EVENT_WRITE7 can carry the destination directly: */
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
         ),
         EV_DST_RAM_CP_EVENT_WRITE7_1(
            results_ptr(at, result[result->idx].samples_start)
         ),
      );
   }
}
786 
/**
 * Emit end-of-render work: pending flushes/barriers for the batch, the
 * end sample-count capture for the autotuner, and the result fence.
 */
template <chip CHIP>
static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   /* Flushes happen regardless of whether autotune is tracking us: */
   fd6_emit_flushes<CHIP>(batch->ctx, ring, batch->barrier);

   if (!result)
      return;

   fd_ringbuffer_attach_bo(ring, at->results_mem);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
      /* Legacy path: copy the end sample count via ZPASS_DONE: */
      OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
      OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

      fd6_event_write<CHIP>(batch->ctx, ring, FD_ZPASS_DONE);
   } else {
      /* Base address is samples_start; the end-offset/accum-diff flags
       * presumably direct the hw to the end slot and accumulate the
       * start/end difference — NOTE(review): confirm against the
       * CP_EVENT_WRITE7 packet description.
       */
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
            .sample_count_end_offset = true,
            .write_accum_sample_count_diff = true,
         ),
         EV_DST_RAM_CP_EVENT_WRITE7_1(
            results_ptr(at, result[result->idx].samples_start)
         ),
      );
   }

   /* Fence write signals the CPU that this result slot is complete: */
   fd6_fence_write<CHIP>(ring, result->fence, results_ptr(at, fence));
}
827 
828 /*
829  * Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB
830  * is skipped for tiles that have no visible geometry.
831  *
832  * If we aren't using binning pass, this just emits a normal IB.
833  */
834 static void
emit_conditional_ib(struct fd_batch * batch,const struct fd_tile * tile,struct fd_ringbuffer * target)835 emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
836                     struct fd_ringbuffer *target)
837 {
838    struct fd_ringbuffer *ring = batch->gmem;
839 
840    /* If we have fast clear, that won't count in the VSC state, so it
841     * forces an unconditional IB (because we know there is something
842     * to do for this tile)
843     */
844    if (batch->cleared || !use_hw_binning(batch)) {
845       fd6_emit_ib(batch->gmem, target);
846       return;
847    }
848 
849    if (target->cur == target->start)
850       return;
851 
852    emit_marker6(ring, 6);
853 
854    unsigned count = fd_ringbuffer_cmd_count(target);
855 
856    BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */
857 
858    OUT_PKT7(ring, CP_REG_TEST, 1);
859    OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
860                      A6XX_CP_REG_TEST_0_BIT(tile->n) |
861                      A6XX_CP_REG_TEST_0_SKIP_WAIT_FOR_ME);
862 
863    OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
864    OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
865    OUT_RING(ring, PRED_TEST_CP_COND_REG_EXEC_1_DWORDS(4 * count));
866 
867    for (unsigned i = 0; i < count; i++) {
868       uint32_t dwords;
869       OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
870       dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
871       assert(dwords > 0);
872       OUT_RING(ring, dwords);
873    }
874 
875    emit_marker6(ring, 6);
876 }
877 
/* Program the window scissor and the 2D-resolve rect to the inclusive
 * rectangle (x1,y1)..(x2,y2):
 */
static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}
888 
/* Parameters for set_bin_size(), mirroring the fields shared between the
 * GRAS_BIN_CONTROL and RB_BIN_CONTROL registers:
 */
struct bin_size_params {
   enum a6xx_render_mode render_mode;   /* BINNING_PASS vs RENDERING_PASS */
   bool force_lrz_write_dis;
   enum a6xx_buffers_location buffers_location;
   enum a6xx_lrz_feedback_mask lrz_feedback_zmode_mask;
};
895 
/* Program bin (tile) dimensions and pass mode into the GRAS/RB bin control
 * registers.  'gmem' may be NULL (bypass/sysmem), in which case a 0x0 bin
 * size is programmed.
 */
template <chip CHIP>
static void
set_bin_size(struct fd_ringbuffer *ring, const struct fd_gmem_stateobj *gmem,
             struct bin_size_params p)
{
   unsigned w = gmem ? gmem->bin_w : 0;
   unsigned h = gmem ? gmem->bin_h : 0;

   if (CHIP == A6XX) {
      OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(
            .binw = w, .binh = h,
            .render_mode = p.render_mode,
            .force_lrz_write_dis = p.force_lrz_write_dis,
            .buffers_location = p.buffers_location,
            .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
      ));
   } else {
      /* Later gens omit the buffers_location field here: */
      OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(
            .binw = w, .binh = h,
            .render_mode = p.render_mode,
            .force_lrz_write_dis = p.force_lrz_write_dis,
            .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
      ));
   }
   OUT_REG(ring, RB_BIN_CONTROL(
         CHIP,
         .binw = w, .binh = h,
         .render_mode = p.render_mode,
         .force_lrz_write_dis = p.force_lrz_write_dis,
         .buffers_location = p.buffers_location,
         .lrz_feedback_zmode_mask = p.lrz_feedback_zmode_mask,
   ));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}
931 
/**
 * Emit the visibility (binning) pass: replays the subpass draw IBs with the
 * hw in binning mode so the VSC produces per-tile visibility streams, then
 * checks for VSC overflow before the draw pass consumes those streams.
 */
template <chip CHIP>
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   assert(!batch->tessellation);

   /* Binning covers the full framebuffer: */
   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   /* Zero window offset for the binning pass: */
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   foreach_subpass (subpass, batch) {
      emit_lrz<CHIP>(batch, subpass);
      fd6_emit_ib(ring, subpass->draw);
   }
   trace_end_binning_ib(&batch->trace, ring);

   /* Disable all draw-state groups left over from the draw IBs: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   /* This flush is probably required because the VSC, which produces the
    * visibility stream, is a client of UCHE, whereas the CP needs to read
    * the visibility stream (without caching) to do draw skipping. The
    * WFI+WAIT_FOR_ME combination guarantees that the binning commands
    * submitted are finished before reading the VSC regs (in
    * emit_vsc_overflow_test) or the VSC_DATA buffer directly (implicitly
    * as part of draws).
    */
   fd6_emit_flushes<CHIP>(batch->ctx, ring,
                          FD6_FLUSH_CACHE |
                          FD6_WAIT_FOR_IDLE |
                          FD6_WAIT_FOR_ME);

   trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace, batch->gmem);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   fd6_emit_ccu_cntl<CHIP>(ring, screen, true);
}
1022 
/* Program the MSAA sample count ('nr' samples) consistently across the
 * SP_TP, GRAS and RB blocks, plus the gmem-blit MSAA control.  When
 * single-sampled, the per-block MSAA_DISABLE bit is also set.
 */
static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_GMEM_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_BLIT_GMEM_MSAA_CNTL_SAMPLES(samples));
}
1049 
1050 template <chip CHIP>
1051 static void prepare_tile_setup(struct fd_batch *batch);
1052 template <chip CHIP>
1053 static void prepare_tile_fini(struct fd_batch *batch);
1054 
/* before first tile */
template <chip CHIP>
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   emit_lrz_clears<CHIP>(batch);

   fd6_emit_restore<CHIP>(batch, ring);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace, ring);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace, ring);
   }

   fd6_cache_inv<CHIP>(batch->ctx, ring);

   /* Build the per-tile load/store IBs up-front (replayed per tile later): */
   prepare_tile_setup<CHIP>(batch);
   prepare_tile_fini<CHIP>(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);
   fd6_emit_ccu_cntl<CHIP>(ring, screen, true);

   emit_zs<CHIP>(batch->ctx, ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt<CHIP>(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (CHIP >= A7XX) {
      OUT_REG(ring, A7XX_RB_UNKNOWN_8812(0x0));
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E06(0x0));
      OUT_REG(ring, A7XX_GRAS_UNKNOWN_8007(0x0));
      OUT_REG(ring, A6XX_GRAS_UNKNOWN_8110(0x2));
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E09(0x4));
   }

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size<CHIP>(ring, gmem, {
            .render_mode = BINNING_PASS,
            .buffers_location = BUFFERS_IN_GMEM,
            .lrz_feedback_zmode_mask = LRZ_FEEDBACK_NONE,
      });
      update_render_cntl<CHIP>(batch, pfb, true);
      emit_binning_pass<CHIP>(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      set_bin_size<CHIP>(ring, gmem, {
            .render_mode = RENDERING_PASS,
            .force_lrz_write_dis = !screen->info->a6xx.has_lrz_feedback,
            .buffers_location = BUFFERS_IN_GMEM,
            .lrz_feedback_zmode_mask = screen->info->a6xx.has_lrz_feedback
                                          ? LRZ_FEEDBACK_EARLY_LRZ_LATE_Z
                                          : LRZ_FEEDBACK_NONE,
      });

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size<CHIP>(ring, gmem, {
            .render_mode = RENDERING_PASS,
            .force_lrz_write_dis = !screen->info->a6xx.has_lrz_feedback,
            .buffers_location = BUFFERS_IN_GMEM,
            .lrz_feedback_zmode_mask =
               screen->info->a6xx.has_lrz_feedback
                  ? LRZ_FEEDBACK_EARLY_Z_OR_EARLY_LRZ_LATE_Z
                  : LRZ_FEEDBACK_NONE,
      });
   }

   update_render_cntl<CHIP>(batch, pfb, false);

   emit_common_init<CHIP>(batch);
}
1165 
/* Program the window offset (tile origin in screen coords) into all the
 * hw blocks that need it (RB, SP, SP_TP):
 */
template <chip CHIP>
static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_REG(ring, SP_WINDOW_OFFSET(CHIP, .x = x1, .y = y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}
1182 
1183 /* before mem2gmem */
1184 template <chip CHIP>
1185 static void
fd6_emit_tile_prep(struct fd_batch * batch,const struct fd_tile * tile)1186 fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
1187 {
1188    struct fd_context *ctx = batch->ctx;
1189    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1190    struct fd6_context *fd6_ctx = fd6_context(ctx);
1191    struct fd_ringbuffer *ring = batch->gmem;
1192 
1193    emit_marker6(ring, 7);
1194    OUT_PKT7(ring, CP_SET_MARKER, 1);
1195    OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
1196    emit_marker6(ring, 7);
1197 
1198    uint32_t x1 = tile->xoff;
1199    uint32_t y1 = tile->yoff;
1200    uint32_t x2 = tile->xoff + tile->bin_w - 1;
1201    uint32_t y2 = tile->yoff + tile->bin_h - 1;
1202 
1203    set_scissor(ring, x1, y1, x2, y2);
1204 
1205    if (use_hw_binning(batch)) {
1206       const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
1207       unsigned num_vsc_pipes = ctx->screen->info->num_vsc_pipes;
1208 
1209       OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
1210 
1211       OUT_PKT7(ring, CP_SET_MODE, 1);
1212       OUT_RING(ring, 0x0);
1213 
1214       OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
1215       OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
1216                         CP_SET_BIN_DATA5_0_VSC_N(tile->n));
1217       OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
1218                 (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
1219       OUT_RELOC(
1220          ring, fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
1221          (tile->p * 4) + (num_vsc_pipes * fd6_ctx->vsc_draw_strm_pitch),
1222          0, 0);
1223       OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
1224                 (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);
1225 
1226       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
1227       OUT_RING(ring, 0x0);
1228 
1229       set_window_offset<CHIP>(ring, x1, y1);
1230 
1231       const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1232       set_bin_size<CHIP>(ring, gmem, {
1233             .render_mode = RENDERING_PASS,
1234             .force_lrz_write_dis = !ctx->screen->info->a6xx.has_lrz_feedback,
1235             .buffers_location = BUFFERS_IN_GMEM,
1236             .lrz_feedback_zmode_mask = ctx->screen->info->a6xx.has_lrz_feedback
1237                                           ? LRZ_FEEDBACK_EARLY_LRZ_LATE_Z
1238                                           : LRZ_FEEDBACK_NONE,
1239       });
1240 
1241       OUT_PKT7(ring, CP_SET_MODE, 1);
1242       OUT_RING(ring, 0x0);
1243    } else {
1244       set_window_offset<CHIP>(ring, x1, y1);
1245 
1246       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
1247       OUT_RING(ring, 0x1);
1248 
1249       OUT_PKT7(ring, CP_SET_MODE, 1);
1250       OUT_RING(ring, 0x0);
1251    }
1252 }
1253 
/* Program the blit scissor to cover the whole framebuffer, aligned up to
 * the blit alignment requirements (16 px horizontally, 4 px vertically):
 */
static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   struct pipe_scissor_state blit_scissor;

   blit_scissor.minx = 0;
   blit_scissor.miny = 0;
   blit_scissor.maxx = ALIGN(pfb->width, 16);
   blit_scissor.maxy = ALIGN(pfb->height, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                     A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                     A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}
1272 
1273 template <chip CHIP>
1274 static void
emit_blit(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,bool stencil)1275 emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
1276           struct pipe_surface *psurf, bool stencil)
1277 {
1278    struct fd_resource *rsc = fd_resource(psurf->texture);
1279    enum pipe_format pfmt = psurf->format;
1280    uint32_t offset;
1281    bool ubwc_enabled;
1282 
1283    assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1284 
1285    /* separate stencil case: */
1286    if (stencil) {
1287       rsc = rsc->stencil;
1288       pfmt = rsc->b.b.format;
1289    }
1290 
1291    offset =
1292       fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
1293    ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);
1294 
1295    assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1296 
1297    enum a6xx_tile_mode tile_mode = (enum a6xx_tile_mode)
1298          fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
1299    enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
1300    uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
1301    uint32_t array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
1302    enum a3xx_color_swap swap =
1303          fd6_color_swap(pfmt, (enum a6xx_tile_mode)rsc->layout.tile_mode);
1304    enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);
1305 
1306    OUT_REG(ring,
1307            A6XX_RB_BLIT_DST_INFO(
1308                  .tile_mode = tile_mode,
1309                  .flags = ubwc_enabled,
1310                  .samples = samples,
1311                  .color_swap = swap,
1312                  .color_format = format,
1313            ),
1314            A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
1315            A6XX_RB_BLIT_DST_PITCH(stride),
1316            A6XX_RB_BLIT_DST_ARRAY_PITCH(array_stride));
1317 
1318    OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));
1319 
1320    if (ubwc_enabled) {
1321       OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
1322       fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
1323                               psurf->u.tex.first_layer);
1324    }
1325 
1326    if (CHIP >= A7XX)
1327       OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));
1328 
1329    fd6_emit_blit<CHIP>(batch->ctx, ring);
1330 }
1331 
/* Emit a BLIT_EVENT_LOAD (sysmem -> gmem restore) for one buffer of the
 * given surface.  'buffer' is one of FD_BUFFER_COLOR/DEPTH/STENCIL and
 * 'base' is the buffer's gmem base offset.
 */
template <chip CHIP>
static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring,
           A6XX_RB_BLIT_INFO(
                 .type = BLIT_EVENT_LOAD,
                 .sample_0 = util_format_is_pure_integer(psurf->format),
                 .depth = (buffer == FD_BUFFER_DEPTH),
           ),
   );

   emit_blit<CHIP>(batch, ring, base, psurf, stencil);
}
1349 
/**
 * Emit BLIT_EVENT_CLEAR fast-clear blits into subpass->subpass_clears for
 * every buffer flagged in subpass->fast_cleared: color MRTs first, then
 * depth (or combined depth/stencil), then a separate stencil buffer.
 */
template <chip CHIP>
static void
emit_subpass_clears(struct fd_batch *batch, struct fd_batch_subpass *subpass)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = subpass->subpass_clears;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = subpass->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &subpass->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         /* Pre-swizzle the clear color channels to match the format's
          * component swap before packing:
          */
         union pipe_color_union swapped;
         switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
                           A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         if (CHIP >= A7XX)
            OUT_REG(ring, A7XX_RB_BLIT_CLEAR_MODE(.clear_mode = CLEAR_MODE_GMEM));

         fd6_emit_blit<CHIP>(batch->ctx, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         /* Separate stencil: pack only the depth component here; stencil
          * is cleared below into its own gmem buffer.
          */
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, subpass->clear_depth);
      } else {
         /* Combined depth/stencil: pack both into one value. */
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, subpass->clear_depth, subpass->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit<CHIP>(batch->ctx, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                        A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                        A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_CLEAR) |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, subpass->clear_stencil & 0xff);

      fd6_emit_blit<CHIP>(batch->ctx, ring);
   }
}
1509 
1510 /*
1511  * transfer from system memory to gmem
1512  */
1513 template <chip CHIP>
1514 static void
emit_restore_blits(struct fd_batch * batch,struct fd_ringbuffer * ring)1515 emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
1516 {
1517    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1518    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1519 
1520    if (batch->restore & FD_BUFFER_COLOR) {
1521       unsigned i;
1522       for (i = 0; i < pfb->nr_cbufs; i++) {
1523          if (!pfb->cbufs[i])
1524             continue;
1525          if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
1526             continue;
1527          emit_restore_blit<CHIP>(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
1528                                  FD_BUFFER_COLOR);
1529       }
1530    }
1531 
1532    if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
1533       struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
1534 
1535       if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
1536          emit_restore_blit<CHIP>(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
1537                                  FD_BUFFER_DEPTH);
1538       }
1539       if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
1540          emit_restore_blit<CHIP>(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
1541                                  FD_BUFFER_STENCIL);
1542       }
1543    }
1544 }
1545 
/* Build the per-tile setup cmdstreams up-front: a tile_loads ring with the
 * restore (mem2gmem) blits, and a subpass_clears ring per subpass that has
 * fast clears.  These are replayed for each tile later.
 */
template <chip CHIP>
static void
prepare_tile_setup(struct fd_batch *batch)
{
   if (batch->restore) {
      batch->tile_loads =
         fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

      set_blit_scissor(batch, batch->tile_loads);
      emit_restore_blits<CHIP>(batch, batch->tile_loads);
   }

   foreach_subpass (subpass, batch) {
      if (!subpass->fast_cleared)
         continue;

      subpass->subpass_clears =
         fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

      set_blit_scissor(batch, subpass->subpass_clears);
      emit_subpass_clears<CHIP>(batch, subpass);
   }
}
1569 
1570 /*
1571  * transfer from system memory to gmem
1572  */
1573 static void
fd6_emit_tile_mem2gmem(struct fd_batch * batch,const struct fd_tile * tile)1574 fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
1575 {
1576 }
1577 
/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   /* Replay the pre-built restore blits for this tile, skipped via the
    * visibility stream when the tile has no geometry:
    */
   if (batch->tile_loads) {
      trace_start_tile_loads(&batch->trace, batch->gmem, batch->restore);
      emit_conditional_ib(batch, tile, batch->tile_loads);
      trace_end_tile_loads(&batch->trace, batch->gmem);
   }
}
1588 
1589 static bool
blit_can_resolve(enum pipe_format format)1590 blit_can_resolve(enum pipe_format format)
1591 {
1592    const struct util_format_description *desc = util_format_description(format);
1593 
1594    /* blit event can only do resolve for simple cases:
1595     * averaging samples as unsigned integers or choosing only one sample
1596     */
1597    if (util_format_is_snorm(format) || util_format_is_srgb(format))
1598       return false;
1599 
1600    /* can't do formats with larger channel sizes
1601     * note: this includes all float formats
1602     * note2: single channel integer formats seem OK
1603     */
1604    if (desc->channel[0].size > 10)
1605       return false;
1606 
1607    switch (format) {
1608    /* for unknown reasons blit event can't msaa resolve these formats when tiled
1609     * likely related to these formats having different layout from other cpp=2
1610     * formats
1611     */
1612    case PIPE_FORMAT_R8G8_UNORM:
1613    case PIPE_FORMAT_R8G8_UINT:
1614    case PIPE_FORMAT_R8G8_SINT:
1615    case PIPE_FORMAT_R8G8_SRGB:
1616    /* TODO: this one should be able to work? */
1617    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1618       return false;
1619    default:
1620       break;
1621    }
1622 
1623    return true;
1624 }
1625 
1626 static bool
needs_resolve(struct pipe_surface * psurf)1627 needs_resolve(struct pipe_surface *psurf)
1628 {
1629    return psurf->nr_samples &&
1630           (psurf->nr_samples != psurf->texture->nr_samples);
1631 }
1632 
1633 /**
1634  * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
1635  * clear/stores to Z24S8.
1636  */
1637 static uint32_t
fd6_unknown_8c01(enum pipe_format format,unsigned buffers)1638 fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
1639 {
1640    buffers &= FD_BUFFER_DEPTH | FD_BUFFER_STENCIL;
1641    if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
1642       if (buffers == FD_BUFFER_DEPTH)
1643          return 0x08000041;
1644       else if (buffers == FD_BUFFER_STENCIL)
1645          return 0x00084001;
1646    }
1647    return 0;
1648 }
1649 
1650 template <chip CHIP>
1651 static void
emit_resolve_blit(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,unsigned buffer)1652 emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
1653                   uint32_t base, struct pipe_surface *psurf,
1654                   unsigned buffer) assert_dt
1655 {
1656    uint32_t info = 0;
1657    bool stencil = false;
1658 
1659    if (!fd_resource(psurf->texture)->valid)
1660       return;
1661 
1662    /* if we need to resolve, but cannot with BLIT event, we instead need
1663     * to generate per-tile CP_BLIT (r2d) commands:
1664     *
1665     * The separate-stencil is a special case, we might need to use CP_BLIT
1666     * for depth, but we can still resolve stencil with a BLIT event
1667     */
1668    if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
1669        (buffer != FD_BUFFER_STENCIL)) {
1670       /* We could potentially use fd6_unknown_8c01() to handle partial z/s
1671        * resolve to packed z/s, but we would need a corresponding ability in the
1672        * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
1673        * just do a restore of the other channel for partial packed z/s writes.
1674        */
1675       fd6_resolve_tile<CHIP>(batch, ring, base, psurf, 0);
1676       return;
1677    }
1678 
1679    switch (buffer) {
1680    case FD_BUFFER_COLOR:
1681       info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE);
1682       break;
1683    case FD_BUFFER_STENCIL:
1684       info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE_AND_CLEAR);
1685       stencil = true;
1686       break;
1687    case FD_BUFFER_DEPTH:
1688       info = A6XX_RB_BLIT_INFO_TYPE(BLIT_EVENT_STORE) | A6XX_RB_BLIT_INFO_DEPTH;
1689       break;
1690    }
1691 
1692    if (util_format_is_pure_integer(psurf->format) ||
1693        util_format_is_depth_or_stencil(psurf->format))
1694       info |= A6XX_RB_BLIT_INFO_SAMPLE_0;
1695 
1696    OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
1697    OUT_RING(ring, info);
1698 
1699    emit_blit<CHIP>(batch, ring, base, psurf, stencil);
1700 }
1701 
1702 /*
1703  * transfer from gmem to system memory (ie. normal RAM)
1704  */
1705 
1706 template <chip CHIP>
1707 static void
prepare_tile_fini(struct fd_batch * batch)1708 prepare_tile_fini(struct fd_batch *batch)
1709    assert_dt
1710 {
1711    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1712    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1713    struct fd_ringbuffer *ring;
1714 
1715    batch->tile_store =
1716       fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
1717    ring = batch->tile_store;
1718 
1719    set_blit_scissor(batch, ring);
1720 
1721    if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
1722       struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
1723 
1724       if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
1725          emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[0],
1726                                  pfb->zsbuf, FD_BUFFER_DEPTH);
1727       }
1728       if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
1729          emit_resolve_blit<CHIP>(batch, ring, gmem->zsbuf_base[1],
1730                                  pfb->zsbuf, FD_BUFFER_STENCIL);
1731       }
1732    }
1733 
1734    if (batch->resolve & FD_BUFFER_COLOR) {
1735       unsigned i;
1736       for (i = 0; i < pfb->nr_cbufs; i++) {
1737          if (!pfb->cbufs[i])
1738             continue;
1739          if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
1740             continue;
1741          emit_resolve_blit<CHIP>(batch, ring, gmem->cbuf_base[i],
1742                                  pfb->cbufs[i], FD_BUFFER_COLOR);
1743       }
1744    }
1745 }
1746 
/* Emit the per-tile draw phase: for each subpass, any gmem fast-clears
 * (wrapped in a conditional IB so they can be skipped per-tile), LRZ
 * state, and the subpass draw IB; finally the optional tile epilogue.
 */
template <chip CHIP>
static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   foreach_subpass (subpass, batch) {
      if (subpass->subpass_clears) {
         trace_start_clears(&batch->trace, batch->gmem, subpass->fast_cleared);
         emit_conditional_ib(batch, tile, subpass->subpass_clears);
         trace_end_clears(&batch->trace, batch->gmem);
      }

      emit_lrz<CHIP>(batch, subpass);

      fd6_emit_ib(batch->gmem, subpass->draw);
   }

   if (batch->tile_epilogue)
      fd6_emit_ib(batch->gmem, batch->tile_epilogue);
}
1766 
/* Per-tile resolve phase: put the CP into resolve mode and (conditionally)
 * execute the tile-store IB built by prepare_tile_fini().
 */
static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   /* Disable all CP_SET_DRAW_STATE groups before the resolve blits: */
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   /* The tile-store IB is conditional so stores can be skipped for tiles
    * whose contents were not touched:
    */
   if (batch->tile_store) {
      trace_start_tile_stores(&batch->trace, batch->gmem, batch->resolve);
      emit_conditional_ib(batch, tile, batch->tile_store);
      trace_end_tile_stores(&batch->trace, batch->gmem);
   }
}
1801 
/* Runs once after all tiles are done: common fini, re-enable LRZ, flush
 * LRZ and clean the CCU blit cache, then check for VSC overflow if hw
 * binning was used.
 */
template <chip CHIP>
static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini<CHIP>(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_CCU_CLEAN_BLIT_CACHE);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}
1820 
1821 template <chip CHIP>
1822 static void
emit_sysmem_clears(struct fd_batch * batch,struct fd_batch_subpass * subpass)1823 emit_sysmem_clears(struct fd_batch *batch, struct fd_batch_subpass *subpass)
1824    assert_dt
1825 {
1826    struct fd_context *ctx = batch->ctx;
1827    struct fd_ringbuffer *ring = batch->gmem;
1828    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1829 
1830    uint32_t buffers = subpass->fast_cleared;
1831 
1832    if (!buffers)
1833       return;
1834 
1835    struct pipe_box box2d;
1836    u_box_2d(0, 0, pfb->width, pfb->height, &box2d);
1837 
1838    trace_start_clears(&batch->trace, ring, buffers);
1839 
1840    if (buffers & PIPE_CLEAR_COLOR) {
1841       for (int i = 0; i < pfb->nr_cbufs; i++) {
1842          union pipe_color_union color = subpass->clear_color[i];
1843 
1844          if (!pfb->cbufs[i])
1845             continue;
1846 
1847          if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
1848             continue;
1849 
1850          fd6_clear_surface<CHIP>(ctx, ring, pfb->cbufs[i], &box2d, &color, 0);
1851       }
1852    }
1853    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
1854       union pipe_color_union value = {};
1855 
1856       const bool has_depth = pfb->zsbuf;
1857       struct pipe_resource *separate_stencil =
1858          has_depth && fd_resource(pfb->zsbuf->texture)->stencil
1859             ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
1860             : NULL;
1861 
1862       if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
1863          value.f[0] = subpass->clear_depth;
1864          value.ui[1] = subpass->clear_stencil;
1865          fd6_clear_surface<CHIP>(ctx, ring, pfb->zsbuf, &box2d,
1866                                  &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
1867       }
1868 
1869       if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
1870          value.ui[0] = subpass->clear_stencil;
1871 
1872          struct pipe_surface stencil_surf = *pfb->zsbuf;
1873          stencil_surf.format = PIPE_FORMAT_S8_UINT;
1874          stencil_surf.texture = separate_stencil;
1875 
1876          fd6_clear_surface<CHIP>(ctx, ring, &stencil_surf, &box2d, &value, 0);
1877       }
1878    }
1879 
1880    fd6_emit_flushes<CHIP>(ctx, ring, FD6_FLUSH_CCU_COLOR | FD6_INVALIDATE_CCU_COLOR);
1881 
1882    trace_end_clears(&batch->trace, ring);
1883 }
1884 
/* Setup at the start of a sysmem (bypass, non-tiled) pass: restore state,
 * run the prologue, then program scissor/window/bin-size and the MRT and
 * z/s state for rendering directly to system memory.
 */
template <chip CHIP>
static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_lrz_clears<CHIP>(batch);

   fd6_emit_restore<CHIP>(batch, ring);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset<CHIP>(ring, 0, 0);

   /* No binning pass in sysmem mode; render buffers live in sysmem: */
   set_bin_size<CHIP>(ring, NULL, {
         .render_mode = RENDERING_PASS,
         .buffers_location = BUFFERS_IN_SYSMEM,
   });

   if (CHIP >= A7XX) {
      OUT_REG(ring, A7XX_RB_UNKNOWN_8812(0x3ff)); // all buffers in sysmem
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E06(batch->ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E06));
      OUT_REG(ring, A7XX_GRAS_UNKNOWN_8007(0x0));
      OUT_REG(ring, A6XX_GRAS_UNKNOWN_8110(0x2));
      OUT_REG(ring, A7XX_RB_UNKNOWN_8E09(0x4));
   }

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   /* enable stream-out, with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs<CHIP>(batch->ctx, ring, pfb->zsbuf, NULL);
   emit_mrt<CHIP>(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem<CHIP>(batch);

   emit_common_init<CHIP>(batch);
}
1957 
1958 template <chip CHIP>
1959 static void
fd6_emit_sysmem(struct fd_batch * batch)1960 fd6_emit_sysmem(struct fd_batch *batch)
1961    assert_dt
1962 {
1963    struct fd_ringbuffer *ring = batch->gmem;
1964    struct fd_screen *screen = batch->ctx->screen;
1965 
1966    foreach_subpass (subpass, batch) {
1967       if (subpass->fast_cleared) {
1968          unsigned flushes = 0;
1969          if (subpass->fast_cleared & FD_BUFFER_COLOR)
1970             flushes |= FD6_INVALIDATE_CCU_COLOR;
1971          if (subpass->fast_cleared & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
1972             flushes |= FD6_INVALIDATE_CCU_DEPTH;
1973 
1974          fd6_emit_flushes<CHIP>(batch->ctx, ring, flushes);
1975          emit_sysmem_clears<CHIP>(batch, subpass);
1976       }
1977 
1978       OUT_WFI5(ring);
1979       fd6_emit_ccu_cntl<CHIP>(ring, screen, false);
1980 
1981       struct pipe_framebuffer_state *pfb = &batch->framebuffer;
1982       update_render_cntl<CHIP>(batch, pfb, false);
1983 
1984       emit_lrz<CHIP>(batch, subpass);
1985 
1986       fd6_emit_ib(ring, subpass->draw);
1987    }
1988 }
1989 
/* Finish a sysmem pass: common fini, tile/batch epilogues, then flush LRZ
 * and both CCU color and depth caches.
 */
template <chip CHIP>
static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini<CHIP>(batch);

   if (batch->tile_epilogue)
      fd6_emit_ib(batch->gmem, batch->tile_epilogue);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_LRZ_FLUSH);

   fd6_emit_flushes<CHIP>(batch->ctx, ring,
                          FD6_FLUSH_CCU_COLOR |
                          FD6_FLUSH_CCU_DEPTH);
}
2013 
2014 template <chip CHIP>
2015 void
fd6_gmem_init(struct pipe_context * pctx)2016 fd6_gmem_init(struct pipe_context *pctx)
2017    disable_thread_safety_analysis
2018 {
2019    struct fd_context *ctx = fd_context(pctx);
2020 
2021    ctx->emit_tile_init = fd6_emit_tile_init<CHIP>;
2022    ctx->emit_tile_prep = fd6_emit_tile_prep<CHIP>;
2023    ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
2024    ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
2025    ctx->emit_tile = fd6_emit_tile<CHIP>;
2026    ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
2027    ctx->emit_tile_fini = fd6_emit_tile_fini<CHIP>;
2028    ctx->emit_sysmem_prep = fd6_emit_sysmem_prep<CHIP>;
2029    ctx->emit_sysmem = fd6_emit_sysmem<CHIP>;
2030    ctx->emit_sysmem_fini = fd6_emit_sysmem_fini<CHIP>;
2031 }
2032 FD_GENX(fd6_gmem_init);
2033