/*
 * Copyright © 2012 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "ir2/instr-a2xx.h"
#include "fd2_context.h"
#include "fd2_draw.h"
#include "fd2_emit.h"
#include "fd2_gmem.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"

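/* returns the color-channel SWAP value used in RB_COLOR_INFO and friends:
 * 1 for BGRA-ordered formats, 0 otherwise
 */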
static uint32_t
fmt2swap(enum pipe_format format)
{
   switch (format) {
   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_B8G8R8X8_UNORM:
   case PIPE_FORMAT_B5G6R5_UNORM:
   case PIPE_FORMAT_B5G5R5A1_UNORM:
   case PIPE_FORMAT_B5G5R5X1_UNORM:
   case PIPE_FORMAT_B4G4R4A4_UNORM:
   case PIPE_FORMAT_B4G4R4X4_UNORM:
   case PIPE_FORMAT_B2G3R3_UNORM:
      return 1;
   default:
      return 0;
   }
}

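/* decide whether hw binning (visibility stream generation) can be used for
 * this batch; otherwise every bin replays all draws
 */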
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* we hardcode a limit of 8 "pipes"; this limit could be increased at the
    * cost of a slightly larger command stream, but very few cases need more
    * than 8
    * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
    */
   if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
      return false;

   /* only a20x hw binning is implemented
    * a22x is more like a3xx, but perhaps the a20x path works there? (TODO)
    */
   if (!is_a20x(batch->ctx->screen))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}

/* transfer from gmem to system memory (ie. normal RAM) */

static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf)
{
   struct fd_ringbuffer *ring = batch->tile_store;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   enum pipe_format format = fd_gmem_restore_format(psurf->format);
   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);

   assert((pitch & 31) == 0);
   assert((offset & 0xfff) == 0);

   if (!rsc->valid)
      return;

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
   OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
   OUT_RING(ring, pitch >> 5);             /* RB_COPY_DEST_PITCH */
   OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
            A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
               COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
               A2XX_RB_COPY_DEST_INFO_WRITE_RED |
               A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
               A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
               A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

   if (!is_a20x(batch->ctx->screen)) {
      OUT_WFI(ring);

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
   }

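   /* a 3-vertex RECTLIST draw (with RB_MODECONTROL in EDRAM_COPY mode, set
    * up by the caller) performs the actual copy from gmem to the dest bo
    */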
   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_store =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_store;

   fd2_emit_vertex_bufs(ring, 0x9c,
                        (struct fd2_vertex_buf[]){
                           {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
                        },
                        1);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   if (!is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x0000028f);
   }

   fd2_program_emit(ctx, ring, &ctx->solid_prog);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
   OUT_RING(
      ring,
      A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
         A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
         A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));                    /* PA_SC_WINDOW_SCISSOR_TL */
   OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000);

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XSCALE */
   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XOFFSET */
   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YSCALE */
   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YOFFSET */

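   /* switch the RB into EDRAM copy (resolve) mode for the gmem2mem blits
    * below, then restore the normal COLOR_DEPTH rendering mode afterwards
    */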
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
      emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

   if (batch->resolve & FD_BUFFER_COLOR)
      emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

   if (!is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x0000003b);
   }
}

static void
fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   fd2_emit_ib(batch->gmem, batch->tile_store);
}

/* transfer from system memory to gmem */

static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   enum pipe_format format = fd_gmem_restore_format(psurf->format);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* emit fb as a texture: */
   OUT_PKT3(ring, CP_SET_CONSTANT, 7);
   OUT_RING(ring, 0x00010000);
   OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_PITCH(
                        fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
   OUT_RELOC(ring, rsc->bo, offset,
             A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
                A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
             0);
   OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
                     A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
   OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
                     A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
                     A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
                     A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
                     A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));

   if (!is_a20x(batch->ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
   }

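   /* a RECTLIST draw (using the blit program set up by the caller) samples
    * the surface bound above as a texture back into the gmem tile
    */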
   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
fd2_emit_tile_mem2gmem(struct fd_batch *batch,
                       const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned bin_w = tile->bin_w;
   unsigned bin_h = tile->bin_h;
   float x0, y0, x1, y1;

   fd2_emit_vertex_bufs(
      ring, 0x9c,
      (struct fd2_vertex_buf[]){
         {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
         {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
      },
      2);

   /* write texture coordinates to vertexbuf: */
   x0 = ((float)tile->xoff) / ((float)pfb->width);
   x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
   y0 = ((float)tile->yoff) / ((float)pfb->height);
   y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
   OUT_PKT3(ring, CP_MEM_WRITE, 7);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
   OUT_RING(ring, fui(x0));
   OUT_RING(ring, fui(y0));
   OUT_RING(ring, fui(x1));
   OUT_RING(ring, fui(y0));
   OUT_RING(ring, fui(x0));
   OUT_RING(ring, fui(y1));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
   OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
                     A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
                     A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
   OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
                     A2XX_RB_COLORCONTROL_BLEND_DISABLE |
                     A2XX_RB_COLORCONTROL_ROP_CODE(12) |
                     A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
                     A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
   OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
                     A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
                     A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
                     xy2d(0, 0));      /* PA_SC_WINDOW_SCISSOR_TL */
   OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XSCALE */
   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XOFFSET */
   OUT_RING(ring, fui(-(float)bin_h / 2.0f)); /* PA_CL_VPORT_YSCALE */
   OUT_RING(ring, fui((float)bin_h / 2.0f));  /* PA_CL_VPORT_YOFFSET */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
                     A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
      emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
      emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);

   /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}

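/* patch the recorded draw commands with the final visibility/cull mode, once
 * we know whether hw binning is actually used for this batch
 */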
381 static void
patch_draws(struct fd_batch * batch,enum pc_di_vis_cull_mode vismode)382 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
383 {
384    unsigned i;
385 
386    if (!is_a20x(batch->ctx->screen)) {
387       /* identical to a3xx */
388       for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
389          struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
390          *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
391       }
392       util_dynarray_clear(&batch->draw_patches);
393       return;
394    }
395 
396    if (vismode == USE_VISIBILITY)
397       return;
398 
399    for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
400       uint32_t *ptr =
401          *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
402       unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
403 
404       /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
405        * replace first two DWORDS with NOP and move the rest down
406        * (we don't want to have to move the idx buffer reloc)
407        */
408       ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
409       ptr[1] = 0x00000000;
410 
411       ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
412       ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
413       ptr[3] = 0x00000000;
414    }
415 }
416 
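/* emit setup for rendering directly to system memory (gmem bypass) */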
static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];

   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);

   assert((pitch & 31) == 0);
   assert((offset & 0xfff) == 0);

   fd2_emit_restore(ctx, ring);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RELOC(ring, rsc->bo, offset,
             COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
                A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
                A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
             0);

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring,
            A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));

   patch_draws(batch, IGNORE_VISIBILITY);
   util_dynarray_clear(&batch->draw_patches);
   util_dynarray_clear(&batch->shader_patches);
}

/* before first tile */
static void
fd2_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
   uint32_t reg;

   fd2_emit_restore(ctx, ring);

   prepare_tile_fini_ib(batch);

   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
   reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
   if (pfb->zsbuf)
      reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
   OUT_RING(ring, reg); /* RB_DEPTH_INFO */

   /* fast clear patches */
   int depth_size = -1;
   int color_size = -1;

   if (pfb->cbufs[0])
      color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;

   if (pfb->zsbuf)
      depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;

   for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
      uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
      uint32_t size, lines;

      /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
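      /* resolve gmem patch points now that the final gmem layout is known;
       * for the fast-clear cases the values written below describe gmem as a
       * 32-pixel-wide COLORX_8_8_8_8 surface that is 'lines' rows tall
       */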
      switch (patch->val) {
      case GMEM_PATCH_FASTCLEAR_COLOR:
         size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
         lines = size / 1024;
         depth_base = size / 2;
         break;
      case GMEM_PATCH_FASTCLEAR_DEPTH:
         size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
         lines = size / 1024;
         color_base = depth_base;
         depth_base = depth_base + size / 2;
         break;
      case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
         lines =
            align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
         break;
      case GMEM_PATCH_RESTORE_INFO:
         patch->cs[0] = gmem->bin_w;
         patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                        A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
         patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
         if (pfb->zsbuf)
            patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
               fd_pipe2depth(pfb->zsbuf->format));
         continue;
      default:
         continue;
      }

      patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
      patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
                     A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
      patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
                     A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
   }
   util_dynarray_clear(&batch->gmem_patches);

   /* set to zero, for some reason hardware doesn't like certain values */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
   OUT_RING(ring, 0);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
       *
       * in the shader compiler, we guarantee that the shader ends with
       * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
       *
       * since the patches point only to dwords and CFs are 1.5 dwords,
       * the patch is dword-aligned and might point to an ALLOC CF
       */
      for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
         instr_cf_t *cf =
            *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
         if (cf->opc == ALLOC)
            cf++;
         assert(cf->opc == EXEC);
         assert(cf[ctx->screen->info->num_vsc_pipes * 2 - 2].opc == EXEC_END);
         cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
      }

      patch_draws(batch, USE_VISIBILITY);

      /* initialize shader constants for the binning memexport */
      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
      OUT_RING(ring, 0x0000000C);

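      /* per-pipe visibility stream buffers; the binning shader's memory
       * exports write per-bin visibility data into these BOs
       */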
      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
         /* allocate in 64k increments to avoid reallocs */
         uint32_t bo_size = align(batch->num_vertices, 0x10000);
         if (!ctx->vsc_pipe_bo[i] ||
             fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
            if (ctx->vsc_pipe_bo[i])
               fd_bo_del(ctx->vsc_pipe_bo[i]);
            ctx->vsc_pipe_bo[i] =
               fd_bo_new(ctx->dev, bo_size, 0, "vsc_pipe[%u]", i);
            assert(ctx->vsc_pipe_bo[i]);
         }

         /* memory export address (export32):
          * .x: (base_address >> 2) | 0x40000000 (?)
          * .y: index (float) - set by shader
          * .z: 0x4B00D000 (?)
          * .w: 0x4B000000 (?) | max_index (?)
          */
         OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x4B00D000);
         OUT_RING(ring, 0x4B000000 | bo_size);
      }

      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
      OUT_RING(ring, 0x0000018C);

      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
         const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
         float off_x, off_y, mul_x, mul_y;

         /* consts to transform from [-1,1] clip space to bin coordinates for
          * this pipe: for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
          * (8 possible values on each of the x/y axes)
          * to clip at the binning stage: only use the center 6x6
          * TODO: set the z parameters too so that hw binning
          * can clip primitives in Z too
          */

         mul_x = 1.0f / (float)(gmem->bin_w * 8);
         mul_y = 1.0f / (float)(gmem->bin_h * 8);
         off_x = -pipe->x * (1.0f / 8.0f) + 0.125f - mul_x * gmem->minx;
         off_y = -pipe->y * (1.0f / 8.0f) + 0.125f - mul_y * gmem->miny;

         OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
         OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
         OUT_RING(ring, 0x3f000000);
         OUT_RING(ring, fui(0.0f));

         OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
         OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
         OUT_RING(ring, fui(0.0f));
         OUT_RING(ring, fui(0.0f));
      }

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0);

      fd2_emit_ib(ring, batch->binning);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x00000002);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   util_dynarray_clear(&batch->draw_patches);
   util_dynarray_clear(&batch->shader_patches);
}

/* before mem2gmem */
static void
fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* setup screen scissor for current tile (same for mem2gmem): */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
                     A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}

/* before IB to rendering cmds: */
static void
fd2_emit_tile_renderprep(struct fd_batch *batch,
                         const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* setup window scissor and offset for current tile (different
    * from mem2gmem):
    */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
                     A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));

   /* write SCISSOR_BR to memory so fast clear path can restore from it */
   OUT_PKT3(ring, CP_MEM_WRITE, 2);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));

   /* set the copy offset for gmem2mem */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
   OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
                     A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));

   /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
   if (is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000580);
      OUT_RING(ring, fui(tile->xoff));
      OUT_RING(ring, fui(tile->yoff));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
   }

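   /* with hw binning, select this tile's bin in the visibility stream and
    * point the CP at the pipe buffer written during the binning pass
    */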
   if (use_hw_binning(batch)) {
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
      OUT_RING(ring, tile->n);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
      OUT_RING(ring, tile->n);

      /* TODO only emit this when tile->p changes */
      OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);
   }
}

void
fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
   ctx->emit_tile_init = fd2_emit_tile_init;
   ctx->emit_tile_prep = fd2_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
}