1 /*
2 * Copyright © 2012 Rob Clark <[email protected]>
3 * SPDX-License-Identifier: MIT
4 *
5 * Authors:
6 * Rob Clark <[email protected]>
7 */
8
9 #include "pipe/p_state.h"
10 #include "util/u_inlines.h"
11 #include "util/u_memory.h"
12 #include "util/u_string.h"
13
14 #include "freedreno_draw.h"
15 #include "freedreno_resource.h"
16 #include "freedreno_state.h"
17
18 #include "ir2/instr-a2xx.h"
19 #include "fd2_context.h"
20 #include "fd2_draw.h"
21 #include "fd2_emit.h"
22 #include "fd2_gmem.h"
23 #include "fd2_program.h"
24 #include "fd2_util.h"
25 #include "fd2_zsa.h"
26
27 static uint32_t
fmt2swap(enum pipe_format format)28 fmt2swap(enum pipe_format format)
29 {
30 switch (format) {
31 case PIPE_FORMAT_B8G8R8A8_UNORM:
32 case PIPE_FORMAT_B8G8R8X8_UNORM:
33 case PIPE_FORMAT_B5G6R5_UNORM:
34 case PIPE_FORMAT_B5G5R5A1_UNORM:
35 case PIPE_FORMAT_B5G5R5X1_UNORM:
36 case PIPE_FORMAT_B4G4R4A4_UNORM:
37 case PIPE_FORMAT_B4G4R4X4_UNORM:
38 case PIPE_FORMAT_B2G3R3_UNORM:
39 return 1;
40 default:
41 return 0;
42 }
43 }
44
45 static bool
use_hw_binning(struct fd_batch * batch)46 use_hw_binning(struct fd_batch *batch)
47 {
48 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
49
50 /* we hardcoded a limit of 8 "pipes", we can increase this limit
51 * at the cost of a slightly larger command stream
52 * however very few cases will need more than 8
53 * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
54 */
55 if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
56 return false;
57
58 /* only a20x hw binning is implement
59 * a22x is more like a3xx, but perhaps the a20x works? (TODO)
60 */
61 if (!is_a20x(batch->ctx->screen))
62 return false;
63
64 return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
65 }
66
67 /* transfer from gmem to system memory (ie. normal RAM) */
68
69 static void
emit_gmem2mem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)70 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
71 struct pipe_surface *psurf)
72 {
73 struct fd_ringbuffer *ring = batch->tile_store;
74 struct fd_resource *rsc = fd_resource(psurf->texture);
75 uint32_t offset =
76 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
77 enum pipe_format format = fd_gmem_restore_format(psurf->format);
78 uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
79
80 assert((pitch & 31) == 0);
81 assert((offset & 0xfff) == 0);
82
83 if (!rsc->valid)
84 return;
85
86 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
87 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
88 OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
89 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
90
91 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
92 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
93 OUT_RING(ring, 0x00000000); /* RB_COPY_CONTROL */
94 OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
95 OUT_RING(ring, pitch >> 5); /* RB_COPY_DEST_PITCH */
96 OUT_RING(ring, /* RB_COPY_DEST_INFO */
97 A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
98 COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
99 A2XX_RB_COPY_DEST_INFO_WRITE_RED |
100 A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
101 A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
102 A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
103
104 if (!is_a20x(batch->ctx->screen)) {
105 OUT_WFI(ring);
106
107 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
108 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
109 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
110 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
111 }
112
113 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
114 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
115 }
116
117 static void
prepare_tile_fini_ib(struct fd_batch * batch)118 prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
119 {
120 struct fd_context *ctx = batch->ctx;
121 struct fd2_context *fd2_ctx = fd2_context(ctx);
122 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
123 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
124 struct fd_ringbuffer *ring;
125
126 batch->tile_store =
127 fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
128 ring = batch->tile_store;
129
130 fd2_emit_vertex_bufs(ring, 0x9c,
131 (struct fd2_vertex_buf[]){
132 {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
133 },
134 1);
135
136 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
137 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
138 OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
139
140 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
141 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
142 OUT_RING(ring, 0);
143
144 if (!is_a20x(ctx->screen)) {
145 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
146 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
147 OUT_RING(ring, 0x0000028f);
148 }
149
150 fd2_program_emit(ctx, ring, &ctx->solid_prog);
151
152 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
153 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
154 OUT_RING(ring, 0x0000ffff);
155
156 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
157 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
158 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
159
160 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
161 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
162 OUT_RING(
163 ring,
164 A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
165 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
166 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
167
168 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
169 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
170 OUT_RING(ring, xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
171 OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
172
173 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
174 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
175 OUT_RING(ring, 0x00000000);
176
177 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
178 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
179 OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XSCALE */
180 OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XOFFSET */
181 OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YSCALE */
182 OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YOFFSET */
183
184 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
185 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
186 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
187
188 if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
189 emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
190
191 if (batch->resolve & FD_BUFFER_COLOR)
192 emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
193
194 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
195 OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
196 OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
197
198 if (!is_a20x(ctx->screen)) {
199 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
200 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
201 OUT_RING(ring, 0x0000003b);
202 }
203 }
204
205 static void
fd2_emit_tile_gmem2mem(struct fd_batch * batch,const struct fd_tile * tile)206 fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
207 {
208 fd2_emit_ib(batch->gmem, batch->tile_store);
209 }
210
211 /* transfer from system memory to gmem */
212
213 static void
emit_mem2gmem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)214 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
215 struct pipe_surface *psurf)
216 {
217 struct fd_ringbuffer *ring = batch->gmem;
218 struct fd_resource *rsc = fd_resource(psurf->texture);
219 uint32_t offset =
220 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
221 enum pipe_format format = fd_gmem_restore_format(psurf->format);
222
223 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
224 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
225 OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
226 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
227
228 /* emit fb as a texture: */
229 OUT_PKT3(ring, CP_SET_CONSTANT, 7);
230 OUT_RING(ring, 0x00010000);
231 OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
232 A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
233 A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
234 A2XX_SQ_TEX_0_PITCH(
235 fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
236 OUT_RELOC(ring, rsc->bo, offset,
237 A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
238 A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
239 0);
240 OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
241 A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
242 OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
243 A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
244 A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
245 A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
246 A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
247 OUT_RING(ring, 0x00000000);
248 OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
249
250 if (!is_a20x(batch->ctx->screen)) {
251 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
252 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
253 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
254 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
255 }
256
257 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
258 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
259 }
260
261 static void
fd2_emit_tile_mem2gmem(struct fd_batch * batch,const struct fd_tile * tile)262 fd2_emit_tile_mem2gmem(struct fd_batch *batch,
263 const struct fd_tile *tile) assert_dt
264 {
265 struct fd_context *ctx = batch->ctx;
266 struct fd2_context *fd2_ctx = fd2_context(ctx);
267 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
268 struct fd_ringbuffer *ring = batch->gmem;
269 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
270 unsigned bin_w = tile->bin_w;
271 unsigned bin_h = tile->bin_h;
272 float x0, y0, x1, y1;
273
274 fd2_emit_vertex_bufs(
275 ring, 0x9c,
276 (struct fd2_vertex_buf[]){
277 {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
278 {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
279 },
280 2);
281
282 /* write texture coordinates to vertexbuf: */
283 x0 = ((float)tile->xoff) / ((float)pfb->width);
284 x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
285 y0 = ((float)tile->yoff) / ((float)pfb->height);
286 y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
287 OUT_PKT3(ring, CP_MEM_WRITE, 7);
288 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
289 OUT_RING(ring, fui(x0));
290 OUT_RING(ring, fui(y0));
291 OUT_RING(ring, fui(x1));
292 OUT_RING(ring, fui(y0));
293 OUT_RING(ring, fui(x0));
294 OUT_RING(ring, fui(y1));
295
296 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
297 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
298 OUT_RING(ring, 0);
299
300 fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
301
302 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
303 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
304
305 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
306 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
307 OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
308
309 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
310 OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
311 OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
312 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
313 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
314
315 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
316 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
317 OUT_RING(ring, 0x0000ffff);
318
319 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
320 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
321 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
322 A2XX_RB_COLORCONTROL_BLEND_DISABLE |
323 A2XX_RB_COLORCONTROL_ROP_CODE(12) |
324 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
325 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
326
327 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
328 OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
329 OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
330 A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
331 A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
332 A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
333 A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
334 A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
335
336 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
337 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
338 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
339 xy2d(0, 0)); /* PA_SC_WINDOW_SCISSOR_TL */
340 OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
341
342 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
343 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
344 OUT_RING(ring, fui((float)bin_w / 2.0f)); /* PA_CL_VPORT_XSCALE */
345 OUT_RING(ring, fui((float)bin_w / 2.0f)); /* PA_CL_VPORT_XOFFSET */
346 OUT_RING(ring, fui(-(float)bin_h / 2.0f)); /* PA_CL_VPORT_YSCALE */
347 OUT_RING(ring, fui((float)bin_h / 2.0f)); /* PA_CL_VPORT_YOFFSET */
348
349 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
350 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
351 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
352 A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
353 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
354 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
355 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
356 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
357
358 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
359 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
360 OUT_RING(ring, 0x00000000);
361
362 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
363 emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
364
365 if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
366 emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
367
368 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
369 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
370 OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
371 A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
372 A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
373 A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
374 A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
375 A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
376 A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
377
378 /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
379 }
380
381 static void
patch_draws(struct fd_batch * batch,enum pc_di_vis_cull_mode vismode)382 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
383 {
384 unsigned i;
385
386 if (!is_a20x(batch->ctx->screen)) {
387 /* identical to a3xx */
388 for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
389 struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
390 *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
391 }
392 util_dynarray_clear(&batch->draw_patches);
393 return;
394 }
395
396 if (vismode == USE_VISIBILITY)
397 return;
398
399 for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
400 uint32_t *ptr =
401 *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
402 unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
403
404 /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
405 * replace first two DWORDS with NOP and move the rest down
406 * (we don't want to have to move the idx buffer reloc)
407 */
408 ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
409 ptr[1] = 0x00000000;
410
411 ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
412 ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
413 ptr[3] = 0x00000000;
414 }
415 }
416
417 static void
fd2_emit_sysmem_prep(struct fd_batch * batch)418 fd2_emit_sysmem_prep(struct fd_batch *batch)
419 {
420 struct fd_context *ctx = batch->ctx;
421 struct fd_ringbuffer *ring = batch->gmem;
422 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
423 struct pipe_surface *psurf = pfb->cbufs[0];
424
425 if (!psurf)
426 return;
427
428 struct fd_resource *rsc = fd_resource(psurf->texture);
429 uint32_t offset =
430 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
431 uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
432
433 assert((pitch & 31) == 0);
434 assert((offset & 0xfff) == 0);
435
436 fd2_emit_restore(ctx, ring);
437
438 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
439 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
440 OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
441
442 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
443 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
444 OUT_RELOC(ring, rsc->bo, offset,
445 COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
446 A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
447 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
448 0);
449
450 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
451 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
452 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
453 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
454 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
455
456 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
457 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
458 OUT_RING(ring,
459 A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));
460
461 patch_draws(batch, IGNORE_VISIBILITY);
462 util_dynarray_clear(&batch->draw_patches);
463 util_dynarray_clear(&batch->shader_patches);
464 }
465
466 /* before first tile */
467 static void
fd2_emit_tile_init(struct fd_batch * batch)468 fd2_emit_tile_init(struct fd_batch *batch) assert_dt
469 {
470 struct fd_context *ctx = batch->ctx;
471 struct fd_ringbuffer *ring = batch->gmem;
472 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
473 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
474 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
475 uint32_t reg;
476
477 fd2_emit_restore(ctx, ring);
478
479 prepare_tile_fini_ib(batch);
480
481 OUT_PKT3(ring, CP_SET_CONSTANT, 4);
482 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
483 OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
484 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
485 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
486 reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
487 if (pfb->zsbuf)
488 reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
489 OUT_RING(ring, reg); /* RB_DEPTH_INFO */
490
491 /* fast clear patches */
492 int depth_size = -1;
493 int color_size = -1;
494
495 if (pfb->cbufs[0])
496 color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
497
498 if (pfb->zsbuf)
499 depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
500
501 for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
502 struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
503 uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
504 uint32_t size, lines;
505
506 /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
507 switch (patch->val) {
508 case GMEM_PATCH_FASTCLEAR_COLOR:
509 size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
510 lines = size / 1024;
511 depth_base = size / 2;
512 break;
513 case GMEM_PATCH_FASTCLEAR_DEPTH:
514 size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
515 lines = size / 1024;
516 color_base = depth_base;
517 depth_base = depth_base + size / 2;
518 break;
519 case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
520 lines =
521 align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
522 break;
523 case GMEM_PATCH_RESTORE_INFO:
524 patch->cs[0] = gmem->bin_w;
525 patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
526 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
527 patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
528 if (pfb->zsbuf)
529 patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
530 fd_pipe2depth(pfb->zsbuf->format));
531 continue;
532 default:
533 continue;
534 }
535
536 patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
537 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
538 patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
539 A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
540 patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
541 A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
542 }
543 util_dynarray_clear(&batch->gmem_patches);
544
545 /* set to zero, for some reason hardware doesn't like certain values */
546 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
547 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
548 OUT_RING(ring, 0);
549
550 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
551 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
552 OUT_RING(ring, 0);
553
554 if (use_hw_binning(batch)) {
555 /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
556 *
557 * in the shader compiler, we guarantee that the shader ends with
558 * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
559 *
560 * the since patches point only to dwords and CFs are 1.5 dwords
561 * the patch is aligned and might point to a ALLOC CF
562 */
563 for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
564 instr_cf_t *cf =
565 *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
566 if (cf->opc == ALLOC)
567 cf++;
568 assert(cf->opc == EXEC);
569 assert(cf[ctx->screen->info->num_vsc_pipes * 2 - 2].opc == EXEC_END);
570 cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
571 }
572
573 patch_draws(batch, USE_VISIBILITY);
574
575 /* initialize shader constants for the binning memexport */
576 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
577 OUT_RING(ring, 0x0000000C);
578
579 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
580 /* allocate in 64k increments to avoid reallocs */
581 uint32_t bo_size = align(batch->num_vertices, 0x10000);
582 if (!ctx->vsc_pipe_bo[i] ||
583 fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
584 if (ctx->vsc_pipe_bo[i])
585 fd_bo_del(ctx->vsc_pipe_bo[i]);
586 ctx->vsc_pipe_bo[i] =
587 fd_bo_new(ctx->dev, bo_size, 0, "vsc_pipe[%u]", i);
588 assert(ctx->vsc_pipe_bo[i]);
589 }
590
591 /* memory export address (export32):
592 * .x: (base_address >> 2) | 0x40000000 (?)
593 * .y: index (float) - set by shader
594 * .z: 0x4B00D000 (?)
595 * .w: 0x4B000000 (?) | max_index (?)
596 */
597 OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
598 OUT_RING(ring, 0x00000000);
599 OUT_RING(ring, 0x4B00D000);
600 OUT_RING(ring, 0x4B000000 | bo_size);
601 }
602
603 OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
604 OUT_RING(ring, 0x0000018C);
605
606 for (int i = 0; i < gmem->num_vsc_pipes; i++) {
607 const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
608 float off_x, off_y, mul_x, mul_y;
609
610 /* const to tranform from [-1,1] to bin coordinates for this pipe
611 * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
612 * 8 possible values on x/y axis,
613 * to clip at binning stage: only use center 6x6
614 * TODO: set the z parameters too so that hw binning
615 * can clip primitives in Z too
616 */
617
618 mul_x = 1.0f / (float)(gmem->bin_w * 8);
619 mul_y = 1.0f / (float)(gmem->bin_h * 8);
620 off_x = -pipe->x * (1.0f / 8.0f) + 0.125f - mul_x * gmem->minx;
621 off_y = -pipe->y * (1.0f / 8.0f) + 0.125f - mul_y * gmem->miny;
622
623 OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
624 OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
625 OUT_RING(ring, 0x3f000000);
626 OUT_RING(ring, fui(0.0f));
627
628 OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
629 OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
630 OUT_RING(ring, fui(0.0f));
631 OUT_RING(ring, fui(0.0f));
632 }
633
634 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
635 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
636 OUT_RING(ring, 0);
637
638 fd2_emit_ib(ring, batch->binning);
639
640 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
641 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
642 OUT_RING(ring, 0x00000002);
643 } else {
644 patch_draws(batch, IGNORE_VISIBILITY);
645 }
646
647 util_dynarray_clear(&batch->draw_patches);
648 util_dynarray_clear(&batch->shader_patches);
649 }
650
651 /* before mem2gmem */
652 static void
fd2_emit_tile_prep(struct fd_batch * batch,const struct fd_tile * tile)653 fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
654 {
655 struct fd_ringbuffer *ring = batch->gmem;
656 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
657 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
658
659 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
660 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
661 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
662 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
663
664 /* setup screen scissor for current tile (same for mem2gmem): */
665 OUT_PKT3(ring, CP_SET_CONSTANT, 3);
666 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
667 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
668 A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
669 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
670 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
671 }
672
673 /* before IB to rendering cmds: */
674 static void
fd2_emit_tile_renderprep(struct fd_batch * batch,const struct fd_tile * tile)675 fd2_emit_tile_renderprep(struct fd_batch *batch,
676 const struct fd_tile *tile) assert_dt
677 {
678 struct fd_context *ctx = batch->ctx;
679 struct fd2_context *fd2_ctx = fd2_context(ctx);
680 struct fd_ringbuffer *ring = batch->gmem;
681 struct pipe_framebuffer_state *pfb = &batch->framebuffer;
682 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
683
684 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
685 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
686 OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
687 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
688
689 /* setup window scissor and offset for current tile (different
690 * from mem2gmem):
691 */
692 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
693 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
694 OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
695 A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
696
697 /* write SCISSOR_BR to memory so fast clear path can restore from it */
698 OUT_PKT3(ring, CP_MEM_WRITE, 2);
699 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
700 OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
701 A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
702
703 /* set the copy offset for gmem2mem */
704 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
705 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
706 OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
707 A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
708
709 /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
710 if (is_a20x(ctx->screen)) {
711 OUT_PKT3(ring, CP_SET_CONSTANT, 5);
712 OUT_RING(ring, 0x00000580);
713 OUT_RING(ring, fui(tile->xoff));
714 OUT_RING(ring, fui(tile->yoff));
715 OUT_RING(ring, fui(0.0f));
716 OUT_RING(ring, fui(0.0f));
717 }
718
719 if (use_hw_binning(batch)) {
720 struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
721
722 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
723 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
724 OUT_RING(ring, tile->n);
725
726 OUT_PKT3(ring, CP_SET_CONSTANT, 2);
727 OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
728 OUT_RING(ring, tile->n);
729
730 /* TODO only emit this when tile->p changes */
731 OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
732 OUT_RELOC(ring, pipe_bo, 0, 0, 0);
733 }
734 }
735
736 void
fd2_gmem_init(struct pipe_context * pctx)737 fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
738 {
739 struct fd_context *ctx = fd_context(pctx);
740
741 ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
742 ctx->emit_tile_init = fd2_emit_tile_init;
743 ctx->emit_tile_prep = fd2_emit_tile_prep;
744 ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
745 ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
746 ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
747 }
748