/*
 * Copyright © 2012-2013 Rob Clark <[email protected]>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_prim.h"
#include "util/u_string.h"

#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd2_context.h"
#include "fd2_draw.h"
#include "fd2_emit.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"

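/* Pack a float RGBA color into a single 32-bit word in the given format
 * (used below for the hardware clear-value registers).
 */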
static inline uint32_t
pack_rgba(enum pipe_format format, const float *rgba)
{
   union util_color uc;
   util_pack_color(rgba, format, &uc);
   return uc.ui[0];
}

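/* Flush GPU caches by emitting a burst of CACHE_FLUSH events at the end of a
 * draw.
 */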
static void
emit_cacheflush(struct fd_ringbuffer *ring)
{
   unsigned i;

   for (i = 0; i < 12; i++) {
      OUT_PKT3(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CACHE_FLUSH);
   }
}

static void
emit_vertexbufs(struct fd_context *ctx) assert_dt
{
   struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
   struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
   struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
   unsigned i;

   if (!vtx->num_elements)
      return;

   for (i = 0; i < vtx->num_elements; i++) {
      struct pipe_vertex_element *elem = &vtx->pipe[i];
      struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
      bufs[i].offset = vb->buffer_offset;
      bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
      bufs[i].prsc = vb->buffer.resource;
   }

   /* NOTE: I believe the 0x78 (or 0x9c in solid_vp) relates to the
    * CONST(20,0) (or CONST(26,0) in solid_vp)
    */

   fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
   fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
}

static void
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
          const struct pipe_draw_start_count_bias *draw,
          struct fd_ringbuffer *ring, unsigned index_offset,
          bool binning) assert_dt
{
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, info->index_size ? 0 : draw->start);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   if (is_a20x(ctx->screen)) {
      /* Wait for DMA to finish, then do a dummy draw of one triangle with
       * indices 0,0,0 and PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
       *
       * This workaround is for a HW bug related to DMA alignment: it is
       * necessary for indexed draws and possibly also draws that read
       * binning data.
       */
      OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
      OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00001000); /* bit 12: VGT_BUSY_NO_DMA */
      OUT_RING(ring, 0x00000001);

      OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x0003c004);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, 0x00000003);
      OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64,
                0, 0);
      OUT_RING(ring, 0x00000006);
   } else {
      OUT_WFI(ring);

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, info->index_bounds_valid ? info->max_index
                                              : ~0); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, info->index_bounds_valid ? info->min_index
                                              : 0); /* VGT_MIN_VTX_INDX */
   }

   /* binning shader will take offset from C64 */
   if (binning && is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000180);
      OUT_RING(ring, fui(ctx->batch->num_vertices));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
   }

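   /* The binning pass itself and point primitives do not consume the
    * visibility stream produced by binning.
    */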
   enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
   if (binning || info->mode == MESA_PRIM_POINTS)
      vismode = IGNORE_VISIBILITY;

   fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode], vismode,
                info, draw, index_offset);

   if (is_a20x(ctx->screen)) {
      /* not sure why this is required, but it fixes some hangs */
      OUT_WFI(ring);
   } else {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
      OUT_RING(ring, 0x00000000);
   }

   emit_cacheflush(ring);
}

static bool
fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
             unsigned drawid_offset,
             const struct pipe_draw_indirect_info *indirect,
             const struct pipe_draw_start_count_bias *pdraw,
             unsigned index_offset) assert_dt
{
   if (!ctx->prog.fs || !ctx->prog.vs)
      return false;

   if (pinfo->mode != MESA_PRIM_COUNT && !indirect &&
       !pinfo->primitive_restart &&
       !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
      return false;

   if (ctx->dirty & FD_DIRTY_VTXBUF)
      emit_vertexbufs(ctx);

   fd_blend_tracking(ctx);

   if (fd_binning_enabled)
      fd2_emit_state_binning(ctx, ctx->dirty);

   fd2_emit_state(ctx, ctx->dirty);

   /* a2xx can only draw 65535 vertices at once.  On a22x the field in the
    * draw command is 32 bits but appears to be limited as well; use a limit
    * of ~32k because it fixes an unexplained hang.  32766 works for all
    * primitives (it is a multiple of both 2 and 3).
    */
   if (pdraw->count > 32766) {
      /* clang-format off */
      static const uint16_t step_tbl[MESA_PRIM_COUNT] = {
         [0 ... MESA_PRIM_COUNT - 1] = 32766,
         [MESA_PRIM_LINE_STRIP]      = 32765,
         [MESA_PRIM_TRIANGLE_STRIP]  = 32764,

         /* needs more work */
         [MESA_PRIM_TRIANGLE_FAN]    = 0,
         [MESA_PRIM_LINE_LOOP]       = 0,
      };
      /* clang-format on */

      struct pipe_draw_start_count_bias draw = *pdraw;
      unsigned count = draw.count;
      unsigned step = step_tbl[pinfo->mode];
      unsigned num_vertices = ctx->batch->num_vertices;

      if (!step)
         return false;

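      /* Split the draw into chunks of at most 32766 vertices; for strips the
       * step is slightly smaller than the chunk size so consecutive chunks
       * overlap and the strip stays connected.
       */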
      for (; count + step > 32766; count -= step) {
         draw.count = MIN2(count, 32766);
         draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
         draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
         draw.start += step;
         ctx->batch->num_vertices += step;
      }
      /* changing this value is a hack, restore it */
      ctx->batch->num_vertices = num_vertices;
   } else {
      draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
      draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
   }

   fd_context_all_clean(ctx);

   ctx->batch->num_vertices += pdraw->count * pinfo->instance_count;

   return true;
}

static void
fd2_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws, unsigned index_offset) assert_dt
{
   for (unsigned i = 0; i < num_draws; i++)
      fd2_draw_vbo(ctx, info, drawid_offset, indirect, &draws[i],
                   index_offset);
}

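/* Emit the state shared by the fast-clear and regular clear paths: the solid
 * fill program, depth/stencil controls and color write masks for the buffers
 * being cleared.
 */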
static void
clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
            unsigned buffers, bool fast_clear) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   uint32_t reg;

   fd2_emit_vertex_bufs(ring, 0x9c,
                        (struct fd2_vertex_buf[]){
                           {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
                        },
                        1);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   fd2_program_emit(ctx, ring, &ctx->solid_prog);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
      reg = 0;
      if (buffers & PIPE_CLEAR_DEPTH) {
         reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
                A2XX_RB_DEPTHCONTROL_Z_ENABLE |
                A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
                A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
      }
      if (buffers & PIPE_CLEAR_STENCIL) {
         reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
                A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
                A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
      }
      OUT_RING(ring, reg);
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
   OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
                     A2XX_RB_COLORCONTROL_BLEND_DISABLE |
                     A2XX_RB_COLORCONTROL_ROP_CODE(12) |
                     A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
                     A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
   OUT_RING(ring,
            A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
               A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
               A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
               (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));

   if (fast_clear) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
      OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
   if (buffers & PIPE_CLEAR_COLOR) {
      OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
                        A2XX_RB_COLOR_MASK_WRITE_GREEN |
                        A2XX_RB_COLOR_MASK_WRITE_BLUE |
                        A2XX_RB_COLOR_MASK_WRITE_ALPHA);
   } else {
      OUT_RING(ring, 0x0);
   }

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
   OUT_RING(ring, 0);

   if (is_a20x(batch->ctx->screen))
      return;

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
   OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
   OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
   OUT_RING(ring,
            0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
   OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
   OUT_RING(ring, 0x00000084);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
   OUT_RING(ring, 0x0000028f);
}

static void
clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
{
   if (is_a20x(ctx->screen))
      return;

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
   OUT_RING(ring, 0x00000000);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
   OUT_RING(ring, 0x00000000);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
   OUT_RING(ring, 0x0000003b);
}

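/* Emit a fullscreen fast-clear rectangle.  The scissor and surface-info
 * values are emitted as placeholders and patched via batch->gmem_patches
 * when the batch is flushed.
 */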
static void
clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
           uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
{
   BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */

   /* zero values are patched in */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
   OUT_RINGP(ring, patch_type, &batch->gmem_patches);

   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, 0x8000 | 32);
   OUT_RING(ring, 0);
   OUT_RING(ring, 0);

   /* set fill values */
   if (!is_a20x(batch->ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
      OUT_RING(ring, color_clear);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
      OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
                        A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
      OUT_RING(ring, depth_clear);
   } else {
      const float sc = 1.0f / 255.0f;

      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000480);
      OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
      OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));

      /* XXX if using float the rounding error breaks it.. */
      float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
      assert((unsigned)(((double)depth * (double)0xffffff)) ==
             (depth_clear >> 8));

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(depth));

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
      OUT_RING(ring,
               0xff000000 |
                  A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
                  A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
      OUT_RING(ring, 0xff000000 |
                        A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
                        A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
   }

   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

static bool
fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
               const union pipe_color_union *color, double depth,
               unsigned stencil) assert_dt
{
   /* Using 4x MSAA allows clearing ~2x faster, and we can use a higher-bpp
    * format to clear a lower-bpp one: one "pixel" can clear 64 bits
    * (rgba8 + depth24 + stencil8).
    * note: it's possible to clear with a 32_32_32_32 format, but it's not
    * faster
    * note: fast clear doesn't work with sysmem rendering
    *  (sysmem rendering is disabled when clear is used)
    *
    * We only have 16-bit / 32-bit color formats and 16-bit / 32-bit depth
    * formats, so there are only a few possible combinations.
    *
    * If the bpp of color and depth don't match, we clear depth and color
    * individually.
    */
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   struct fd_batch *batch = ctx->batch;
   struct fd_ringbuffer *ring = batch->draw;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   uint32_t color_clear = 0, depth_clear = 0;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
   int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
   int color_size = -1;

   /* TODO: need to test performance on a22x */
   if (!is_a20x(ctx->screen))
      return false;

   if (buffers & PIPE_CLEAR_COLOR)
      color_size = util_format_get_blocksizebits(format) == 32;

   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      /* no fast clear when clearing only one component of a depth+stencil
       * buffer */
      if (!(buffers & PIPE_CLEAR_DEPTH))
         return false;

      if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
           pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
          !(buffers & PIPE_CLEAR_STENCIL))
         return false;

      depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
   }

   assert(color_size >= 0 || depth_size >= 0);

   if (color_size == 0) {
      color_clear = pack_rgba(format, color->f);
      color_clear = (color_clear << 16) | (color_clear & 0xffff);
   } else if (color_size == 1) {
      color_clear = pack_rgba(format, color->f);
   }

   if (depth_size == 0) {
      depth_clear = (uint32_t)(0xffff * depth);
      depth_clear |= depth_clear << 16;
   } else if (depth_size == 1) {
      depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
      depth_clear |= (stencil & 0xff);
   }

   /* disable "window" scissor.. */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));
   OUT_RING(ring, xy2d(0x7fff, 0x7fff));

   /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));
   OUT_RING(ring, fui(4096.0f));

   clear_state(batch, ring, ~0u, true);

   if (color_size >= 0 && depth_size != color_size)
      clear_fast(batch, ring, color_clear, color_clear,
                 GMEM_PATCH_FASTCLEAR_COLOR);

   if (depth_size >= 0 && depth_size != color_size)
      clear_fast(batch, ring, depth_clear, depth_clear,
                 GMEM_PATCH_FASTCLEAR_DEPTH);

   if (depth_size == color_size)
      clear_fast(batch, ring, color_clear, depth_clear,
                 GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);

   clear_state_restore(ctx, ring);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
   OUT_RING(ring, 0);

   /* We can't patch in SCREEN_SCISSOR_BR as it can be different for each
    * tile.  Instead, MEM_WRITE the value in tile_renderprep and use
    * CP_LOAD_CONSTANT_CONTEXT; the value is read from byte offset 60 in the
    * given bo.
    */
   OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
   OUT_RING(ring, 1);

   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
   OUT_RING(ring, 0);
   OUT_RING(ring, 0);

   return true;
}

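/* Generic clear path: set up the clear values and draw a fullscreen RECTLIST
 * with the solid program (the fast-clear path above is tried first and only
 * taken on a20x).
 */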
static bool
fd2_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
          const union pipe_color_union *color, double depth,
          unsigned stencil) assert_dt
{
   struct fd_ringbuffer *ring = ctx->batch->draw;
   struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;

   if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
      goto dirty;

   /* set clear value */
   if (is_a20x(ctx->screen)) {
      if (buffers & FD_BUFFER_COLOR) {
         /* C0 used by fragment shader */
         OUT_PKT3(ring, CP_SET_CONSTANT, 5);
         OUT_RING(ring, 0x00000480);
         OUT_RING(ring, color->ui[0]);
         OUT_RING(ring, color->ui[1]);
         OUT_RING(ring, color->ui[2]);
         OUT_RING(ring, color->ui[3]);
      }

      if (buffers & FD_BUFFER_DEPTH) {
         /* use viewport to set depth value */
         OUT_PKT3(ring, CP_SET_CONSTANT, 3);
         OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
         OUT_RING(ring, fui(0.0f));
         OUT_RING(ring, fui(depth));
      }

      if (buffers & FD_BUFFER_STENCIL) {
         OUT_PKT3(ring, CP_SET_CONSTANT, 3);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
         OUT_RING(ring, 0xff000000 |
                           A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
                           A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
         OUT_RING(ring, 0xff000000 |
                           A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
                           A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
      }
   } else {
      if (buffers & FD_BUFFER_COLOR) {
         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
         OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
      }

      if (buffers & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
         uint32_t clear_mask, depth_clear;
         switch (fd_pipe2depth(fb->zsbuf->format)) {
         case DEPTHX_24_8:
            clear_mask = ((buffers & FD_BUFFER_DEPTH) ? 0xe : 0) |
                         ((buffers & FD_BUFFER_STENCIL) ? 0x1 : 0);
            depth_clear =
               (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
            break;
         case DEPTHX_16:
            clear_mask = 0xf;
            depth_clear = (uint32_t)(0xffffffff * depth);
            break;
         default:
            unreachable("invalid depth");
            break;
         }

         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
         OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
                           A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));

         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
         OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
         OUT_RING(ring, depth_clear);
      }
   }

   /* scissor state */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));
   OUT_RING(ring, xy2d(fb->width, fb->height));

   /* viewport state */
   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)fb->width / 2.0f));
   OUT_RING(ring, fui((float)fb->width / 2.0f));
   OUT_RING(ring, fui((float)fb->height / 2.0f));
   OUT_RING(ring, fui((float)fb->height / 2.0f));

   /* common state */
   clear_state(ctx->batch, ring, buffers, false);

   fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);

   clear_state_restore(ctx, ring);

dirty:
   ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
                 FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
                 FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;

   ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
   ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
      FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;

   return true;
}

void
fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);
   ctx->draw_vbos = fd2_draw_vbos;
   ctx->clear = fd2_clear;
}