xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a2xx/fd2_draw.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012-2013 Rob Clark <[email protected]>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Rob Clark <[email protected]>
7  */
8 
9 #include "pipe/p_state.h"
10 #include "util/u_memory.h"
11 #include "util/u_prim.h"
12 #include "util/u_string.h"
13 
14 #include "freedreno_resource.h"
15 #include "freedreno_state.h"
16 
17 #include "fd2_context.h"
18 #include "fd2_draw.h"
19 #include "fd2_emit.h"
20 #include "fd2_program.h"
21 #include "fd2_util.h"
22 #include "fd2_zsa.h"
23 
24 static inline uint32_t
pack_rgba(enum pipe_format format,const float * rgba)25 pack_rgba(enum pipe_format format, const float *rgba)
26 {
27    union util_color uc;
28    util_pack_color(rgba, format, &uc);
29    return uc.ui[0];
30 }
31 
32 static void
emit_cacheflush(struct fd_ringbuffer * ring)33 emit_cacheflush(struct fd_ringbuffer *ring)
34 {
35    unsigned i;
36 
37    for (i = 0; i < 12; i++) {
38       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
39       OUT_RING(ring, CACHE_FLUSH);
40    }
41 }
42 
43 static void
emit_vertexbufs(struct fd_context * ctx)44 emit_vertexbufs(struct fd_context *ctx) assert_dt
45 {
46    struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
47    struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
48    struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
49    unsigned i;
50 
51    if (!vtx->num_elements)
52       return;
53 
54    for (i = 0; i < vtx->num_elements; i++) {
55       struct pipe_vertex_element *elem = &vtx->pipe[i];
56       struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index];
57       bufs[i].offset = vb->buffer_offset;
58       bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo);
59       bufs[i].prsc = vb->buffer.resource;
60    }
61 
62    // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
63    // CONST(20,0) (or CONST(26,0) in soliv_vp)
64 
65    fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements);
66    fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements);
67 }
68 
69 static void
draw_impl(struct fd_context * ctx,const struct pipe_draw_info * info,const struct pipe_draw_start_count_bias * draw,struct fd_ringbuffer * ring,unsigned index_offset,bool binning)70 draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
71           const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring,
72           unsigned index_offset, bool binning) assert_dt
73 {
74    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
75    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
76    OUT_RING(ring, info->index_size ? 0 : draw->start);
77 
78    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
79    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
80 
81    if (is_a20x(ctx->screen)) {
82       /* wait for DMA to finish and
83        * dummy draw one triangle with indexes 0,0,0.
84        * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE.
85        *
86        * this workaround is for a HW bug related to DMA alignment:
87        * it is necessary for indexed draws and possibly also
88        * draws that read binning data
89        */
90       OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
91       OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */
92       OUT_RING(ring, 0x00000000);
93       OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */
94       OUT_RING(ring, 0x00000001);
95 
96       OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6);
97       OUT_RING(ring, 0x00000000);
98       OUT_RING(ring, 0x0003c004);
99       OUT_RING(ring, 0x00000000);
100       OUT_RING(ring, 0x00000003);
101       OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0,
102                 0);
103       OUT_RING(ring, 0x00000006);
104    } else {
105       OUT_WFI(ring);
106 
107       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
108       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
109       OUT_RING(ring, info->index_bounds_valid ? info->max_index
110                                               : ~0); /* VGT_MAX_VTX_INDX */
111       OUT_RING(ring, info->index_bounds_valid ? info->min_index
112                                               : 0); /* VGT_MIN_VTX_INDX */
113    }
114 
115    /* binning shader will take offset from C64 */
116    if (binning && is_a20x(ctx->screen)) {
117       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
118       OUT_RING(ring, 0x00000180);
119       OUT_RING(ring, fui(ctx->batch->num_vertices));
120       OUT_RING(ring, fui(0.0f));
121       OUT_RING(ring, fui(0.0f));
122       OUT_RING(ring, fui(0.0f));
123    }
124 
125    enum pc_di_vis_cull_mode vismode = USE_VISIBILITY;
126    if (binning || info->mode == MESA_PRIM_POINTS)
127       vismode = IGNORE_VISIBILITY;
128 
129    fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode],
130                 vismode, info, draw, index_offset);
131 
132    if (is_a20x(ctx->screen)) {
133       /* not sure why this is required, but it fixes some hangs */
134       OUT_WFI(ring);
135    } else {
136       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
137       OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
138       OUT_RING(ring, 0x00000000);
139    }
140 
141    emit_cacheflush(ring);
142 }
143 
144 static bool
fd2_draw_vbo(struct fd_context * ctx,const struct pipe_draw_info * pinfo,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * pdraw,unsigned index_offset)145 fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo,
146              unsigned drawid_offset,
147              const struct pipe_draw_indirect_info *indirect,
148              const struct pipe_draw_start_count_bias *pdraw,
149              unsigned index_offset) assert_dt
150 {
151    if (!ctx->prog.fs || !ctx->prog.vs)
152       return false;
153 
154    if (pinfo->mode != MESA_PRIM_COUNT && !indirect && !pinfo->primitive_restart &&
155        !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count))
156       return false;
157 
158    if (ctx->dirty & FD_DIRTY_VTXBUF)
159       emit_vertexbufs(ctx);
160 
161    fd_blend_tracking(ctx);
162 
163    if (fd_binning_enabled)
164       fd2_emit_state_binning(ctx, ctx->dirty);
165 
166    fd2_emit_state(ctx, ctx->dirty);
167 
168    /* a2xx can draw only 65535 vertices at once
169     * on a22x the field in the draw command is 32bits but seems limited too
170     * using a limit of 32k because it fixes an unexplained hang
171     * 32766 works for all primitives (multiple of 2 and 3)
172     */
173    if (pdraw->count > 32766) {
174       /* clang-format off */
175       static const uint16_t step_tbl[MESA_PRIM_COUNT] = {
176          [0 ... MESA_PRIM_COUNT - 1]  = 32766,
177          [MESA_PRIM_LINE_STRIP]     = 32765,
178          [MESA_PRIM_TRIANGLE_STRIP] = 32764,
179 
180          /* needs more work */
181          [MESA_PRIM_TRIANGLE_FAN]   = 0,
182          [MESA_PRIM_LINE_LOOP]      = 0,
183       };
184       /* clang-format on */
185 
186       struct pipe_draw_start_count_bias draw = *pdraw;
187       unsigned count = draw.count;
188       unsigned step = step_tbl[pinfo->mode];
189       unsigned num_vertices = ctx->batch->num_vertices;
190 
191       if (!step)
192          return false;
193 
194       for (; count + step > 32766; count -= step) {
195          draw.count = MIN2(count, 32766);
196          draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false);
197          draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true);
198          draw.start += step;
199          ctx->batch->num_vertices += step;
200       }
201       /* changing this value is a hack, restore it */
202       ctx->batch->num_vertices = num_vertices;
203    } else {
204       draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false);
205       draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true);
206    }
207 
208    fd_context_all_clean(ctx);
209 
210    ctx->batch->num_vertices += pdraw->count * pinfo->instance_count;
211 
212    return true;
213 }
214 
215 static void
fd2_draw_vbos(struct fd_context * ctx,const struct pipe_draw_info * info,unsigned drawid_offset,const struct pipe_draw_indirect_info * indirect,const struct pipe_draw_start_count_bias * draws,unsigned num_draws,unsigned index_offset)216 fd2_draw_vbos(struct fd_context *ctx, const struct pipe_draw_info *info,
217               unsigned drawid_offset,
218               const struct pipe_draw_indirect_info *indirect,
219               const struct pipe_draw_start_count_bias *draws,
220               unsigned num_draws,
221               unsigned index_offset)
222    assert_dt
223 {
224    for (unsigned i = 0; i < num_draws; i++)
225       fd2_draw_vbo(ctx, info, drawid_offset, indirect, &draws[i], index_offset);
226 }
227 
228 static void
clear_state(struct fd_batch * batch,struct fd_ringbuffer * ring,unsigned buffers,bool fast_clear)229 clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring,
230             unsigned buffers, bool fast_clear) assert_dt
231 {
232    struct fd_context *ctx = batch->ctx;
233    struct fd2_context *fd2_ctx = fd2_context(ctx);
234    uint32_t reg;
235 
236    fd2_emit_vertex_bufs(ring, 0x9c,
237                         (struct fd2_vertex_buf[]){
238                            {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
239                         },
240                         1);
241 
242    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
243    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
244    OUT_RING(ring, 0);
245 
246    fd2_program_emit(ctx, ring, &ctx->solid_prog);
247 
248    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
249    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
250 
251    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
252       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
253       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
254       reg = 0;
255       if (buffers & PIPE_CLEAR_DEPTH) {
256          reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) |
257                 A2XX_RB_DEPTHCONTROL_Z_ENABLE |
258                 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE |
259                 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE;
260       }
261       if (buffers & PIPE_CLEAR_STENCIL) {
262          reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) |
263                 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE |
264                 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
265       }
266       OUT_RING(ring, reg);
267    }
268 
269    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
270    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
271    OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
272                      A2XX_RB_COLORCONTROL_BLEND_DISABLE |
273                      A2XX_RB_COLORCONTROL_ROP_CODE(12) |
274                      A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
275                      A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
276 
277    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
278    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
279    OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */
280    OUT_RING(
281       ring,
282       A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
283          A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
284          A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) |
285          (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0));
286 
287    if (fast_clear) {
288       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
289       OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
290       OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3));
291    }
292 
293    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
294    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
295    OUT_RING(ring, 0x0000ffff);
296 
297    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
298    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK));
299    if (buffers & PIPE_CLEAR_COLOR) {
300       OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED |
301                         A2XX_RB_COLOR_MASK_WRITE_GREEN |
302                         A2XX_RB_COLOR_MASK_WRITE_BLUE |
303                         A2XX_RB_COLOR_MASK_WRITE_ALPHA);
304    } else {
305       OUT_RING(ring, 0x0);
306    }
307 
308    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
309    OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
310    OUT_RING(ring, 0);
311 
312    if (is_a20x(batch->ctx->screen))
313       return;
314 
315    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
316    OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
317    OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
318    OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
319 
320    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
321    OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
322    OUT_RING(ring,
323             0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
324    OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
325 
326    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
327    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
328    OUT_RING(ring, 0x00000084);
329 
330    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
331    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
332    OUT_RING(ring, 0x0000028f);
333 }
334 
335 static void
clear_state_restore(struct fd_context * ctx,struct fd_ringbuffer * ring)336 clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring)
337 {
338    if (is_a20x(ctx->screen))
339       return;
340 
341    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
342    OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
343    OUT_RING(ring, 0x00000000);
344 
345    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
346    OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
347    OUT_RING(ring, 0x00000000);
348 
349    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
350    OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
351    OUT_RING(ring, 0x0000003b);
352 }
353 
354 static void
clear_fast(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t color_clear,uint32_t depth_clear,unsigned patch_type)355 clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring,
356            uint32_t color_clear, uint32_t depth_clear, unsigned patch_type)
357 {
358    BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */
359 
360    /* zero values are patched in */
361    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
362    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
363    OUT_RINGP(ring, patch_type, &batch->gmem_patches);
364 
365    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
366    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
367    OUT_RING(ring, 0x8000 | 32);
368    OUT_RING(ring, 0);
369    OUT_RING(ring, 0);
370 
371    /* set fill values */
372    if (!is_a20x(batch->ctx->screen)) {
373       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
374       OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
375       OUT_RING(ring, color_clear);
376 
377       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
378       OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
379       OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
380                         A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf));
381 
382       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
383       OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
384       OUT_RING(ring, depth_clear);
385    } else {
386       const float sc = 1.0f / 255.0f;
387 
388       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
389       OUT_RING(ring, 0x00000480);
390       OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc));
391       OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc));
392       OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc));
393       OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc));
394 
395       // XXX if using float the rounding error breaks it..
396       float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff);
397       assert((unsigned)(((double)depth * (double)0xffffff)) ==
398              (depth_clear >> 8));
399 
400       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
401       OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
402       OUT_RING(ring, fui(0.0f));
403       OUT_RING(ring, fui(depth));
404 
405       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
406       OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
407       OUT_RING(ring,
408                0xff000000 |
409                   A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) |
410                   A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
411       OUT_RING(ring, 0xff000000 |
412                         A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) |
413                         A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
414    }
415 
416    fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
417            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
418 }
419 
420 static bool
fd2_clear_fast(struct fd_context * ctx,unsigned buffers,const union pipe_color_union * color,double depth,unsigned stencil)421 fd2_clear_fast(struct fd_context *ctx, unsigned buffers,
422                const union pipe_color_union *color, double depth,
423                unsigned stencil) assert_dt
424 {
425    /* using 4x MSAA allows clearing ~2x faster
426     * then we can use higher bpp clearing to clear lower bpp
427     * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8)
428     * note: its possible to clear with 32_32_32_32 format but its not faster
429     * note: fast clear doesn't work with sysmem rendering
430     * (sysmem rendering is disabled when clear is used)
431     *
432     * we only have 16-bit / 32-bit color formats
433     * and 16-bit / 32-bit depth formats
434     * so there are only a few possible combinations
435     *
436     * if the bpp of the color/depth doesn't match
437     * we clear with depth/color individually
438     */
439    struct fd2_context *fd2_ctx = fd2_context(ctx);
440    struct fd_batch *batch = ctx->batch;
441    struct fd_ringbuffer *ring = batch->draw;
442    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
443    uint32_t color_clear = 0, depth_clear = 0;
444    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
445    int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */
446    int color_size = -1;
447 
448    /* TODO: need to test performance on a22x */
449    if (!is_a20x(ctx->screen))
450       return false;
451 
452    if (buffers & PIPE_CLEAR_COLOR)
453       color_size = util_format_get_blocksizebits(format) == 32;
454 
455    if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
456       /* no fast clear when clearing only one component of depth+stencil buffer */
457       if (!(buffers & PIPE_CLEAR_DEPTH))
458          return false;
459 
460       if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
461            pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) &&
462           !(buffers & PIPE_CLEAR_STENCIL))
463          return false;
464 
465       depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8;
466    }
467 
468    assert(color_size >= 0 || depth_size >= 0);
469 
470    if (color_size == 0) {
471       color_clear = pack_rgba(format, color->f);
472       color_clear = (color_clear << 16) | (color_clear & 0xffff);
473    } else if (color_size == 1) {
474       color_clear = pack_rgba(format, color->f);
475    }
476 
477    if (depth_size == 0) {
478       depth_clear = (uint32_t)(0xffff * depth);
479       depth_clear |= depth_clear << 16;
480    } else if (depth_size == 1) {
481       depth_clear = (((uint32_t)(0xffffff * depth)) << 8);
482       depth_clear |= (stencil & 0xff);
483    }
484 
485    /* disable "window" scissor.. */
486    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
487    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
488    OUT_RING(ring, xy2d(0, 0));
489    OUT_RING(ring, xy2d(0x7fff, 0x7fff));
490 
491    /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */
492    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
493    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
494    OUT_RING(ring, fui(4096.0f));
495    OUT_RING(ring, fui(4096.0f));
496    OUT_RING(ring, fui(4096.0f));
497    OUT_RING(ring, fui(4096.0f));
498 
499    clear_state(batch, ring, ~0u, true);
500 
501    if (color_size >= 0 && depth_size != color_size)
502       clear_fast(batch, ring, color_clear, color_clear,
503                  GMEM_PATCH_FASTCLEAR_COLOR);
504 
505    if (depth_size >= 0 && depth_size != color_size)
506       clear_fast(batch, ring, depth_clear, depth_clear,
507                  GMEM_PATCH_FASTCLEAR_DEPTH);
508 
509    if (depth_size == color_size)
510       clear_fast(batch, ring, color_clear, depth_clear,
511                  GMEM_PATCH_FASTCLEAR_COLOR_DEPTH);
512 
513    clear_state_restore(ctx, ring);
514 
515    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
516    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG));
517    OUT_RING(ring, 0);
518 
519    /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile.
520     * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT
521     * the value is read from byte offset 60 in the given bo
522     */
523    OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3);
524    OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0);
525    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR));
526    OUT_RING(ring, 1);
527 
528    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
529    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
530    OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches);
531    OUT_RING(ring, 0);
532    OUT_RING(ring, 0);
533    return true;
534 }
535 
536 static bool
fd2_clear(struct fd_context * ctx,enum fd_buffer_mask buffers,const union pipe_color_union * color,double depth,unsigned stencil)537 fd2_clear(struct fd_context *ctx, enum fd_buffer_mask buffers,
538           const union pipe_color_union *color, double depth,
539           unsigned stencil) assert_dt
540 {
541    struct fd_ringbuffer *ring = ctx->batch->draw;
542    struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer;
543 
544    if (fd2_clear_fast(ctx, buffers, color, depth, stencil))
545       goto dirty;
546 
547    /* set clear value */
548    if (is_a20x(ctx->screen)) {
549       if (buffers & FD_BUFFER_COLOR) {
550          /* C0 used by fragment shader */
551          OUT_PKT3(ring, CP_SET_CONSTANT, 5);
552          OUT_RING(ring, 0x00000480);
553          OUT_RING(ring, color->ui[0]);
554          OUT_RING(ring, color->ui[1]);
555          OUT_RING(ring, color->ui[2]);
556          OUT_RING(ring, color->ui[3]);
557       }
558 
559       if (buffers & FD_BUFFER_DEPTH) {
560          /* use viewport to set depth value */
561          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
562          OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE));
563          OUT_RING(ring, fui(0.0f));
564          OUT_RING(ring, fui(depth));
565       }
566 
567       if (buffers & FD_BUFFER_STENCIL) {
568          OUT_PKT3(ring, CP_SET_CONSTANT, 3);
569          OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF));
570          OUT_RING(ring, 0xff000000 |
571                            A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) |
572                            A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
573          OUT_RING(ring, 0xff000000 |
574                            A2XX_RB_STENCILREFMASK_STENCILREF(stencil) |
575                            A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
576       }
577    } else {
578       if (buffers & FD_BUFFER_COLOR) {
579          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
580          OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
581          OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f));
582       }
583 
584       if (buffers & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
585          uint32_t clear_mask, depth_clear;
586          switch (fd_pipe2depth(fb->zsbuf->format)) {
587          case DEPTHX_24_8:
588             clear_mask = ((buffers & FD_BUFFER_DEPTH) ? 0xe : 0) |
589                          ((buffers & FD_BUFFER_STENCIL) ? 0x1 : 0);
590             depth_clear =
591                (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
592             break;
593          case DEPTHX_16:
594             clear_mask = 0xf;
595             depth_clear = (uint32_t)(0xffffffff * depth);
596             break;
597          default:
598             unreachable("invalid depth");
599             break;
600          }
601 
602          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
603          OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
604          OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE |
605                            A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask));
606 
607          OUT_PKT3(ring, CP_SET_CONSTANT, 2);
608          OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR));
609          OUT_RING(ring, depth_clear);
610       }
611    }
612 
613    /* scissor state */
614    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
615    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
616    OUT_RING(ring, xy2d(0, 0));
617    OUT_RING(ring, xy2d(fb->width, fb->height));
618 
619    /* viewport state */
620    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
621    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
622    OUT_RING(ring, fui((float)fb->width / 2.0f));
623    OUT_RING(ring, fui((float)fb->width / 2.0f));
624    OUT_RING(ring, fui((float)fb->height / 2.0f));
625    OUT_RING(ring, fui((float)fb->height / 2.0f));
626 
627    /* common state */
628    clear_state(ctx->batch, ring, buffers, false);
629 
630    fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
631            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
632 
633    clear_state_restore(ctx, ring);
634 
635 dirty:
636    ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER |
637                  FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST |
638                  FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
639 
640    ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG;
641    ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |=
642       FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST;
643 
644    return true;
645 }
646 
647 void
fd2_draw_init(struct pipe_context * pctx)648 fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis
649 {
650    struct fd_context *ctx = fd_context(pctx);
651    ctx->draw_vbos = fd2_draw_vbos;
652    ctx->clear = fd2_clear;
653 }
654