xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a6xx/fd6_blitter.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Rob Clark <[email protected]>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <[email protected]>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 #include "util/format_srgb.h"
13 #include "util/half_float.h"
14 #include "util/u_dump.h"
15 #include "util/u_helpers.h"
16 #include "util/u_log.h"
17 #include "util/u_transfer.h"
18 #include "util/u_surface.h"
19 
20 #include "freedreno_blitter.h"
21 #include "freedreno_fence.h"
22 #include "freedreno_resource.h"
23 #include "freedreno_tracepoints.h"
24 
25 #include "fd6_barrier.h"
26 #include "fd6_blitter.h"
27 #include "fd6_emit.h"
28 #include "fd6_pack.h"
29 #include "fd6_resource.h"
30 
31 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)32 fd6_ifmt(enum a6xx_format fmt)
33 {
34    switch (fmt) {
35    case FMT6_A8_UNORM:
36    case FMT6_8_UNORM:
37    case FMT6_8_SNORM:
38    case FMT6_8_8_UNORM:
39    case FMT6_8_8_SNORM:
40    case FMT6_8_8_8_8_UNORM:
41    case FMT6_8_8_8_X8_UNORM:
42    case FMT6_8_8_8_8_SNORM:
43    case FMT6_4_4_4_4_UNORM:
44    case FMT6_5_5_5_1_UNORM:
45    case FMT6_5_6_5_UNORM:
46       return R2D_UNORM8;
47 
48    case FMT6_32_UINT:
49    case FMT6_32_SINT:
50    case FMT6_32_32_UINT:
51    case FMT6_32_32_SINT:
52    case FMT6_32_32_32_32_UINT:
53    case FMT6_32_32_32_32_SINT:
54       return R2D_INT32;
55 
56    case FMT6_16_UINT:
57    case FMT6_16_SINT:
58    case FMT6_16_16_UINT:
59    case FMT6_16_16_SINT:
60    case FMT6_16_16_16_16_UINT:
61    case FMT6_16_16_16_16_SINT:
62    case FMT6_10_10_10_2_UINT:
63       return R2D_INT16;
64 
65    case FMT6_8_UINT:
66    case FMT6_8_SINT:
67    case FMT6_8_8_UINT:
68    case FMT6_8_8_SINT:
69    case FMT6_8_8_8_8_UINT:
70    case FMT6_8_8_8_8_SINT:
71    case FMT6_Z24_UNORM_S8_UINT:
72    case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
73       return R2D_INT8;
74 
75    case FMT6_16_UNORM:
76    case FMT6_16_SNORM:
77    case FMT6_16_16_UNORM:
78    case FMT6_16_16_SNORM:
79    case FMT6_16_16_16_16_UNORM:
80    case FMT6_16_16_16_16_SNORM:
81    case FMT6_32_FLOAT:
82    case FMT6_32_32_FLOAT:
83    case FMT6_32_32_32_32_FLOAT:
84       return R2D_FLOAT32;
85 
86    case FMT6_16_FLOAT:
87    case FMT6_16_16_FLOAT:
88    case FMT6_16_16_16_16_FLOAT:
89    case FMT6_11_11_10_FLOAT:
90    case FMT6_10_10_10_2_UNORM_DEST:
91       return R2D_FLOAT16;
92 
93    default:
94       unreachable("bad format");
95       return (enum a6xx_2d_ifmt)0;
96    }
97 }
98 
99 /* Make sure none of the requested dimensions extend beyond the size of the
100  * resource.  Not entirely sure why this happens, but sometimes it does, and
101  * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
102  * back to u_blitter
103  */
104 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)105 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
106 {
107    int last_layer =
108       r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
109 
110    return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
111           (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
112           (b->z >= 0) && (b->z + b->depth <= last_layer);
113 }
114 
115 static bool
ok_format(enum pipe_format pfmt)116 ok_format(enum pipe_format pfmt)
117 {
118    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
119 
120    if (util_format_is_compressed(pfmt))
121       return true;
122 
123    switch (pfmt) {
124    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
125    case PIPE_FORMAT_Z24X8_UNORM:
126    case PIPE_FORMAT_Z16_UNORM:
127    case PIPE_FORMAT_Z32_UNORM:
128    case PIPE_FORMAT_Z32_FLOAT:
129    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
130    case PIPE_FORMAT_S8_UINT:
131       return true;
132    default:
133       break;
134    }
135 
136    if (fmt == FMT6_NONE)
137       return false;
138 
139    return true;
140 }
141 
/* Compile-time debug knobs: set to 1 to dump every blit that is emitted
 * (DEBUG_BLIT) and/or every blit that gets rejected (DEBUG_BLIT_FALLBACK).
 */
#define DEBUG_BLIT          0
#define DEBUG_BLIT_FALLBACK 0

/* Return false from the enclosing function when `cond` holds.
 *
 * Expects a `const struct pipe_blit_info *info` to be in scope; with
 * DEBUG_BLIT_FALLBACK enabled the failing condition text and the blit
 * info are printed to stderr before returning.
 */
#define fail_if(cond)                                                          \
   do {                                                                        \
      if (cond) {                                                              \
         if (DEBUG_BLIT_FALLBACK) {                                            \
            fprintf(stderr, "falling back: %s for blit:\n", #cond);            \
            dump_blit_info(info);                                              \
         }                                                                     \
         return false;                                                         \
      }                                                                        \
   } while (0)
155 
156 static bool
is_ubwc(struct pipe_resource * prsc,unsigned level)157 is_ubwc(struct pipe_resource *prsc, unsigned level)
158 {
159    return fd_resource_ubwc_enabled(fd_resource(prsc), level);
160 }
161 
162 static void
dump_blit_info(const struct pipe_blit_info * info)163 dump_blit_info(const struct pipe_blit_info *info)
164 {
165    util_dump_blit_info(stderr, info);
166    fprintf(stderr, "\n\tdst resource: ");
167    util_dump_resource(stderr, info->dst.resource);
168    if (is_ubwc(info->dst.resource, info->dst.level))
169       fprintf(stderr, " (ubwc)");
170    fprintf(stderr, "\n\tsrc resource: ");
171    util_dump_resource(stderr, info->src.resource);
172    if (is_ubwc(info->src.resource, info->src.level))
173       fprintf(stderr, " (ubwc)");
174    fprintf(stderr, "\n\n");
175 }
176 
177 static bool
can_do_blit(const struct pipe_blit_info * info)178 can_do_blit(const struct pipe_blit_info *info)
179 {
180    /* I think we can do scaling, but not in z dimension since that would
181     * require blending..
182     */
183    fail_if(info->dst.box.depth != info->src.box.depth);
184 
185    /* Fail if unsupported format: */
186    fail_if(!ok_format(info->src.format));
187    fail_if(!ok_format(info->dst.format));
188 
189    assert(!util_format_is_compressed(info->src.format));
190    assert(!util_format_is_compressed(info->dst.format));
191 
192    fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
193 
194    fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
195 
196    assert(info->dst.box.width >= 0);
197    assert(info->dst.box.height >= 0);
198    assert(info->dst.box.depth >= 0);
199 
200    fail_if(info->dst.resource->nr_samples > 1);
201 
202    fail_if(info->window_rectangle_include);
203 
204    /* The blitter can't handle the needed swizzle gymnastics to convert
205     * to/from L/A formats:
206     */
207    if (info->src.format != info->dst.format) {
208       fail_if(util_format_is_luminance(info->dst.format));
209       fail_if(util_format_is_alpha(info->dst.format));
210       fail_if(util_format_is_luminance_alpha(info->dst.format));
211       fail_if(util_format_is_luminance(info->src.format));
212       fail_if(util_format_is_alpha(info->src.format));
213       fail_if(util_format_is_luminance_alpha(info->src.format));
214    }
215 
216    const struct util_format_description *src_desc =
217       util_format_description(info->src.format);
218    const struct util_format_description *dst_desc =
219       util_format_description(info->dst.format);
220    const int common_channels =
221       MIN2(src_desc->nr_channels, dst_desc->nr_channels);
222 
223    if (info->mask & PIPE_MASK_RGBA) {
224       for (int i = 0; i < common_channels; i++) {
225          fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
226                         sizeof(src_desc->channel[0])));
227       }
228    }
229 
230    fail_if(info->alpha_blend);
231 
232    return true;
233 }
234 
235 static bool
can_do_clear(const struct pipe_resource * prsc,unsigned level,const struct pipe_box * box)236 can_do_clear(const struct pipe_resource *prsc, unsigned level,
237              const struct pipe_box *box)
238 {
239    return ok_format(prsc->format) &&
240           ok_dims(prsc, box, level) &&
241           (fd_resource_nr_samples(prsc) == 1);
242 
243    return true;
244 }
245 
246 template <chip CHIP>
247 static void
emit_setup(struct fd_batch * batch)248 emit_setup(struct fd_batch *batch)
249 {
250    struct fd_ringbuffer *ring = batch->draw;
251    struct fd_screen *screen = batch->ctx->screen;
252 
253    fd6_emit_flushes<CHIP>(batch->ctx, ring,
254                           FD6_FLUSH_CCU_COLOR |
255                           FD6_INVALIDATE_CCU_COLOR |
256                           FD6_FLUSH_CCU_DEPTH |
257                           FD6_INVALIDATE_CCU_DEPTH);
258 
259    /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
260    OUT_WFI5(ring);
261    fd6_emit_ccu_cntl<CHIP>(ring, screen, false);
262 }
263 
264 template <chip CHIP>
265 static void
emit_blit_fini(struct fd_context * ctx,struct fd_ringbuffer * ring)266 emit_blit_fini(struct fd_context *ctx, struct fd_ringbuffer *ring)
267 {
268    fd6_event_write<CHIP>(ctx, ring, FD_LABEL);
269    OUT_WFI5(ring);
270 
271    OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
272    OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_DBG_ECO_CNTL_blit);
273 
274    OUT_PKT7(ring, CP_BLIT, 1);
275    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
276 
277    OUT_WFI5(ring);
278 
279    OUT_PKT4(ring, REG_A6XX_RB_DBG_ECO_CNTL, 1);
280    OUT_RING(ring, 0); /* RB_DBG_ECO_CNTL */
281 }
282 FD_GENX(emit_blit_fini);
283 
284 template <chip CHIP>
285 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01,enum a6xx_rotation rotate)286 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
287                 bool scissor_enable, union pipe_color_union *color,
288                 uint32_t unknown_8c01, enum a6xx_rotation rotate)
289 {
290    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
291    bool is_srgb = util_format_is_srgb(pfmt);
292    enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
293 
294    if (is_srgb) {
295       assert(ifmt == R2D_UNORM8);
296       ifmt = R2D_UNORM8_SRGB;
297    }
298 
299    uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
300                         A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
301                         A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
302                         A6XX_RB_2D_BLIT_CNTL_ROTATE(rotate) |
303                         COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
304                         COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
305 
306    OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
307    OUT_RING(ring, blit_cntl);
308 
309    OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
310    OUT_RING(ring, blit_cntl);
311 
312    if (CHIP >= A7XX) {
313       OUT_PKT4(ring, REG_A7XX_SP_PS_UNKNOWN_B2D2, 1);
314       OUT_RING(ring, 0x20000000);
315    }
316 
317    if (fmt == FMT6_10_10_10_2_UNORM_DEST)
318       fmt = FMT6_16_16_16_16_FLOAT;
319 
320    /* This register is probably badly named... it seems that it's
321     * controlling the internal/accumulator format or something like
322     * that. It's certainly not tied to only the src format.
323     */
324    OUT_REG(ring, SP_2D_DST_FORMAT(
325          CHIP,
326          .sint = util_format_is_pure_sint(pfmt),
327          .uint = util_format_is_pure_uint(pfmt),
328          .color_format = fmt,
329          .srgb = is_srgb,
330          .mask = 0xf,
331    ));
332 
333    OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
334    OUT_RING(ring, unknown_8c01);
335 }
336 
337 static void
emit_blit_buffer_dst(struct fd_ringbuffer * ring,struct fd_resource * dst,unsigned off,unsigned size,a6xx_format color_format)338 emit_blit_buffer_dst(struct fd_ringbuffer *ring, struct fd_resource *dst,
339                      unsigned off, unsigned size, a6xx_format color_format)
340 {
341    OUT_REG(ring,
342            A6XX_RB_2D_DST_INFO(
343                  .color_format = color_format,
344                  .tile_mode = TILE6_LINEAR,
345                  .color_swap = WZYX,
346            ),
347            A6XX_RB_2D_DST(
348                  .bo = dst->bo,
349                  .bo_offset = off,
350            ),
351            A6XX_RB_2D_DST_PITCH(size),
352    );
353 }
354 
355 /* buffers need to be handled specially since x/width can exceed the bounds
356  * supported by hw.. if necessary decompose into (potentially) two 2D blits
357  */
358 template <chip CHIP>
359 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)360 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
361                  const struct pipe_blit_info *info)
362 {
363    const struct pipe_box *sbox = &info->src.box;
364    const struct pipe_box *dbox = &info->dst.box;
365    struct fd_resource *src, *dst;
366    unsigned sshift, dshift;
367 
368    if (DEBUG_BLIT) {
369       fprintf(stderr, "buffer blit: ");
370       dump_blit_info(info);
371    }
372 
373    src = fd_resource(info->src.resource);
374    dst = fd_resource(info->dst.resource);
375 
376    assert(src->layout.cpp == 1);
377    assert(dst->layout.cpp == 1);
378    assert(info->src.resource->format == info->dst.resource->format);
379    assert((sbox->y == 0) && (sbox->height == 1));
380    assert((dbox->y == 0) && (dbox->height == 1));
381    assert((sbox->z == 0) && (sbox->depth == 1));
382    assert((dbox->z == 0) && (dbox->depth == 1));
383    assert(sbox->width == dbox->width);
384    assert(info->src.level == 0);
385    assert(info->dst.level == 0);
386 
387    /*
388     * Buffers can have dimensions bigger than max width, remap into
389     * multiple 1d blits to fit within max dimension
390     *
391     * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
392     * seems to prevent overfetch related faults.  Not quite sure what
393     * the deal is there.
394     *
395     * Low 6 bits of SRC/DST addresses need to be zero (ie. address
396     * aligned to 64) so we need to shift src/dst x1/x2 to make up the
397     * difference.  On top of already splitting up the blit so width
398     * isn't > 16k.
399     *
400     * We perhaps could do a bit better, if src and dst are aligned but
401     * in the worst case this means we have to split the copy up into
402     * 16k (0x4000) minus 64 (0x40).
403     */
404 
405    sshift = sbox->x & 0x3f;
406    dshift = dbox->x & 0x3f;
407 
408    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0, ROTATE_0);
409 
410    for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
411       unsigned soff, doff, w, p;
412 
413       soff = (sbox->x + off) & ~0x3f;
414       doff = (dbox->x + off) & ~0x3f;
415 
416       w = MIN2(sbox->width - off, (0x4000 - 0x40));
417       p = align(w, 64);
418 
419       assert((soff + w) <= fd_bo_size(src->bo));
420       assert((doff + w) <= fd_bo_size(dst->bo));
421 
422       /*
423        * Emit source:
424        */
425       OUT_REG(ring,
426               SP_PS_2D_SRC_INFO(
427                     CHIP,
428                     .color_format = FMT6_8_UNORM,
429                     .tile_mode = TILE6_LINEAR,
430                     .color_swap = WZYX,
431                     .unk20 = true,
432                     .unk22 = true,
433               ),
434               SP_PS_2D_SRC_SIZE(
435                     CHIP,
436                     .width = sshift + w,
437                     .height = 1,
438               ),
439               SP_PS_2D_SRC(
440                     CHIP,
441                     .bo = src->bo,
442                     .bo_offset = soff,
443               ),
444               SP_PS_2D_SRC_PITCH(
445                     CHIP,
446                     .pitch = p,
447               ),
448       );
449 
450       /*
451        * Emit destination:
452        */
453       emit_blit_buffer_dst(ring, dst, doff, p, FMT6_8_UNORM);
454 
455       /*
456        * Blit command:
457        */
458       OUT_REG(ring,
459               A6XX_GRAS_2D_SRC_TL_X(sshift),
460               A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1),
461               A6XX_GRAS_2D_SRC_TL_Y(0),
462               A6XX_GRAS_2D_SRC_BR_Y(0),
463       );
464 
465       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
466       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
467       OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
468                         A6XX_GRAS_2D_DST_BR_Y(0));
469 
470       emit_blit_fini<CHIP>(ctx, ring);
471    }
472 }
473 
474 template <chip CHIP>
475 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)476 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
477 {
478    struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
479    union pipe_color_union color = {};
480 
481    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0, ROTATE_0);
482 
483    OUT_REG(ring,
484            SP_PS_2D_SRC_INFO(CHIP),
485            SP_PS_2D_SRC_SIZE(CHIP),
486            SP_PS_2D_SRC(CHIP),
487            SP_PS_2D_SRC_PITCH(CHIP),
488    );
489 
490    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
491    OUT_RING(ring, 0x00000000);
492    OUT_RING(ring, 0x00000000);
493    OUT_RING(ring, 0x00000000);
494    OUT_RING(ring, 0x00000000);
495 
496    OUT_REG(ring,
497            A6XX_GRAS_2D_SRC_TL_X(0),
498            A6XX_GRAS_2D_SRC_BR_X(0),
499            A6XX_GRAS_2D_SRC_TL_Y(0),
500            A6XX_GRAS_2D_SRC_BR_Y(0),
501    );
502 
503    unsigned size = rsc->layout.slices[0].offset;
504    unsigned offset = 0;
505 
506    /* We could be more clever here and realize that we could use a
507     * larger width if the size is aligned to something more than a
508     * single page.. or even use a format larger than r8 in those
509     * cases. But for normal sized textures and even up to 16k x 16k
510     * at <= 4byte/pixel, we'll only go thru the loop once
511     */
512    const unsigned w = 0x1000;
513 
514    /* ubwc size should always be page aligned: */
515    assert((size % w) == 0);
516 
517    while (size > 0) {
518       const unsigned h = MIN2(0x4000, size / w);
519       /* width is already aligned to a suitable pitch: */
520       const unsigned p = w;
521 
522       /*
523        * Emit destination:
524        */
525       emit_blit_buffer_dst(ring, rsc, offset, p, FMT6_8_UNORM);
526 
527       /*
528        * Blit command:
529        */
530 
531       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
532       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
533       OUT_RING(ring,
534                A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
535 
536       emit_blit_fini<CHIP>(batch->ctx, ring);
537       offset += w * h;
538       size -= w * h;
539    }
540 
541    fd6_emit_flushes<CHIP>(batch->ctx, ring,
542                           FD6_FLUSH_CCU_COLOR |
543                           FD6_FLUSH_CCU_DEPTH |
544                           FD6_FLUSH_CACHE |
545                           FD6_WAIT_FOR_IDLE);
546 }
547 
548 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)549 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
550               enum pipe_format pfmt, unsigned level, unsigned layer)
551 {
552    struct fd_resource *dst = fd_resource(prsc);
553    enum a6xx_format fmt =
554          fd6_color_format(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
555    enum a6xx_tile_mode tile =
556          (enum a6xx_tile_mode)fd_resource_tile_mode(prsc, level);
557    enum a3xx_color_swap swap =
558          fd6_color_swap(pfmt, (enum a6xx_tile_mode)dst->layout.tile_mode);
559    uint32_t pitch = fd_resource_pitch(dst, level);
560    bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
561    unsigned off = fd_resource_offset(dst, level, layer);
562 
563    if (fmt == FMT6_Z24_UNORM_S8_UINT)
564       fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
565 
566    OUT_REG(ring,
567            A6XX_RB_2D_DST_INFO(
568                  .color_format = fmt,
569                  .tile_mode = tile,
570                  .color_swap = swap,
571                  .flags = ubwc_enabled,
572                  .srgb = util_format_is_srgb(pfmt),
573            ),
574            A6XX_RB_2D_DST(
575                  .bo = dst->bo,
576                  .bo_offset = off,
577            ),
578            A6XX_RB_2D_DST_PITCH(pitch),
579    );
580 
581    if (ubwc_enabled) {
582       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
583       fd6_emit_flag_reference(ring, dst, level, layer);
584       OUT_RING(ring, 0x00000000);
585       OUT_RING(ring, 0x00000000);
586       OUT_RING(ring, 0x00000000);
587    }
588 }
589 
590 template <chip CHIP>
591 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples)592 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
593               unsigned layer, unsigned nr_samples)
594 {
595    struct fd_resource *src = fd_resource(info->src.resource);
596    enum a6xx_format sfmt =
597       fd6_texture_format(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
598    enum a6xx_tile_mode stile =
599       (enum a6xx_tile_mode)fd_resource_tile_mode(info->src.resource, info->src.level);
600    enum a3xx_color_swap sswap =
601       fd6_texture_swap(info->src.format, (enum a6xx_tile_mode)src->layout.tile_mode);
602    uint32_t pitch = fd_resource_pitch(src, info->src.level);
603    bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
604    unsigned soff = fd_resource_offset(src, info->src.level, layer);
605    uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
606    uint32_t height = u_minify(src->b.b.height0, info->src.level);
607    enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
608 
609    if (info->src.format == PIPE_FORMAT_A8_UNORM)
610       sfmt = FMT6_A8_UNORM;
611 
612    OUT_REG(ring,
613            SP_PS_2D_SRC_INFO(
614                  CHIP,
615                  .color_format = sfmt,
616                  .tile_mode = stile,
617                  .color_swap = sswap,
618                  .flags = subwc_enabled,
619                  .srgb  = util_format_is_srgb(info->src.format),
620                  .samples = samples,
621                  .filter = (info->filter == PIPE_TEX_FILTER_LINEAR),
622                  .samples_average = (samples > MSAA_ONE) && !info->sample0_only,
623                  .unk20 = true,
624                  .unk22 = true,
625            ),
626            SP_PS_2D_SRC_SIZE(
627                  CHIP,
628                  .width = width,
629                  .height = height,
630            ),
631            SP_PS_2D_SRC(
632                  CHIP,
633                  .bo = src->bo,
634                  .bo_offset = soff,
635            ),
636            SP_PS_2D_SRC_PITCH(
637                  CHIP,
638                  .pitch = pitch,
639            ),
640    );
641 
642    if (subwc_enabled && fd_resource_ubwc_enabled(src, info->src.level)) {
643       OUT_REG(ring,
644               SP_PS_2D_SRC_FLAGS(
645                     CHIP,
646                     .bo = src->bo,
647                     .bo_offset = fd_resource_ubwc_offset(src, info->src.level, layer),
648               ),
649               SP_PS_2D_SRC_FLAGS_PITCH(
650                     CHIP, fdl_ubwc_pitch(&src->layout, info->src.level)),
651       );
652    }
653 }
654 
655 template <chip CHIP>
656 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)657 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
658                   const struct pipe_blit_info *info)
659 {
660    const struct pipe_box *sbox = &info->src.box;
661    const struct pipe_box *dbox = &info->dst.box;
662    struct fd_resource *dst;
663    int sx1, sy1, sx2, sy2;
664    int dx1, dy1, dx2, dy2;
665 
666    if (DEBUG_BLIT) {
667       fprintf(stderr, "texture blit: ");
668       dump_blit_info(info);
669    }
670 
671    dst = fd_resource(info->dst.resource);
672 
673    uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
674 
675    sx1 = sbox->x * nr_samples;
676    sy1 = sbox->y;
677    sx2 = (sbox->x + sbox->width) * nr_samples;
678    sy2 = sbox->y + sbox->height;
679 
680    dx1 = dbox->x * nr_samples;
681    dy1 = dbox->y;
682    dx2 = (dbox->x + dbox->width) * nr_samples;
683    dy2 = dbox->y + dbox->height;
684 
685    static const enum a6xx_rotation rotates[2][2] = {
686       {ROTATE_0, ROTATE_HFLIP},
687       {ROTATE_VFLIP, ROTATE_180},
688    };
689    bool mirror_x = (sx2 < sx1) != (dx2 < dx1);
690    bool mirror_y = (sy2 < sy1) != (dy2 < dy1);
691 
692    enum a6xx_rotation rotate = rotates[mirror_y][mirror_x];
693 
694    OUT_REG(ring,
695            A6XX_GRAS_2D_SRC_TL_X(MIN2(sx1, sx2)),
696            A6XX_GRAS_2D_SRC_BR_X(MAX2(sx1, sx2) - 1),
697            A6XX_GRAS_2D_SRC_TL_Y(MIN2(sy1, sy2)),
698            A6XX_GRAS_2D_SRC_BR_Y(MAX2(sy1, sy2) - 1),
699    );
700 
701    OUT_REG(ring,
702            A6XX_GRAS_2D_DST_TL(.x = MIN2(dx1, dx2),
703                                .y = MIN2(dy1, dy2)),
704            A6XX_GRAS_2D_DST_BR(.x = MAX2(dx1, dx2) - 1,
705                                .y = MAX2(dy1, dy2) - 1),
706    );
707 
708    if (info->scissor_enable) {
709       OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
710       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
711                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
712       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
713                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
714    }
715 
716    emit_blit_setup<CHIP>(ring, info->dst.format, info->scissor_enable, NULL, 0, rotate);
717 
718    for (unsigned i = 0; i < info->dst.box.depth; i++) {
719 
720       emit_blit_src<CHIP>(ring, info, sbox->z + i, nr_samples);
721       emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
722                     dbox->z + i);
723 
724       emit_blit_fini<CHIP>(ctx, ring);
725    }
726 }
727 
728 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)729 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
730                  union pipe_color_union *color)
731 {
732    switch (pfmt) {
733    case PIPE_FORMAT_Z24X8_UNORM:
734    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
735    case PIPE_FORMAT_X24S8_UINT: {
736       uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
737       uint8_t stencil = color->ui[1];
738       color->ui[0] = depth_unorm24 & 0xff;
739       color->ui[1] = (depth_unorm24 >> 8) & 0xff;
740       color->ui[2] = (depth_unorm24 >> 16) & 0xff;
741       color->ui[3] = stencil;
742       break;
743    }
744    default:
745       break;
746    }
747 
748    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
749    switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
750    case R2D_UNORM8:
751    case R2D_UNORM8_SRGB:
752       /* The r2d ifmt is badly named, it also covers the signed case: */
753       if (util_format_is_snorm(pfmt)) {
754          OUT_RING(ring, float_to_byte_tex(color->f[0]));
755          OUT_RING(ring, float_to_byte_tex(color->f[1]));
756          OUT_RING(ring, float_to_byte_tex(color->f[2]));
757          OUT_RING(ring, float_to_byte_tex(color->f[3]));
758       } else {
759          OUT_RING(ring, float_to_ubyte(color->f[0]));
760          OUT_RING(ring, float_to_ubyte(color->f[1]));
761          OUT_RING(ring, float_to_ubyte(color->f[2]));
762          OUT_RING(ring, float_to_ubyte(color->f[3]));
763       }
764       break;
765    case R2D_FLOAT16:
766       OUT_RING(ring, _mesa_float_to_half(color->f[0]));
767       OUT_RING(ring, _mesa_float_to_half(color->f[1]));
768       OUT_RING(ring, _mesa_float_to_half(color->f[2]));
769       OUT_RING(ring, _mesa_float_to_half(color->f[3]));
770       break;
771    case R2D_FLOAT32:
772    case R2D_INT32:
773    case R2D_INT16:
774    case R2D_INT8:
775    default:
776       OUT_RING(ring, color->ui[0]);
777       OUT_RING(ring, color->ui[1]);
778       OUT_RING(ring, color->ui[2]);
779       OUT_RING(ring, color->ui[3]);
780       break;
781    }
782 }
783 
784 
785 template <chip CHIP>
786 void
fd6_clear_lrz(struct fd_batch * batch,struct fd_resource * zsbuf,struct fd_bo * lrz,double depth)787 fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf,
788               struct fd_bo *lrz, double depth)
789 {
790    struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
791 
792    if (DEBUG_BLIT) {
793       fprintf(stderr, "lrz clear:\ndst resource: ");
794       util_dump_resource(stderr, &zsbuf->b.b);
795       fprintf(stderr, "\n");
796    }
797 
798    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
799    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
800    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(zsbuf->lrz_width - 1) |
801                      A6XX_GRAS_2D_DST_BR_Y(zsbuf->lrz_height - 1));
802 
803    union pipe_color_union clear_color = { .f = {depth} };
804 
805    emit_clear_color(ring, PIPE_FORMAT_Z16_UNORM, &clear_color);
806    emit_blit_setup<CHIP>(ring, PIPE_FORMAT_Z16_UNORM, false, &clear_color, 0, ROTATE_0);
807 
808    OUT_REG(ring,
809            A6XX_RB_2D_DST_INFO(
810                  .color_format = FMT6_16_UNORM,
811                  .tile_mode = TILE6_LINEAR,
812                  .color_swap = WZYX,
813            ),
814            A6XX_RB_2D_DST(
815                  .bo = lrz,
816            ),
817            A6XX_RB_2D_DST_PITCH(zsbuf->lrz_pitch * 2),
818    );
819 
820    /*
821     * Blit command:
822     */
823 
824    OUT_PKT7(ring, CP_BLIT, 1);
825    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
826 }
827 FD_GENX(fd6_clear_lrz);
828 
829 /**
830  * Handle conversion of clear color
831  */
832 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)833 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
834 {
835    const struct util_format_description *desc = util_format_description(format);
836    union pipe_color_union color = *pcolor;
837 
838    for (unsigned i = 0; i < 4; i++) {
839       unsigned channel = desc->swizzle[i];
840 
841       if (desc->channel[channel].normalized)
842          continue;
843 
844       switch (desc->channel[channel].type) {
845       case UTIL_FORMAT_TYPE_SIGNED:
846          color.i[i] = MAX2(color.i[i], -(1<<(desc->channel[channel].size - 1)));
847          color.i[i] = MIN2(color.i[i], (1 << (desc->channel[channel].size - 1)) - 1);
848          break;
849       case UTIL_FORMAT_TYPE_UNSIGNED:
850          color.ui[i] = MIN2(color.ui[i], BITFIELD_MASK(desc->channel[channel].size));
851          break;
852       }
853    }
854 
855    /* For solid-fill blits, the hw isn't going to convert from
856     * linear to srgb for us:
857     */
858    if (util_format_is_srgb(format)) {
859       for (int i = 0; i < 3; i++)
860          color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
861    }
862 
863    if (util_format_is_snorm(format)) {
864       for (int i = 0; i < 3; i++)
865          color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
866    }
867 
868    return color;
869 }
870 
871 template <chip CHIP>
872 static void
fd6_clear_buffer(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned offset,unsigned size,const void * clear_value,int clear_value_size)873 fd6_clear_buffer(struct pipe_context *pctx,
874                  struct pipe_resource *prsc,
875                  unsigned offset, unsigned size,
876                  const void *clear_value, int clear_value_size)
877 {
878    enum pipe_format dst_fmt;
879    union pipe_color_union color;
880 
881    switch (clear_value_size) {
882    case 16:
883       dst_fmt = PIPE_FORMAT_R32G32B32A32_UINT;
884       memcpy(&color.ui, clear_value, 16);
885       break;
886    case 8:
887       dst_fmt = PIPE_FORMAT_R32G32_UINT;
888       memcpy(&color.ui, clear_value, 8);
889       memset(&color.ui[2], 0, 8);
890       break;
891    case 4:
892       dst_fmt = PIPE_FORMAT_R32_UINT;
893       memcpy(&color.ui, clear_value, 4);
894       memset(&color.ui[1], 0, 12);
895       break;
896    case 2:
897       dst_fmt = PIPE_FORMAT_R16_UINT;
898       color.ui[0] = *(unsigned short *)clear_value;
899       memset(&color.ui[1], 0, 12);
900       break;
901    case 1:
902       dst_fmt = PIPE_FORMAT_R8_UINT;
903       color.ui[0] = *(unsigned char *)clear_value;
904       memset(&color.ui[1], 0, 12);
905       break;
906    default:
907       dst_fmt = PIPE_FORMAT_NONE;
908       break;
909    }
910 
911    /* unsupported clear_value_size and when alignment doesn't match fallback */
912    if ((dst_fmt == PIPE_FORMAT_NONE) || (offset % clear_value_size)) {
913       u_default_clear_buffer(pctx, prsc, offset, size, clear_value, clear_value_size);
914       return;
915    }
916 
917    if (DEBUG_BLIT) {
918       fprintf(stderr, "buffer clear:\ndst resource: ");
919       util_dump_resource(stderr, prsc);
920       fprintf(stderr, "\n");
921    }
922 
923    struct fd_context *ctx = fd_context(pctx);
924    struct fd_resource *rsc = fd_resource(prsc);
925    struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
926    struct fd_ringbuffer *ring = batch->draw;
927 
928    fd_screen_lock(ctx->screen);
929    fd_batch_resource_write(batch, rsc);
930    fd_screen_unlock(ctx->screen);
931 
932    assert(!batch->flushed);
933 
934    /* Marking the batch as needing flush must come after the batch
935     * dependency tracking (resource_read()/resource_write()), as that
936     * can trigger a flush
937     */
938    fd_batch_needs_flush(batch);
939 
940    fd_batch_update_queries(batch);
941 
942    emit_setup<CHIP>(batch);
943 
944    emit_clear_color(ring, dst_fmt, &color);
945    emit_blit_setup<CHIP>(ring, dst_fmt, false, &color, 0, ROTATE_0);
946 
947    unsigned dshift = (offset / clear_value_size) & 0x3f;
948    for (unsigned part_offset = 0; part_offset < size; part_offset += (0x4000 - 0x40)) {
949       unsigned doff = (offset + part_offset) & ~0x3f;
950 
951       unsigned w = MIN2((size - part_offset) / clear_value_size, (0x4000 - 0x40));
952 
953       emit_blit_buffer_dst(ring, rsc, doff, 0, fd6_color_format(dst_fmt, TILE6_LINEAR));
954 
955       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
956       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
957       OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
958                         A6XX_GRAS_2D_DST_BR_Y(0));
959 
960       emit_blit_fini<CHIP>(ctx, ring);
961    }
962 
963    fd6_emit_flushes<CHIP>(batch->ctx, ring,
964                     FD6_FLUSH_CCU_COLOR |
965                     FD6_FLUSH_CCU_DEPTH |
966                     FD6_FLUSH_CACHE |
967                     FD6_WAIT_FOR_IDLE);
968 
969    fd_batch_flush(batch);
970    fd_batch_reference(&batch, NULL);
971 
972    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
973     * the ctx->batch may need to turn its queries back on.
974     */
975    fd_context_dirty(ctx, FD_DIRTY_QUERY);
976 }
977 
978 template <chip CHIP>
979 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,const struct pipe_box * box2d,union pipe_color_union * color,uint32_t unknown_8c01)980 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
981                   struct pipe_surface *psurf, const struct pipe_box *box2d,
982                   union pipe_color_union *color, uint32_t unknown_8c01)
983 {
984    if (DEBUG_BLIT) {
985       fprintf(stderr, "surface clear:\ndst resource: ");
986       util_dump_resource(stderr, psurf->texture);
987       fprintf(stderr, "\n");
988    }
989 
990    uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
991    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
992    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(box2d->x * nr_samples) |
993                      A6XX_GRAS_2D_DST_TL_Y(box2d->y));
994    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X((box2d->x + box2d->width) * nr_samples - 1) |
995                      A6XX_GRAS_2D_DST_BR_Y(box2d->y + box2d->height - 1));
996 
997    union pipe_color_union clear_color = convert_color(psurf->format, color);
998 
999    emit_clear_color(ring, psurf->format, &clear_color);
1000    emit_blit_setup<CHIP>(ring, psurf->format, false, &clear_color, unknown_8c01, ROTATE_0);
1001 
1002    for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
1003         i++) {
1004       emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
1005 
1006       emit_blit_fini<CHIP>(ctx, ring);
1007    }
1008 }
1009 FD_GENX(fd6_clear_surface);
1010 
1011 template <chip CHIP>
1012 static void
fd6_clear_texture(struct pipe_context * pctx,struct pipe_resource * prsc,unsigned level,const struct pipe_box * box,const void * data)1013 fd6_clear_texture(struct pipe_context *pctx, struct pipe_resource *prsc,
1014                   unsigned level, const struct pipe_box *box, const void *data)
1015    assert_dt
1016 {
1017    struct fd_context *ctx = fd_context(pctx);
1018    struct fd_resource *rsc = fd_resource(prsc);
1019 
1020    if (DEBUG_BLIT) {
1021       fprintf(stderr, "surface texture:\ndst resource: ");
1022       util_dump_resource(stderr, prsc);
1023       fprintf(stderr, "\n");
1024    }
1025 
1026    if (!can_do_clear(prsc, level, box)) {
1027       u_default_clear_texture(pctx, prsc, level, box, data);
1028       return;
1029    }
1030 
1031    union pipe_color_union color;
1032 
1033    if (util_format_is_depth_or_stencil(prsc->format)) {
1034       const struct util_format_description *desc =
1035              util_format_description(prsc->format);
1036       float depth = 0.0f;
1037       uint8_t stencil = 0;
1038 
1039       if (util_format_has_depth(desc))
1040          util_format_unpack_z_float(prsc->format, &depth, data, 1);
1041 
1042       if (util_format_has_stencil(desc))
1043          util_format_unpack_s_8uint(prsc->format, &stencil, data, 1);
1044 
1045       if (rsc->stencil)
1046          fd6_clear_texture<CHIP>(pctx, &rsc->stencil->b.b, level, box, &stencil);
1047 
1048       color.f[0] = depth;
1049       color.ui[1] = stencil;
1050    } else {
1051       util_format_unpack_rgba(prsc->format, color.ui, data, 1);
1052    }
1053 
1054    struct fd_batch *batch = fd_bc_alloc_batch(ctx, true);
1055 
1056    fd_screen_lock(ctx->screen);
1057    fd_batch_resource_write(batch, rsc);
1058    fd_screen_unlock(ctx->screen);
1059 
1060    assert(!batch->flushed);
1061 
1062    /* Marking the batch as needing flush must come after the batch
1063     * dependency tracking (resource_read()/resource_write()), as that
1064     * can trigger a flush
1065     */
1066    fd_batch_needs_flush(batch);
1067 
1068    fd_batch_update_queries(batch);
1069 
1070    emit_setup<CHIP>(batch);
1071 
1072    struct pipe_surface surf = {
1073          .format = prsc->format,
1074          .texture = prsc,
1075          .u = {
1076                .tex = {
1077                      .level = level,
1078                      .first_layer = box->z,
1079                      .last_layer = box->depth + box->z - 1,
1080                },
1081          },
1082    };
1083 
1084    fd6_clear_surface<CHIP>(ctx, batch->draw, &surf, box, &color, 0);
1085 
1086    fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
1087                           FD6_FLUSH_CCU_COLOR |
1088                           FD6_FLUSH_CCU_DEPTH |
1089                           FD6_FLUSH_CACHE |
1090                           FD6_WAIT_FOR_IDLE);
1091 
1092    fd_batch_flush(batch);
1093    fd_batch_reference(&batch, NULL);
1094 
1095    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1096     * the ctx->batch may need to turn its queries back on.
1097     */
1098    fd_context_dirty(ctx, FD_DIRTY_QUERY);
1099 }
1100 
1101 template <chip CHIP>
1102 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)1103 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
1104                  uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
1105 {
1106    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
1107    uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
1108    uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
1109                          util_format_get_blocksize(psurf->format);
1110 
1111    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
1112    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
1113    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
1114                      A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
1115 
1116    OUT_REG(ring,
1117            A6XX_GRAS_2D_SRC_TL_X(0),
1118            A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1),
1119            A6XX_GRAS_2D_SRC_TL_Y(0),
1120            A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1),
1121    );
1122 
1123    /* Enable scissor bit, which will take into account the window scissor
1124     * which is set per-tile
1125     */
1126    emit_blit_setup<CHIP>(ring, psurf->format, true, NULL, unknown_8c01, ROTATE_0);
1127 
1128    /* We shouldn't be using GMEM in the layered rendering case: */
1129    assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
1130 
1131    emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
1132                  psurf->u.tex.first_layer);
1133 
1134    enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
1135    enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
1136 
1137    OUT_REG(ring,
1138            SP_PS_2D_SRC_INFO(
1139                  CHIP,
1140                  .color_format = sfmt,
1141                  .tile_mode = TILE6_2,
1142                  .color_swap = WZYX,
1143                  .srgb = util_format_is_srgb(psurf->format),
1144                  .samples = samples,
1145                  .samples_average = samples > MSAA_ONE,
1146                  .unk20 = true,
1147                  .unk22 = true,
1148            ),
1149            SP_PS_2D_SRC_SIZE(
1150                  CHIP,
1151                  .width = psurf->width,
1152                  .height = psurf->height,
1153            ),
1154            SP_PS_2D_SRC(
1155                  CHIP,
1156                  .qword = gmem_base,
1157            ),
1158            SP_PS_2D_SRC_PITCH(
1159                  CHIP,
1160                  .pitch = gmem_pitch,
1161            ),
1162    );
1163 
1164    /* sync GMEM writes with CACHE. */
1165    fd6_cache_inv<CHIP>(batch->ctx, ring);
1166 
1167    /* Wait for CACHE_INVALIDATE to land */
1168    OUT_WFI5(ring);
1169 
1170    OUT_PKT7(ring, CP_BLIT, 1);
1171    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
1172 
1173    OUT_WFI5(ring);
1174 
1175    /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
1176     * sysmem, and we generally assume that GMEM renderpasses leave their
1177     * results in sysmem, so we need to flush manually here.
1178     */
1179    fd6_emit_flushes<CHIP>(batch->ctx, ring,
1180                           FD6_FLUSH_CCU_COLOR | FD6_WAIT_FOR_IDLE);
1181 }
1182 FD_GENX(fd6_resolve_tile);
1183 
1184 template <chip CHIP>
1185 static bool
handle_rgba_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1186 handle_rgba_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1187    assert_dt
1188 {
1189    struct fd_batch *batch;
1190 
1191    assert(!(info->mask & PIPE_MASK_ZS));
1192 
1193    if (!can_do_blit(info))
1194       return false;
1195 
1196    struct fd_resource *src = fd_resource(info->src.resource);
1197    struct fd_resource *dst = fd_resource(info->dst.resource);
1198 
1199    fd6_validate_format(ctx, src, info->src.format);
1200    fd6_validate_format(ctx, dst, info->dst.format);
1201 
1202    batch = fd_bc_alloc_batch(ctx, true);
1203 
1204    fd_screen_lock(ctx->screen);
1205 
1206    fd_batch_resource_read(batch, src);
1207    fd_batch_resource_write(batch, dst);
1208 
1209    fd_screen_unlock(ctx->screen);
1210 
1211    assert(!batch->flushed);
1212 
1213    /* Marking the batch as needing flush must come after the batch
1214     * dependency tracking (resource_read()/resource_write()), as that
1215     * can trigger a flush
1216     */
1217    fd_batch_needs_flush(batch);
1218 
1219    fd_batch_update_queries(batch);
1220 
1221    emit_setup<CHIP>(batch);
1222 
1223    DBG_BLIT(info, batch);
1224 
1225    trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
1226                     info->dst.resource->target);
1227 
1228    if ((info->src.resource->target == PIPE_BUFFER) &&
1229        (info->dst.resource->target == PIPE_BUFFER)) {
1230       assert(src->layout.tile_mode == TILE6_LINEAR);
1231       assert(dst->layout.tile_mode == TILE6_LINEAR);
1232       emit_blit_buffer<CHIP>(ctx, batch->draw, info);
1233    } else {
1234       /* I don't *think* we need to handle blits between buffer <-> !buffer */
1235       assert(info->src.resource->target != PIPE_BUFFER);
1236       assert(info->dst.resource->target != PIPE_BUFFER);
1237       emit_blit_texture<CHIP>(ctx, batch->draw, info);
1238    }
1239 
1240    trace_end_blit(&batch->trace, batch->draw);
1241 
1242    fd6_emit_flushes<CHIP>(batch->ctx, batch->draw,
1243                           FD6_FLUSH_CCU_COLOR |
1244                           FD6_FLUSH_CCU_DEPTH |
1245                           FD6_FLUSH_CACHE |
1246                           FD6_WAIT_FOR_IDLE);
1247 
1248    fd_batch_flush(batch);
1249    fd_batch_reference(&batch, NULL);
1250 
1251    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
1252     * the ctx->batch may need to turn its queries back on.
1253     */
1254    fd_context_dirty(ctx, FD_DIRTY_QUERY);
1255 
1256    return true;
1257 }
1258 
1259 /**
1260  * Re-written z/s blits can still fail for various reasons (for example MSAA).
1261  * But we want to do the fallback blit with the re-written pipe_blit_info,
1262  * in particular as u_blitter cannot blit stencil.  So handle the fallback
1263  * ourself and never "fail".
1264  */
1265 template <chip CHIP>
1266 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1267 do_rewritten_blit(struct fd_context *ctx, const struct pipe_blit_info *info)
1268    assert_dt
1269 {
1270    bool success = handle_rgba_blit<CHIP>(ctx, info);
1271    if (!success) {
1272       success = fd_blitter_blit(ctx, info);
1273    }
1274    assert(success); /* fallback should never fail! */
1275    return success;
1276 }
1277 
1278 /**
1279  * Handle depth/stencil blits either via u_blitter and/or re-writing the
1280  * blit into an equivilant format that we can handle
1281  */
1282 template <chip CHIP>
1283 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1284 handle_zs_blit(struct fd_context *ctx,
1285                const struct pipe_blit_info *info) assert_dt
1286 {
1287    struct pipe_blit_info blit = *info;
1288 
1289    if (DEBUG_BLIT) {
1290       fprintf(stderr, "---- handle_zs_blit: ");
1291       dump_blit_info(info);
1292    }
1293 
1294    fail_if(info->src.format != info->dst.format);
1295 
1296    struct fd_resource *src = fd_resource(info->src.resource);
1297    struct fd_resource *dst = fd_resource(info->dst.resource);
1298 
1299    switch (info->dst.format) {
1300    case PIPE_FORMAT_S8_UINT:
1301       assert(info->mask == PIPE_MASK_S);
1302       blit.mask = PIPE_MASK_R;
1303       blit.src.format = PIPE_FORMAT_R8_UINT;
1304       blit.dst.format = PIPE_FORMAT_R8_UINT;
1305       blit.sample0_only = true;
1306       return do_rewritten_blit<CHIP>(ctx, &blit);
1307 
1308    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1309       if (info->mask & PIPE_MASK_Z) {
1310          blit.mask = PIPE_MASK_R;
1311          blit.src.format = PIPE_FORMAT_R32_FLOAT;
1312          blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1313          blit.sample0_only = true;
1314          do_rewritten_blit<CHIP>(ctx, &blit);
1315       }
1316 
1317       if (info->mask & PIPE_MASK_S) {
1318          blit.mask = PIPE_MASK_R;
1319          blit.src.format = PIPE_FORMAT_R8_UINT;
1320          blit.dst.format = PIPE_FORMAT_R8_UINT;
1321          blit.src.resource = &src->stencil->b.b;
1322          blit.dst.resource = &dst->stencil->b.b;
1323          blit.sample0_only = true;
1324          do_rewritten_blit<CHIP>(ctx, &blit);
1325       }
1326 
1327       return true;
1328 
1329    case PIPE_FORMAT_Z16_UNORM:
1330       blit.mask = PIPE_MASK_R;
1331       blit.src.format = PIPE_FORMAT_R16_UNORM;
1332       blit.dst.format = PIPE_FORMAT_R16_UNORM;
1333       blit.sample0_only = true;
1334       return do_rewritten_blit<CHIP>(ctx, &blit);
1335 
1336    case PIPE_FORMAT_Z32_UNORM:
1337    case PIPE_FORMAT_Z32_FLOAT:
1338       assert(info->mask == PIPE_MASK_Z);
1339       blit.mask = PIPE_MASK_R;
1340       blit.src.format = PIPE_FORMAT_R32_UINT;
1341       blit.dst.format = PIPE_FORMAT_R32_UINT;
1342       blit.sample0_only = true;
1343       return do_rewritten_blit<CHIP>(ctx, &blit);
1344 
1345    case PIPE_FORMAT_Z24X8_UNORM:
1346    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1347       blit.mask = 0;
1348       if (info->mask & PIPE_MASK_Z)
1349          blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1350       if (info->mask & PIPE_MASK_S)
1351          blit.mask |= PIPE_MASK_A;
1352       blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1353       blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1354       /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1355        * 8888_unorm.
1356        */
1357       if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1358          if (!src->layout.ubwc && !dst->layout.ubwc) {
1359             blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1360             blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1361          } else {
1362             if (!src->layout.ubwc)
1363                blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1364             if (!dst->layout.ubwc)
1365                blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1366          }
1367       }
1368       if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1369          blit.sample0_only = true;
1370       return fd_blitter_blit(ctx, &blit);
1371 
1372    default:
1373       return false;
1374    }
1375 }
1376 
1377 template <chip CHIP>
1378 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1379 handle_compressed_blit(struct fd_context *ctx,
1380                        const struct pipe_blit_info *info) assert_dt
1381 {
1382    struct pipe_blit_info blit = *info;
1383 
1384    if (DEBUG_BLIT) {
1385       fprintf(stderr, "---- handle_compressed_blit: ");
1386       dump_blit_info(info);
1387    }
1388 
1389    if (info->src.format != info->dst.format)
1390       return fd_blitter_blit(ctx, info);
1391 
1392    if (util_format_get_blocksize(info->src.format) == 8) {
1393       blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1394    } else {
1395       assert(util_format_get_blocksize(info->src.format) == 16);
1396       blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1397    }
1398 
1399    int bw = util_format_get_blockwidth(info->src.format);
1400    int bh = util_format_get_blockheight(info->src.format);
1401 
1402    /* NOTE: x/y *must* be aligned to block boundary (ie. in
1403     * glCompressedTexSubImage2D()) but width/height may not
1404     * be:
1405     */
1406 
1407    assert((blit.src.box.x % bw) == 0);
1408    assert((blit.src.box.y % bh) == 0);
1409 
1410    blit.src.box.x /= bw;
1411    blit.src.box.y /= bh;
1412    blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1413    blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1414 
1415    assert((blit.dst.box.x % bw) == 0);
1416    assert((blit.dst.box.y % bh) == 0);
1417 
1418    blit.dst.box.x /= bw;
1419    blit.dst.box.y /= bh;
1420    blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1421    blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1422 
1423    return do_rewritten_blit<CHIP>(ctx, &blit);
1424 }
1425 
1426 /**
1427  * For SNORM formats, copy them as the equivalent UNORM format.  If we treat
1428  * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1429  * (also -1.0), when we're supposed to be memcpying the bits. See
1430  * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1431  */
1432 template <chip CHIP>
1433 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1434 handle_snorm_copy_blit(struct fd_context *ctx,
1435                        const struct pipe_blit_info *info)
1436    assert_dt
1437 {
1438    /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1439    fail_if(info->filter == PIPE_TEX_FILTER_LINEAR);
1440 
1441    struct pipe_blit_info blit = *info;
1442 
1443    blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1444 
1445    return do_rewritten_blit<CHIP>(ctx, &blit);
1446 }
1447 
1448 template <chip CHIP>
1449 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1450 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1451 {
1452    if (info->mask & PIPE_MASK_ZS)
1453       return handle_zs_blit<CHIP>(ctx, info);
1454 
1455    if (util_format_is_compressed(info->src.format) ||
1456        util_format_is_compressed(info->dst.format))
1457       return handle_compressed_blit<CHIP>(ctx, info);
1458 
1459    if ((info->src.format == info->dst.format) &&
1460        util_format_is_snorm(info->src.format))
1461       return handle_snorm_copy_blit<CHIP>(ctx, info);
1462 
1463    return handle_rgba_blit<CHIP>(ctx, info);
1464 }
1465 
1466 template <chip CHIP>
1467 void
fd6_blitter_init(struct pipe_context * pctx)1468 fd6_blitter_init(struct pipe_context *pctx)
1469    disable_thread_safety_analysis
1470 {
1471    struct fd_context *ctx = fd_context(pctx);
1472 
1473    ctx->clear_ubwc = fd6_clear_ubwc<CHIP>;
1474    ctx->validate_format = fd6_validate_format;
1475 
1476    if (FD_DBG(NOBLIT))
1477       return;
1478 
1479    pctx->clear_buffer = fd6_clear_buffer<CHIP>;
1480    pctx->clear_texture = fd6_clear_texture<CHIP>;
1481    ctx->blit = fd6_blit<CHIP>;
1482 }
1483 FD_GENX(fd6_blitter_init);
1484 
1485 unsigned
fd6_tile_mode_for_format(enum pipe_format pfmt)1486 fd6_tile_mode_for_format(enum pipe_format pfmt)
1487 {
1488    /* basically just has to be a format we can blit, so uploads/downloads
1489     * via linear staging buffer works:
1490     */
1491    if (ok_format(pfmt))
1492       return TILE6_3;
1493 
1494    return TILE6_LINEAR;
1495 }
1496 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1497 fd6_tile_mode(const struct pipe_resource *tmpl)
1498 {
1499    /* if the mipmap level 0 is still too small to be tiled, then don't
1500     * bother pretending:
1501     */
1502    if ((tmpl->width0 < FDL_MIN_UBWC_WIDTH) &&
1503          !util_format_is_depth_or_stencil(tmpl->format))
1504       return TILE6_LINEAR;
1505 
1506    return fd6_tile_mode_for_format(tmpl->format);
1507 }
1508