xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a6xx/fd6_resource.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Rob Clark <[email protected]>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <[email protected]>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 #include "drm-uapi/drm_fourcc.h"
13 
14 #include "a6xx/fd6_blitter.h"
15 #include "fd6_resource.h"
16 #include "fdl/fd6_format_table.h"
17 #include "common/freedreno_lrz.h"
18 #include "common/freedreno_ubwc.h"
19 
20 #include "a6xx.xml.h"
21 
22 /* A subset of the valid tiled formats can be compressed.  We do
23  * already require tiled in order to be compressed, but just because
24  * it can be tiled doesn't mean it can be compressed.
25  */
26 static bool
ok_ubwc_format(struct pipe_screen * pscreen,enum pipe_format pfmt)27 ok_ubwc_format(struct pipe_screen *pscreen, enum pipe_format pfmt)
28 {
29    const struct fd_dev_info *info = fd_screen(pscreen)->info;
30 
31    switch (pfmt) {
32    case PIPE_FORMAT_X24S8_UINT:
33    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
34       /* We can't sample stencil with UBWC on a630, and we may need to be able
35        * to sample stencil at some point.  We can't just use
36        * fd_resource_uncompress() at the point of stencil sampling because
37        * that itself uses stencil sampling in the fd_blitter_blit path.
38        */
39       return info->a6xx.has_z24uint_s8uint;
40 
41    case PIPE_FORMAT_R8_G8B8_420_UNORM:
42       /* The difference between NV12 and R8_G8B8_420_UNORM is only where the
43        * conversion to RGB happens, with the latter it happens _after_ the
44        * texture samp instruction.  But dri2_get_mapping_by_fourcc() doesn't
45        * know this, so it asks for NV12 when it really meant to ask for
46        * R8_G8B8_420_UNORM.  Just treat them the same here to work around it:
47        */
48    case PIPE_FORMAT_NV12:
49       return true;
50 
51    default:
52       break;
53    }
54 
55    /* In copy_format, we treat snorm as unorm to avoid clamping.  But snorm
56     * and unorm are UBWC incompatible for special values such as all 0's or
57     * all 1's prior to a740.  Disable UBWC for snorm.
58     */
59    if (util_format_is_snorm(pfmt) &&
60        !info->a7xx.ubwc_unorm_snorm_int_compatible)
61       return false;
62 
63    /* A690 seem to have broken UBWC for depth/stencil, it requires
64     * depth flushing where we cannot realistically place it, like between
65     * ordinary draw calls writing read/depth. WSL blob seem to use ubwc
66     * sometimes for depth/stencil.
67     */
68    if (info->a6xx.broken_ds_ubwc_quirk &&
69        util_format_is_depth_or_stencil(pfmt))
70       return false;
71 
72    switch (fd6_color_format(pfmt, TILE6_LINEAR)) {
73    case FMT6_10_10_10_2_UINT:
74    case FMT6_10_10_10_2_UNORM_DEST:
75    case FMT6_11_11_10_FLOAT:
76    case FMT6_16_FLOAT:
77    case FMT6_16_16_16_16_FLOAT:
78    case FMT6_16_16_16_16_SINT:
79    case FMT6_16_16_16_16_UINT:
80    case FMT6_16_16_FLOAT:
81    case FMT6_16_16_SINT:
82    case FMT6_16_16_UINT:
83    case FMT6_16_SINT:
84    case FMT6_16_UINT:
85    case FMT6_32_32_32_32_SINT:
86    case FMT6_32_32_32_32_UINT:
87    case FMT6_32_32_SINT:
88    case FMT6_32_32_UINT:
89    case FMT6_5_6_5_UNORM:
90    case FMT6_5_5_5_1_UNORM:
91    case FMT6_8_8_8_8_SINT:
92    case FMT6_8_8_8_8_UINT:
93    case FMT6_8_8_8_8_UNORM:
94    case FMT6_8_8_8_X8_UNORM:
95    case FMT6_8_8_SINT:
96    case FMT6_8_8_UINT:
97    case FMT6_8_8_UNORM:
98    case FMT6_Z24_UNORM_S8_UINT:
99    case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
100       return true;
101    case FMT6_8_UNORM:
102       return info->a6xx.has_8bpp_ubwc;
103    default:
104       return false;
105    }
106 }
107 
108 static bool
can_do_ubwc(struct pipe_resource * prsc)109 can_do_ubwc(struct pipe_resource *prsc)
110 {
111    /* limit things to simple single level 2d for now: */
112    if ((prsc->depth0 != 1) || (prsc->array_size != 1) ||
113        (prsc->last_level != 0))
114       return false;
115    if (prsc->target != PIPE_TEXTURE_2D)
116       return false;
117    if (!ok_ubwc_format(prsc->screen, prsc->format))
118       return false;
119    return true;
120 }
121 
122 static bool
is_z24s8(enum pipe_format format)123 is_z24s8(enum pipe_format format)
124 {
125    switch (format) {
126    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
127    case PIPE_FORMAT_Z24X8_UNORM:
128    case PIPE_FORMAT_X24S8_UINT:
129    case PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
130       return true;
131    default:
132       return false;
133    }
134 }
135 
136 static bool
valid_ubwc_format_cast(struct fd_resource * rsc,enum pipe_format format)137 valid_ubwc_format_cast(struct fd_resource *rsc, enum pipe_format format)
138 {
139    const struct fd_dev_info *info = fd_screen(rsc->b.b.screen)->info;
140    enum pipe_format orig_format = rsc->b.b.format;
141 
142    assert(rsc->layout.ubwc);
143 
144    /* Special case "casting" format in hw: */
145    if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)
146       return true;
147 
148    /* If we support z24s8 ubwc then allow casts between the various
149     * permutations of z24s8:
150     */
151    if (info->a6xx.has_z24uint_s8uint && is_z24s8(format) && is_z24s8(orig_format))
152       return true;
153 
154    enum fd6_ubwc_compat_type type = fd6_ubwc_compat_mode(info, orig_format);
155    if (type == FD6_UBWC_UNKNOWN_COMPAT)
156       return false;
157 
158    return fd6_ubwc_compat_mode(info, format) == type;
159 }
160 
161 /**
162  * R8G8 have a different block width/height and height alignment from other
163  * formats that would normally be compatible (like R16), and so if we are
164  * trying to, for example, sample R16 as R8G8 we need to demote to linear.
165  */
166 static bool
is_r8g8(enum pipe_format format)167 is_r8g8(enum pipe_format format)
168 {
169    return (util_format_get_blocksize(format) == 2) &&
170          (util_format_get_nr_components(format) == 2);
171 }
172 
173 /**
174  * Can a rsc as it is currently laid out be accessed as the specified format.
175  * Returns whether the access is ok or whether the rsc needs to be demoted
176  * to uncompressed tiled or linear.
177  */
178 enum fd6_format_status
fd6_check_valid_format(struct fd_resource * rsc,enum pipe_format format)179 fd6_check_valid_format(struct fd_resource *rsc, enum pipe_format format)
180 {
181    enum pipe_format orig_format = rsc->b.b.format;
182 
183    if (orig_format == format)
184       return FORMAT_OK;
185 
186    if (rsc->layout.tile_mode && (is_r8g8(orig_format) != is_r8g8(format)))
187       return DEMOTE_TO_LINEAR;
188 
189    if (!rsc->layout.ubwc)
190       return FORMAT_OK;
191 
192    if (ok_ubwc_format(rsc->b.b.screen, format) &&
193        valid_ubwc_format_cast(rsc, format))
194       return FORMAT_OK;
195 
196    return DEMOTE_TO_TILED;
197 }
198 
199 /**
200  * Ensure the rsc is in an ok state to be used with the specified format.
201  * This handles the case of UBWC buffers used with non-UBWC compatible
202  * formats, by triggering an uncompress.
203  */
204 void
fd6_validate_format(struct fd_context * ctx,struct fd_resource * rsc,enum pipe_format format)205 fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
206                     enum pipe_format format)
207 {
208    tc_assert_driver_thread(ctx->tc);
209 
210    switch (fd6_check_valid_format(rsc, format)) {
211    case FORMAT_OK:
212       return;
213    case DEMOTE_TO_LINEAR:
214       perf_debug_ctx(ctx,
215                      "%" PRSC_FMT ": demoted to linear+uncompressed due to use as %s",
216                      PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
217 
218       fd_resource_uncompress(ctx, rsc, true);
219       return;
220    case DEMOTE_TO_TILED:
221       perf_debug_ctx(ctx,
222                      "%" PRSC_FMT ": demoted to uncompressed due to use as %s",
223                      PRSC_ARGS(&rsc->b.b), util_format_short_name(format));
224 
225       fd_resource_uncompress(ctx, rsc, false);
226       return;
227    }
228 }
229 
230 template <chip CHIP>
231 static void
setup_lrz(struct fd_resource * rsc)232 setup_lrz(struct fd_resource *rsc)
233 {
234    struct fd_screen *screen = fd_screen(rsc->b.b.screen);
235    unsigned width0 = rsc->b.b.width0;
236    unsigned height0 = rsc->b.b.height0;
237 
238    /* LRZ buffer is super-sampled: */
239    switch (rsc->b.b.nr_samples) {
240    case 4:
241       width0 *= 2;
242       FALLTHROUGH;
243    case 2:
244       height0 *= 2;
245    }
246 
247    unsigned lrz_pitch = align(DIV_ROUND_UP(width0, 8), 32);
248    unsigned lrz_height = align(DIV_ROUND_UP(height0, 8), 16);
249 
250    rsc->lrz_height = lrz_height;
251    rsc->lrz_width = lrz_pitch;
252    rsc->lrz_pitch = lrz_pitch;
253 
254    unsigned lrz_size = lrz_pitch * lrz_height * 2;
255 
256    unsigned nblocksx = DIV_ROUND_UP(DIV_ROUND_UP(width0, 8), 16);
257    unsigned nblocksy = DIV_ROUND_UP(DIV_ROUND_UP(height0, 8), 4);
258 
259    /* Fast-clear buffer is 1bit/block */
260    unsigned lrz_fc_size = DIV_ROUND_UP(nblocksx * nblocksy, 8);
261 
262    /* Fast-clear buffer cannot be larger than 512 bytes on A6XX and 1024 bytes
263     * on A7XX (HW limitation)
264     */
265    bool has_lrz_fc = screen->info->a6xx.enable_lrz_fast_clear &&
266                      lrz_fc_size <= fd_lrzfc_layout<CHIP>::FC_SIZE;
267 
268    /* Allocate a LRZ fast-clear buffer even if we aren't using FC, if the
269     * hw is re-using this buffer for direction tracking
270     */
271    if (has_lrz_fc || screen->info->a6xx.has_lrz_dir_tracking) {
272       rsc->lrz_fc_offset = lrz_size;
273       lrz_size += sizeof(fd_lrzfc_layout<CHIP>);
274    }
275 
276    rsc->lrz = fd_bo_new(screen->dev, lrz_size, FD_BO_NOMAP, "lrz");
277 }
278 
279 template <chip CHIP>
280 static uint32_t
fd6_setup_slices(struct fd_resource * rsc)281 fd6_setup_slices(struct fd_resource *rsc)
282 {
283    struct pipe_resource *prsc = &rsc->b.b;
284 
285    if (!FD_DBG(NOLRZ) && has_depth(prsc->format) && !is_z32(prsc->format))
286       setup_lrz<CHIP>(rsc);
287 
288    if (rsc->layout.ubwc && !ok_ubwc_format(prsc->screen, prsc->format))
289       rsc->layout.ubwc = false;
290 
291    fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
292                prsc->width0, prsc->height0, prsc->depth0, prsc->last_level + 1,
293                prsc->array_size, prsc->target == PIPE_TEXTURE_3D, NULL);
294 
295    return rsc->layout.size;
296 }
297 
298 static int
fill_ubwc_buffer_sizes(struct fd_resource * rsc)299 fill_ubwc_buffer_sizes(struct fd_resource *rsc)
300 {
301    struct pipe_resource *prsc = &rsc->b.b;
302    struct fdl_explicit_layout l = {
303       .offset = rsc->layout.slices[0].offset,
304       .pitch = rsc->layout.pitch0,
305    };
306 
307    if (!can_do_ubwc(prsc))
308       return -1;
309 
310    rsc->layout.ubwc = true;
311    rsc->layout.tile_mode = TILE6_3;
312 
313    if (!fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
314                     prsc->width0, prsc->height0, prsc->depth0,
315                     prsc->last_level + 1, prsc->array_size, false, &l))
316       return -1;
317 
318    if (rsc->layout.size > fd_bo_size(rsc->bo))
319       return -1;
320 
321    return 0;
322 }
323 
324 static int
fd6_layout_resource_for_modifier(struct fd_resource * rsc,uint64_t modifier)325 fd6_layout_resource_for_modifier(struct fd_resource *rsc, uint64_t modifier)
326 {
327    switch (modifier) {
328    case DRM_FORMAT_MOD_QCOM_COMPRESSED:
329       return fill_ubwc_buffer_sizes(rsc);
330    case DRM_FORMAT_MOD_LINEAR:
331       if (can_do_ubwc(&rsc->b.b)) {
332          perf_debug("%" PRSC_FMT
333                     ": not UBWC: imported with DRM_FORMAT_MOD_LINEAR!",
334                     PRSC_ARGS(&rsc->b.b));
335       }
336       return 0;
337    case DRM_FORMAT_MOD_QCOM_TILED3:
338       rsc->layout.tile_mode = fd6_tile_mode(&rsc->b.b);
339       FALLTHROUGH;
340    case DRM_FORMAT_MOD_INVALID:
341       /* For now, without buffer metadata, we must assume that buffers
342        * imported with INVALID modifier are linear
343        */
344       if (can_do_ubwc(&rsc->b.b)) {
345          perf_debug("%" PRSC_FMT
346                     ": not UBWC: imported with DRM_FORMAT_MOD_INVALID!",
347                     PRSC_ARGS(&rsc->b.b));
348       }
349       return 0;
350    default:
351       return -1;
352    }
353 }
354 
355 static bool
fd6_is_format_supported(struct pipe_screen * pscreen,enum pipe_format fmt,uint64_t modifier)356 fd6_is_format_supported(struct pipe_screen *pscreen,
357                         enum pipe_format fmt,
358                         uint64_t modifier)
359 {
360    switch (modifier) {
361    case DRM_FORMAT_MOD_LINEAR:
362       return true;
363    case DRM_FORMAT_MOD_QCOM_COMPRESSED:
364       return ok_ubwc_format(pscreen, fmt);
365    case DRM_FORMAT_MOD_QCOM_TILED3:
366       return fd6_tile_mode_for_format(fmt) == TILE6_3;
367    default:
368       return false;
369    }
370 }
371 
372 template <chip CHIP>
373 void
fd6_resource_screen_init(struct pipe_screen * pscreen)374 fd6_resource_screen_init(struct pipe_screen *pscreen)
375 {
376    struct fd_screen *screen = fd_screen(pscreen);
377 
378    screen->setup_slices = fd6_setup_slices<CHIP>;
379    screen->layout_resource_for_modifier = fd6_layout_resource_for_modifier;
380    screen->is_format_supported = fd6_is_format_supported;
381 }
382 FD_GENX(fd6_resource_screen_init);
383