xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/radeonsi/si_texture.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2010 Jerome Glisse <[email protected]>
3  * Copyright 2018 Advanced Micro Devices, Inc.
4  *
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include "drm-uapi/drm_fourcc.h"
9 #include "si_pipe.h"
10 #include "si_query.h"
11 #include "sid.h"
12 #include "frontend/drm_driver.h"
13 #include "util/format/u_format.h"
14 #include "util/os_time.h"
15 #include "util/u_log.h"
16 #include "util/u_memory.h"
17 #include "util/u_pack_color.h"
18 #include "util/u_resource.h"
19 #include "util/u_surface.h"
20 #include "util/u_transfer.h"
21 
22 #include <errno.h>
23 #include <inttypes.h>
24 
25 #include "amd/addrlib/inc/addrinterface.h"
26 #include "ac_formats.h"
27 
28 static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
29                                               const struct pipe_resource *templ,
30                                               bool tc_compatible_htile);
31 
32 static bool si_texture_is_aux_plane(const struct pipe_resource *resource);
33 
34 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
si_copy_region_with_blit(struct pipe_context * pipe,struct pipe_resource * dst,unsigned dst_level,unsigned dst_sample,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)35 static void si_copy_region_with_blit(struct pipe_context *pipe, struct pipe_resource *dst,
36                                      unsigned dst_level, unsigned dst_sample, unsigned dstx, unsigned dsty,
37                                      unsigned dstz, struct pipe_resource *src, unsigned src_level,
38                                      const struct pipe_box *src_box)
39 {
40    struct pipe_blit_info blit;
41 
42    memset(&blit, 0, sizeof(blit));
43    blit.src.resource = src;
44    blit.src.format = src->format;
45    blit.src.level = src_level;
46    blit.src.box = *src_box;
47    blit.dst.resource = dst;
48    blit.dst.format = dst->format;
49    blit.dst.level = dst_level;
50    blit.dst.box.x = dstx;
51    blit.dst.box.y = dsty;
52    blit.dst.box.z = dstz;
53    blit.dst.box.width = src_box->width;
54    blit.dst.box.height = src_box->height;
55    blit.dst.box.depth = src_box->depth;
56    blit.mask = util_format_get_mask(dst->format);
57    blit.filter = PIPE_TEX_FILTER_NEAREST;
58    blit.dst_sample = dst_sample;
59 
60    if (blit.mask) {
61       /* Only the gfx blit handles dst_sample. */
62       if (dst_sample)
63          si_gfx_blit(pipe, &blit);
64       else
65          pipe->blit(pipe, &blit);
66    }
67 }
68 
69 /* Copy all planes of multi-plane texture */
si_copy_multi_plane_texture(struct pipe_context * ctx,struct pipe_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct pipe_resource * src,unsigned src_level,const struct pipe_box * src_box)70 static bool si_copy_multi_plane_texture(struct pipe_context *ctx, struct pipe_resource *dst,
71                                         unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,
72                                         struct pipe_resource *src, unsigned src_level,
73                                         const struct pipe_box *src_box)
74 {
75    unsigned i, dx, dy;
76    struct si_texture *src_tex = (struct si_texture *)src;
77    struct si_texture *dst_tex = (struct si_texture *)dst;
78    struct pipe_box sbox;
79 
80    if (src_tex->multi_plane_format == PIPE_FORMAT_NONE || src_tex->plane_index != 0)
81       return false;
82 
83    assert(src_tex->multi_plane_format == dst_tex->multi_plane_format);
84    assert(dst_tex->plane_index == 0 && src_tex->num_planes == dst_tex->num_planes);
85 
86    sbox = *src_box;
87 
88    for (i = 0; i < src_tex->num_planes && src && dst; ++i) {
89       dx = util_format_get_plane_width(src_tex->multi_plane_format, i, dstx);
90       dy = util_format_get_plane_height(src_tex->multi_plane_format, i, dsty);
91       sbox.x = util_format_get_plane_width(src_tex->multi_plane_format, i, src_box->x);
92       sbox.y = util_format_get_plane_height(src_tex->multi_plane_format, i, src_box->y);
93       sbox.width = util_format_get_plane_width(src_tex->multi_plane_format, i, src_box->width);
94       sbox.height = util_format_get_plane_height(src_tex->multi_plane_format, i, src_box->height);
95 
96       si_resource_copy_region(ctx, dst, dst_level, dx, dy, dstz, src, src_level, &sbox);
97 
98       src = src->next;
99       dst = dst->next;
100    }
101 
102    return true;
103 }
104 
105 /* Copy from a full GPU texture to a transfer's staging one. */
si_copy_to_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)106 static void si_copy_to_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
107 {
108    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
109    struct pipe_resource *dst = &stransfer->staging->b.b;
110    struct pipe_resource *src = transfer->resource;
111    /* level means sample_index - 1 with MSAA. Used by texture uploads. */
112    unsigned src_level = src->nr_samples > 1 ? 0 : transfer->level;
113 
114    if (src->nr_samples > 1 || ((struct si_texture *)src)->is_depth) {
115       si_copy_region_with_blit(ctx, dst, 0, 0, 0, 0, 0, src, src_level, &transfer->box);
116       return;
117    }
118 
119    if (si_copy_multi_plane_texture(ctx, dst, 0, 0, 0, 0, src, src_level, &transfer->box))
120       return;
121 
122    si_resource_copy_region(ctx, dst, 0, 0, 0, 0, src, src_level, &transfer->box);
123 }
124 
125 /* Copy from a transfer's staging texture to a full GPU one. */
si_copy_from_staging_texture(struct pipe_context * ctx,struct si_transfer * stransfer)126 static void si_copy_from_staging_texture(struct pipe_context *ctx, struct si_transfer *stransfer)
127 {
128    struct pipe_transfer *transfer = (struct pipe_transfer *)stransfer;
129    struct pipe_resource *dst = transfer->resource;
130    struct pipe_resource *src = &stransfer->staging->b.b;
131    struct pipe_box sbox;
132 
133    u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
134 
135    if (dst->nr_samples > 1 || ((struct si_texture *)dst)->is_depth) {
136       unsigned dst_level = dst->nr_samples > 1 ? 0 : transfer->level;
137       unsigned dst_sample = dst->nr_samples > 1 ? transfer->level : 0;
138 
139       si_copy_region_with_blit(ctx, dst, dst_level, dst_sample, transfer->box.x, transfer->box.y,
140                                transfer->box.z, src, 0, &sbox);
141       return;
142    }
143 
144    if (si_copy_multi_plane_texture(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
145                                    transfer->box.z, src, 0, &sbox))
146       return;
147 
148    if (util_format_is_compressed(dst->format)) {
149       sbox.width = util_format_get_nblocksx(dst->format, sbox.width);
150       sbox.height = util_format_get_nblocksx(dst->format, sbox.height);
151    }
152 
153    si_resource_copy_region(ctx, dst, transfer->level, transfer->box.x, transfer->box.y,
154                            transfer->box.z, src, 0, &sbox);
155 }
156 
/* Return the byte offset of (level, box) inside the texture BO, and output the
 * row stride in bytes (*stride) and the slice stride in bytes (*layer_stride).
 * With box == NULL, only the strides are written; the return value is then 0
 * on GFX9+ and the base offset of the level on GFX6-8.
 */
static uint64_t si_texture_get_offset(struct si_screen *sscreen, struct si_texture *tex,
                                      unsigned level, const struct pipe_box *box, unsigned *stride,
                                      uintptr_t *layer_stride)
{
   if (sscreen->info.gfx_level >= GFX9) {
      unsigned pitch;
      /* Linear surfaces have a per-level pitch; tiled ones share one pitch. */
      if (tex->surface.is_linear) {
         pitch = tex->surface.u.gfx9.pitch[level];
      } else {
         pitch = tex->surface.u.gfx9.surf_pitch;
      }

      *stride = pitch * tex->surface.bpe;
      *layer_stride = tex->surface.u.gfx9.surf_slice_size;

      if (!box)
         return 0;

      /* Each texture is an array of slices. Each slice is an array
       * of mipmap levels. */
      return tex->surface.u.gfx9.surf_offset + box->z * tex->surface.u.gfx9.surf_slice_size +
             tex->surface.u.gfx9.offset[level] +
             /* box->x/y are divided by the block dims to index compressed blocks. */
             (box->y / tex->surface.blk_h * (uint64_t)pitch + box->x / tex->surface.blk_w) *
             tex->surface.bpe;
   } else {
      *stride = tex->surface.u.legacy.level[level].nblk_x * tex->surface.bpe;
      /* layer_stride is uintptr_t; make sure the slice size fits. */
      assert((uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
      *layer_stride = (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4;

      if (!box)
         return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256;

      /* Each texture is an array of mipmap levels. Each level is
       * an array of slices. */
      return (uint64_t)tex->surface.u.legacy.level[level].offset_256B * 256 +
             box->z * (uint64_t)tex->surface.u.legacy.level[level].slice_size_dw * 4 +
             (box->y / tex->surface.blk_h * tex->surface.u.legacy.level[level].nblk_x +
              box->x / tex->surface.blk_w) *
                tex->surface.bpe;
   }
}
198 
/* Compute the radeon_surf layout (flags, bpe, HTILE/DCC/FMASK enablement) for
 * a texture and call the winsys surface_init to finalize it.
 * Returns 0 on success or the winsys error code.
 */
static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface,
                           const struct pipe_resource *ptex, enum radeon_surf_mode array_mode,
                           uint64_t modifier, bool is_imported, bool is_scanout,
                           bool is_flushed_depth, bool tc_compatible_htile)
{
   const struct util_format_description *desc = util_format_description(ptex->format);
   bool is_depth = util_format_has_depth(desc);
   bool is_stencil = util_format_has_stencil(desc);
   int r;
   unsigned bpe;
   uint64_t flags = 0;

   if (!is_flushed_depth && ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
      bpe = 4; /* stencil is allocated separately */
   } else {
      bpe = util_format_get_blocksize(ptex->format);
      assert(util_is_power_of_two_or_zero(bpe));
   }

   if (sscreen->info.gfx_level >= GFX12) {
      /* GFX12 path: Z/S handling and DCC disable conditions. */
      if (!is_flushed_depth && is_depth) {
         flags |= RADEON_SURF_ZBUFFER;
         if (is_stencil)
            flags |= RADEON_SURF_SBUFFER;

         if (sscreen->debug_flags & DBG(NO_HYPERZ) ||
             ptex->flags & PIPE_RESOURCE_FLAG_SPARSE)
            flags |= RADEON_SURF_NO_HTILE;
      }

      /* TODO: Set these for scanout after display DCC is enabled. The reason these are not set is
       * because they overlap DCC_OFFSET_256B and the kernel driver incorrectly reads DCC_OFFSET_256B
       * on GFX12, which completely breaks the display code.
       */
      if (!is_imported && !(ptex->bind & PIPE_BIND_SCANOUT)) {
         enum pipe_format format = util_format_get_depth_only(ptex->format);

         /* These should be set for both color and Z/S. */
         surface->u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format);
         surface->u.gfx9.color.dcc_data_format = ac_get_cb_format(sscreen->info.gfx_level, format);
      }

      /* DCC can only be opted out of when no modifier pins the layout. */
      if (surface->modifier == DRM_FORMAT_MOD_INVALID &&
          (ptex->bind & PIPE_BIND_CONST_BW ||
           ptex->bind & PIPE_BIND_PROTECTED ||
           sscreen->debug_flags & DBG(NO_DCC) ||
           (ptex->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_DCC))))
         flags |= RADEON_SURF_DISABLE_DCC;
   } else {
      /* Gfx6-11 */
      if (!is_flushed_depth && is_depth) {
         flags |= RADEON_SURF_ZBUFFER;

         if ((sscreen->debug_flags & DBG(NO_HYPERZ)) ||
             (ptex->bind & PIPE_BIND_SHARED) || is_imported) {
            flags |= RADEON_SURF_NO_HTILE;
         } else if (tc_compatible_htile &&
                    (sscreen->info.gfx_level >= GFX9 || array_mode == RADEON_SURF_MODE_2D)) {
            /* TC-compatible HTILE only supports Z32_FLOAT.
             * GFX9 also supports Z16_UNORM.
             * On GFX8, promote Z16 to Z32. DB->CB copies will convert
             * the format for transfers.
             */
            if (sscreen->info.gfx_level == GFX8)
               bpe = 4;

            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
         }

         if (is_stencil)
            flags |= RADEON_SURF_SBUFFER;
      }

      /* Disable DCC? (it can't be disabled if modifiers are used) */
      if (sscreen->info.gfx_level >= GFX8 && modifier == DRM_FORMAT_MOD_INVALID && !is_imported) {
         /* Global options that disable DCC. */
         if (ptex->nr_samples >= 2 && sscreen->debug_flags & DBG(NO_DCC_MSAA))
            flags |= RADEON_SURF_DISABLE_DCC;

         /* Shared textures must always set up DCC. If it's not present, it will be disabled by
          * si_get_opaque_metadata later.
          */
         if (!is_imported && sscreen->debug_flags & DBG(NO_DCC))
            flags |= RADEON_SURF_DISABLE_DCC;

         /* R9G9B9E5 isn't supported for rendering by older generations. */
         if (sscreen->info.gfx_level < GFX10_3 &&
             ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
            flags |= RADEON_SURF_DISABLE_DCC;

         /* If constant (non-data-dependent) format is requested, disable DCC: */
         if (ptex->bind & PIPE_BIND_CONST_BW)
            flags |= RADEON_SURF_DISABLE_DCC;

         /* Per-generation DCC erratum workarounds. */
         switch (sscreen->info.gfx_level) {
         case GFX8:
            /* Stoney: 128bpp MSAA textures randomly fail piglit tests with DCC. */
            if (sscreen->info.family == CHIP_STONEY && bpe == 16 && ptex->nr_samples >= 2)
               flags |= RADEON_SURF_DISABLE_DCC;

            /* DCC clear for 4x and 8x MSAA array textures unimplemented. */
            if (ptex->nr_storage_samples >= 4 && ptex->array_size > 1)
               flags |= RADEON_SURF_DISABLE_DCC;
            break;

         case GFX9:
            /* DCC MSAA fails this on Raven:
             *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.2_samples.html
             * and this on Picasso:
             *    https://www.khronos.org/registry/webgl/sdk/tests/deqp/functional/gles3/fbomultisample.4_samples.html
             */
            if (sscreen->info.family == CHIP_RAVEN && ptex->nr_storage_samples >= 2 && bpe < 4)
               flags |= RADEON_SURF_DISABLE_DCC;

            /* Vega10 fails these 2x and 4x MSAA tests with DCC:
             *    piglit/bin/ext_framebuffer_multisample-formats 2 GL_EXT_texture_snorm
             *    piglit/bin/ext_framebuffer_multisample-formats 4 GL_EXT_texture_snorm
             */
            if ((ptex->nr_storage_samples == 2 || ptex->nr_storage_samples == 4) && bpe <= 2 &&
                util_format_is_snorm(ptex->format))
               flags |= RADEON_SURF_DISABLE_DCC;

            /* Vega10 fails these MSAA tests with DCC:
             *    piglit/bin/ext_framebuffer_multisample-formats 2 GL_ARB_texture_float
             *    piglit/bin/ext_framebuffer_multisample-formats 2 GL_ARB_texture_rg-float
             */
            if (ptex->nr_storage_samples == 2 && bpe == 2 && util_format_is_float(ptex->format))
               flags |= RADEON_SURF_DISABLE_DCC;

            /* We allow S8_UINT as a color format, and piglit/draw-pixels fails if we enable DCC. */
            if (ptex->format == PIPE_FORMAT_S8_UINT)
               flags |= RADEON_SURF_DISABLE_DCC;
            break;

         case GFX10:
         case GFX10_3:
            if (ptex->nr_storage_samples >= 2 && !sscreen->options.dcc_msaa)
               flags |= RADEON_SURF_DISABLE_DCC;

            /* Navi10 fails these MSAA tests with DCC:
             *    piglit/bin/arb_sample_shading-samplemask 2 all all
             *    piglit/bin/arb_sample_shading-samplemask 4 all all
             *    piglit/bin/ext_framebuffer_multisample-formats 2 GL_ARB_texture_float
             *    piglit/bin/ext_framebuffer_multisample-formats 2 GL_EXT_texture_integer
             */
            if (sscreen->info.gfx_level == GFX10 &&
                (ptex->nr_storage_samples == 2 || ptex->nr_storage_samples == 4))
               flags |= RADEON_SURF_DISABLE_DCC;
            break;

         case GFX11:
         case GFX11_5:
            break;

         default:
            assert(0);
         }
      }

      if (sscreen->debug_flags & DBG(NO_FMASK))
         flags |= RADEON_SURF_NO_FMASK;

      /* Sparse (PRT) textures can't use any of the compression metadata. */
      if (ptex->flags & PIPE_RESOURCE_FLAG_SPARSE) {
         flags |= RADEON_SURF_NO_FMASK |
                  RADEON_SURF_NO_HTILE |
                  RADEON_SURF_DISABLE_DCC;
      }
   }

   if (is_scanout) {
      /* This should catch bugs in gallium users setting incorrect flags. */
      assert(ptex->nr_samples <= 1 && ptex->depth0 == 1 &&
             ptex->last_level == 0 && !(flags & RADEON_SURF_Z_OR_SBUFFER));

      flags |= RADEON_SURF_SCANOUT;
   }

   if (ptex->bind & PIPE_BIND_SHARED)
      flags |= RADEON_SURF_SHAREABLE;

   if (is_imported)
      flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;

   if (ptex->flags & PIPE_RESOURCE_FLAG_SPARSE)
      flags |= RADEON_SURF_PRT;

   surface->modifier = modifier;

   /* Let the winsys/addrlib compute the final layout. */
   r = sscreen->ws->surface_init(sscreen->ws, &sscreen->info, ptex, flags, bpe, array_mode,
                                 surface);
   if (r) {
      return r;
   }

   return 0;
}
395 
/* Run fast-clear elimination on a texture and, if any decompression actually
 * happened, flush the context. *ctx_flushed (optional) reports whether a
 * flush was issued. */
void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,
                                   bool *ctx_flushed)
{
   struct pipe_context *ctx = &sctx->b;
   unsigned decompress_calls_before = sctx->num_decompress_calls;

   ctx->flush_resource(ctx, &tex->buffer.b.b);

   /* Flush only if any fast clear elimination took place. */
   bool flushed = sctx->num_decompress_calls != decompress_calls_before;
   if (flushed)
      ctx->flush(ctx, NULL, 0);

   if (ctx_flushed)
      *ctx_flushed = flushed;
}
414 
si_texture_discard_cmask(struct si_screen * sscreen,struct si_texture * tex)415 void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex)
416 {
417    if (!tex->cmask_buffer)
418       return;
419 
420    assert(tex->buffer.b.b.nr_samples <= 1);
421 
422    /* Disable CMASK. */
423    tex->cmask_base_address_reg = tex->buffer.gpu_address >> 8;
424    tex->dirty_level_mask = 0;
425 
426    tex->cb_color_info &= ~S_028C70_FAST_CLEAR(1);
427 
428    if (tex->cmask_buffer != &tex->buffer)
429       si_resource_reference(&tex->cmask_buffer, NULL);
430 
431    tex->cmask_buffer = NULL;
432 
433    /* Notify all contexts about the change. */
434    p_atomic_inc(&sscreen->dirty_tex_counter);
435    p_atomic_inc(&sscreen->compressed_colortex_counter);
436 }
437 
si_can_disable_dcc(struct si_texture * tex)438 static bool si_can_disable_dcc(struct si_texture *tex)
439 {
440    /* We can't disable DCC if it can be written by another process. */
441    return !tex->is_depth &&
442           tex->surface.meta_offset &&
443           (!tex->buffer.b.is_shared ||
444            !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) &&
445           !ac_modifier_has_dcc(tex->surface.modifier);
446 }
447 
si_texture_discard_dcc(struct si_screen * sscreen,struct si_texture * tex)448 static bool si_texture_discard_dcc(struct si_screen *sscreen, struct si_texture *tex)
449 {
450    if (!si_can_disable_dcc(tex))
451       return false;
452 
453    /* Disable DCC. */
454    ac_surface_zero_dcc_fields(&tex->surface);
455 
456    /* Notify all contexts about the change. */
457    p_atomic_inc(&sscreen->dirty_tex_counter);
458    return true;
459 }
460 
461 /**
462  * Disable DCC for the texture. (first decompress, then discard metadata).
463  *
464  * There is unresolved multi-context synchronization issue between
465  * screen::aux_context and the current context. If applications do this with
466  * multiple contexts, it's already undefined behavior for them and we don't
467  * have to worry about that. The scenario is:
468  *
469  * If context 1 disables DCC and context 2 has queued commands that write
470  * to the texture via CB with DCC enabled, and the order of operations is
471  * as follows:
472  *   context 2 queues draw calls rendering to the texture, but doesn't flush
473  *   context 1 disables DCC and flushes
474  *   context 1 & 2 reset descriptors and FB state
475  *   context 2 flushes (new compressed tiles written by the draw calls)
476  *   context 1 & 2 read garbage, because DCC is disabled, yet there are
477  *   compressed tiled
478  *
479  * \param sctx  the current context if you have one, or sscreen->aux_context
480  *              if you don't.
481  */
si_texture_disable_dcc(struct si_context * sctx,struct si_texture * tex)482 bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex)
483 {
484    struct si_screen *sscreen = sctx->screen;
485 
486    if (!sctx->has_graphics)
487       return si_texture_discard_dcc(sscreen, tex);
488 
489    if (!si_can_disable_dcc(tex))
490       return false;
491 
492    /* Decompress DCC. */
493    si_decompress_dcc(sctx, tex);
494    sctx->b.flush(&sctx->b, NULL, 0);
495 
496    return si_texture_discard_dcc(sscreen, tex);
497 }
498 
/* Reallocate a texture in place with new bind flags (e.g. to force a linear
 * layout for CPU mapping), optionally preserving its contents, then make the
 * old si_texture point at the new storage by copying over all relevant
 * fields. No-op for shared or multi-plane textures.
 */
static void si_reallocate_texture_inplace(struct si_context *sctx, struct si_texture *tex,
                                          unsigned new_bind_flag, bool invalidate_storage)
{
   struct pipe_screen *screen = sctx->b.screen;
   struct si_texture *new_tex;
   struct pipe_resource templ = tex->buffer.b.b;
   unsigned i;

   templ.bind |= new_bind_flag;

   /* Shared and multi-plane textures can't be swapped out from under users. */
   if (tex->buffer.b.is_shared || tex->num_planes > 1)
      return;

   if (new_bind_flag == PIPE_BIND_LINEAR) {
      if (tex->surface.is_linear)
         return;

      /* This fails with MSAA, depth, and compressed textures. */
      if (si_choose_tiling(sctx->screen, &templ, false) != RADEON_SURF_MODE_LINEAR_ALIGNED)
         return;
   }

   /* Inherit the modifier from the old texture. */
   if (tex->surface.modifier != DRM_FORMAT_MOD_INVALID && screen->resource_create_with_modifiers)
      new_tex = (struct si_texture *)screen->resource_create_with_modifiers(screen, &templ,
                                                                            &tex->surface.modifier, 1);
   else
      new_tex = (struct si_texture *)screen->resource_create(screen, &templ);

   if (!new_tex)
      return;

   /* Copy the pixels to the new texture. */
   if (!invalidate_storage) {
      for (i = 0; i <= templ.last_level; i++) {
         struct pipe_box box;

         u_box_3d(0, 0, 0, u_minify(templ.width0, i), u_minify(templ.height0, i),
                  util_num_layers(&templ, i), &box);

         si_resource_copy_region(&sctx->b, &new_tex->buffer.b.b,
                                 i, 0, 0, 0, &tex->buffer.b.b, i, &box);
      }
   }

   /* Linear textures carry no CMASK/DCC metadata. */
   if (new_bind_flag == PIPE_BIND_LINEAR) {
      si_texture_discard_cmask(sctx->screen, tex);
      si_texture_discard_dcc(sctx->screen, tex);
   }

   /* Replace the structure fields of tex. */
   tex->buffer.b.b.bind = templ.bind;
   radeon_bo_reference(sctx->screen->ws, &tex->buffer.buf, new_tex->buffer.buf);
   tex->buffer.gpu_address = new_tex->buffer.gpu_address;
   tex->buffer.bo_size = new_tex->buffer.bo_size;
   tex->buffer.bo_alignment_log2 = new_tex->buffer.bo_alignment_log2;
   tex->buffer.domains = new_tex->buffer.domains;
   tex->buffer.flags = new_tex->buffer.flags;

   tex->surface = new_tex->surface;
   si_texture_reference(&tex->flushed_depth_texture, new_tex->flushed_depth_texture);

   tex->surface.fmask_offset = new_tex->surface.fmask_offset;
   tex->surface.cmask_offset = new_tex->surface.cmask_offset;
   tex->cmask_base_address_reg = new_tex->cmask_base_address_reg;

   /* Transfer CMASK buffer ownership, distinguishing the case where the
    * CMASK lives inside the texture BO from a separate CMASK BO. */
   if (tex->cmask_buffer == &tex->buffer)
      tex->cmask_buffer = NULL;
   else
      si_resource_reference(&tex->cmask_buffer, NULL);

   if (new_tex->cmask_buffer == &new_tex->buffer)
      tex->cmask_buffer = &tex->buffer;
   else
      si_resource_reference(&tex->cmask_buffer, new_tex->cmask_buffer);

   tex->surface.meta_offset = new_tex->surface.meta_offset;
   tex->cb_color_info = new_tex->cb_color_info;
   memcpy(tex->color_clear_value, new_tex->color_clear_value, sizeof(tex->color_clear_value));
   tex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;

   memcpy(tex->depth_clear_value, new_tex->depth_clear_value, sizeof(tex->depth_clear_value));
   tex->dirty_level_mask = new_tex->dirty_level_mask;
   tex->stencil_dirty_level_mask = new_tex->stencil_dirty_level_mask;
   tex->db_render_format = new_tex->db_render_format;
   memcpy(tex->stencil_clear_value, new_tex->stencil_clear_value, sizeof(tex->stencil_clear_value));
   tex->tc_compatible_htile = new_tex->tc_compatible_htile;
   tex->depth_cleared_level_mask_once = new_tex->depth_cleared_level_mask_once;
   tex->stencil_cleared_level_mask_once = new_tex->stencil_cleared_level_mask_once;
   tex->upgraded_depth = new_tex->upgraded_depth;
   tex->db_compatible = new_tex->db_compatible;
   tex->can_sample_z = new_tex->can_sample_z;
   tex->can_sample_s = new_tex->can_sample_s;

   tex->displayable_dcc_dirty = new_tex->displayable_dcc_dirty;

   /* The linear path must have discarded all compression metadata above. */
   if (new_bind_flag == PIPE_BIND_LINEAR) {
      assert(!tex->surface.meta_offset);
      assert(!tex->cmask_buffer);
      assert(!tex->surface.fmask_size);
      assert(!tex->is_depth);
   }

   si_texture_reference(&new_tex, NULL);

   p_atomic_inc(&sctx->screen->dirty_tex_counter);
}
606 
/* Build a texture descriptor for the BO metadata and attach it to the buffer
 * so other processes/UMDs importing the BO can reconstruct the layout.
 */
static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture *tex)
{
   struct pipe_resource *res = &tex->buffer.b.b;
   struct radeon_bo_metadata md;

   memset(&md, 0, sizeof(md));

   /* FMASK is never exported through metadata. */
   assert(tex->surface.fmask_size == 0);

   static const unsigned char swizzle[] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                                           PIPE_SWIZZLE_W};
   bool is_array = util_texture_is_array(res->target);
   uint32_t desc[8];

   /* Build the full image descriptor covering all levels and layers. */
   sscreen->make_texture_descriptor(sscreen, tex, true, res->target,
                                    tex->is_depth ? tex->db_render_format : res->format, swizzle, 0,
                                    res->last_level, 0, is_array ? res->array_size - 1 : 0,
                                    res->width0, res->height0, res->depth0, true, desc, NULL);
   si_set_mutable_tex_desc_fields(sscreen, tex, &tex->surface.u.legacy.level[0], 0, 0,
                                  tex->surface.blk_w, false, 0, desc);

   /* Serialize surface + descriptor into the opaque UMD metadata blob. */
   ac_surface_compute_umd_metadata(&sscreen->info, &tex->surface,
                                   tex->buffer.b.b.last_level + 1,
                                   desc, &md.size_metadata, md.metadata,
                                   sscreen->debug_flags & DBG(EXTRA_METADATA));
   sscreen->ws->buffer_set_metadata(sscreen->ws, tex->buffer.buf, &md, &tex->surface);
}
634 
si_displayable_dcc_needs_explicit_flush(struct si_texture * tex)635 static bool si_displayable_dcc_needs_explicit_flush(struct si_texture *tex)
636 {
637    struct si_screen *sscreen = (struct si_screen *)tex->buffer.b.b.screen;
638 
639    if (sscreen->info.gfx_level <= GFX8)
640       return false;
641 
642    /* With modifiers and > 1 planes any applications will know that they
643     * cannot do frontbuffer rendering with the texture. */
644    if (ac_surface_get_nplanes(&tex->surface) > 1)
645       return false;
646 
647    return tex->surface.is_displayable && tex->surface.meta_offset;
648 }
649 
si_resource_get_param(struct pipe_screen * screen,struct pipe_context * context,struct pipe_resource * resource,unsigned plane,unsigned layer,unsigned level,enum pipe_resource_param param,unsigned handle_usage,uint64_t * value)650 static bool si_resource_get_param(struct pipe_screen *screen, struct pipe_context *context,
651                                   struct pipe_resource *resource, unsigned plane, unsigned layer,
652                                   unsigned level,
653                                   enum pipe_resource_param param, unsigned handle_usage,
654                                   uint64_t *value)
655 {
656    while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
657       --plane;
658       resource = resource->next;
659    }
660 
661    struct si_screen *sscreen = (struct si_screen *)screen;
662    struct si_texture *tex = (struct si_texture *)resource;
663    struct winsys_handle whandle;
664 
665    switch (param) {
666    case PIPE_RESOURCE_PARAM_NPLANES:
667       if (resource->target == PIPE_BUFFER)
668          *value = 1;
669       else if (tex->num_planes > 1)
670          *value = tex->num_planes;
671       else
672          *value = ac_surface_get_nplanes(&tex->surface);
673       return true;
674 
675    case PIPE_RESOURCE_PARAM_STRIDE:
676       if (resource->target == PIPE_BUFFER)
677          *value = 0;
678       else
679          *value = ac_surface_get_plane_stride(sscreen->info.gfx_level,
680                                               &tex->surface, plane, level);
681       return true;
682 
683    case PIPE_RESOURCE_PARAM_OFFSET:
684       if (resource->target == PIPE_BUFFER) {
685          *value = 0;
686       } else {
687          uint64_t level_offset = 0;
688          if (sscreen->info.gfx_level >= GFX9 && tex->surface.is_linear)
689             level_offset = tex->surface.u.gfx9.offset[level];
690          *value = ac_surface_get_plane_offset(sscreen->info.gfx_level,
691                                               &tex->surface, plane, layer)  + level_offset;
692       }
693       return true;
694 
695    case PIPE_RESOURCE_PARAM_MODIFIER:
696       *value = tex->surface.modifier;
697       return true;
698 
699    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED:
700    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS:
701    case PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD:
702       memset(&whandle, 0, sizeof(whandle));
703 
704       if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_SHARED)
705          whandle.type = WINSYS_HANDLE_TYPE_SHARED;
706       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_KMS)
707          whandle.type = WINSYS_HANDLE_TYPE_KMS;
708       else if (param == PIPE_RESOURCE_PARAM_HANDLE_TYPE_FD)
709          whandle.type = WINSYS_HANDLE_TYPE_FD;
710 
711       if (!screen->resource_get_handle(screen, context, resource, &whandle, handle_usage))
712          return false;
713 
714       *value = whandle.handle;
715       return true;
716    case PIPE_RESOURCE_PARAM_LAYER_STRIDE:
717       break;
718    }
719    return false;
720 }
721 
si_texture_get_info(struct pipe_screen * screen,struct pipe_resource * resource,unsigned * pstride,unsigned * poffset)722 static void si_texture_get_info(struct pipe_screen *screen, struct pipe_resource *resource,
723                                 unsigned *pstride, unsigned *poffset)
724 {
725    uint64_t value;
726 
727    if (pstride) {
728       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_STRIDE, 0, &value);
729       *pstride = value;
730    }
731 
732    if (poffset) {
733       si_resource_get_param(screen, NULL, resource, 0, 0, 0, PIPE_RESOURCE_PARAM_OFFSET, 0, &value);
734       *poffset = value;
735    }
736 }
737 
/* pipe_screen::resource_get_handle implementation for radeonsi.
 *
 * Prepares `resource` for export to an external client and fills `whandle`
 * (stride, offset, modifier, and the winsys handle). Along the way this may:
 *  - reallocate suballocated or non-shareable storage into an exportable BO,
 *  - disable DCC and/or discard CMASK depending on `usage`,
 *  - flush either the caller's context or the screen's aux context.
 *
 * `ctx` may be NULL; in that case the aux context is borrowed and must be
 * released via si_put_aux_context_flush() on every exit path.
 * Returns false on failure (e.g. MSAA/depth plane export, OOM).
 */
static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_context *ctx,
                                  struct pipe_resource *resource, struct winsys_handle *whandle,
                                  unsigned usage)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct si_context *sctx;
   struct si_resource *res = si_resource(resource);
   struct si_texture *tex = (struct si_texture *)resource;
   bool update_metadata = false;
   unsigned stride, offset, slice_size;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   /* Set when a deferred flush is still owed before the handle is returned;
    * cleared again when a helper already flushed the context for us.
    */
   bool flush = false;

   /* If `ctx` wraps a threaded context, sync and unwrap it first. */
   ctx = threaded_context_unwrap_sync(ctx);
   sctx = ctx ? (struct si_context *)ctx : si_get_aux_context(&sscreen->aux_context.general);

   if (resource->target != PIPE_BUFFER) {
      unsigned plane = whandle->plane;

      /* Individual planes are chained pipe_resource instances. */
      while (plane && resource->next && !si_texture_is_aux_plane(resource->next)) {
         resource = resource->next;
         --plane;
      }

      res = si_resource(resource);
      tex = (struct si_texture *)resource;

      /* This is not supported now, but it might be required for OpenCL
       * interop in the future.
       */
      if (resource->nr_samples > 1 || tex->is_depth) {
         if (!ctx)
            si_put_aux_context_flush(&sscreen->aux_context.general);
         return false;
      }

      whandle->size = tex->buffer.bo_size;

      /* A plane index still remaining here refers to a sub-plane inside
       * this texture's own surface layout (multi-plane single-BO case);
       * answer directly from the surface info.
       */
      if (plane) {
         if (!ctx)
            si_put_aux_context_flush(&sscreen->aux_context.general);
         whandle->offset = ac_surface_get_plane_offset(sscreen->info.gfx_level,
                                                       &tex->surface, plane, 0);
         whandle->stride = ac_surface_get_plane_stride(sscreen->info.gfx_level,
                                                       &tex->surface, plane, 0);
         whandle->modifier = tex->surface.modifier;
         return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
      }

      /* Move a suballocated texture into a non-suballocated allocation. */
      if (sscreen->ws->buffer_is_suballocated(res->buf) || tex->surface.tile_swizzle ||
          (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
           sscreen->info.has_local_buffers)) {
         assert(!res->b.is_shared);
         si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_SHARED, false);
         flush = true;
         assert(res->b.b.bind & PIPE_BIND_SHARED);
         assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
         assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
         assert(tex->surface.tile_swizzle == 0);
      }

      /* Since shader image stores don't support DCC on GFX8,
       * disable it for external clients that want write
       * access.
       */
      if (sscreen->debug_flags & DBG(NO_EXPORTED_DCC) ||
          (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && !tex->is_depth && tex->surface.meta_offset) ||
          /* Displayable DCC requires an explicit flush. */
          (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
           si_displayable_dcc_needs_explicit_flush(tex))) {
         if (si_texture_disable_dcc(sctx, tex)) {
            update_metadata = true;
            /* si_texture_disable_dcc flushes the context */
            flush = false;
         }
      }

      if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
          (tex->cmask_buffer || (!tex->is_depth && tex->surface.meta_offset))) {
         /* Eliminate fast clear (both CMASK and DCC) */
         bool flushed;
         si_eliminate_fast_color_clear(sctx, tex, &flushed);
         /* eliminate_fast_color_clear sometimes flushes the context */
         flush = !flushed;

         /* Disable CMASK if flush_resource isn't going
          * to be called.
          */
         if (tex->cmask_buffer)
            si_texture_discard_cmask(sscreen, tex);
      }

      /* Set metadata. */
      if ((!res->b.is_shared || update_metadata) && whandle->offset == 0)
         si_set_tex_bo_metadata(sscreen, tex);

      /* slice_size lets the layer index below be converted into a byte
       * offset. NOTE(review): slice_size is `unsigned` while the GFX9 value
       * is 64-bit — presumably exported slices never exceed 4 GiB; confirm.
       */
      if (sscreen->info.gfx_level >= GFX9) {
         slice_size = tex->surface.u.gfx9.surf_slice_size;
      } else {
         slice_size = (uint64_t)tex->surface.u.legacy.level[0].slice_size_dw * 4;
      }

      modifier = tex->surface.modifier;
   } else {
      tc_buffer_disable_cpu_storage(&res->b.b);

      /* Buffer exports are for the OpenCL interop. */
      /* Move a suballocated buffer into a non-suballocated allocation. */
      if (sscreen->ws->buffer_is_suballocated(res->buf) ||
          /* A DMABUF export always fails if the BO is local. */
          (tex->buffer.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
           sscreen->info.has_local_buffers)) {
         assert(!res->b.is_shared);

         /* Allocate a new buffer with PIPE_BIND_SHARED. */
         struct pipe_resource templ = res->b.b;
         templ.bind |= PIPE_BIND_SHARED;

         struct pipe_resource *newb = screen->resource_create(screen, &templ);
         if (!newb) {
            if (!ctx)
               si_put_aux_context_flush(&sscreen->aux_context.general);
            return false;
         }

         /* Copy the old buffer contents to the new one. */
         struct pipe_box box;
         u_box_1d(0, newb->width0, &box);
         sctx->b.resource_copy_region(&sctx->b, newb, 0, 0, 0, 0, &res->b.b, 0, &box);
         flush = true;
         /* Move the new buffer storage to the old pipe_resource. */
         si_replace_buffer_storage(&sctx->b, &res->b.b, newb, 0, 0, 0);
         pipe_resource_reference(&newb, NULL);

         assert(res->b.b.bind & PIPE_BIND_SHARED);
         assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
      }

      /* Buffers */
      slice_size = 0;
   }

   si_texture_get_info(screen, resource, &stride, &offset);

   if (res->b.is_shared) {
      /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
       * doesn't set it.
       */
      res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
      if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
         res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
   } else {
      res->b.is_shared = true;
      res->external_usage = usage;
   }

   /* Pay off any deferred flush; the aux context (if we borrowed it) is
    * flushed and released by si_put_aux_context_flush.
    */
   if (flush && ctx)
      sctx->b.flush(&sctx->b, NULL, 0);
   if (!ctx)
      si_put_aux_context_flush(&sscreen->aux_context.general);

   whandle->stride = stride;
   whandle->offset = offset + slice_size * whandle->layer;
   whandle->modifier = modifier;

   return sscreen->ws->buffer_get_handle(sscreen->ws, res->buf, whandle);
}
907 
/* Dump a human-readable description of `tex` into `log`: common template
 * parameters, the addrlib surface layout, and — on GFX6-8 only — the
 * per-mip-level DCC, surface, and stencil layouts. Returns silently if the
 * temporary memstream for the surface info cannot be created.
 */
void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex,
                           struct u_log_context *log)
{
   int i;
   FILE *f;
   char *surf_info = NULL;
   size_t surf_info_size;

   /* Common parameters. */
   u_log_printf(log,
                "  Info: npix_x=%u, npix_y=%u, npix_z=%u, "
                "array_size=%u, last_level=%u, nsamples=%u",
                tex->buffer.b.b.width0, tex->buffer.b.b.height0,
                tex->buffer.b.b.depth0, tex->buffer.b.b.array_size,
                tex->buffer.b.b.last_level, tex->buffer.b.b.nr_samples);

   if (tex->is_depth && tex->surface.meta_offset)
      u_log_printf(log, ", tc_compatible_htile=%u", tex->tc_compatible_htile);

   u_log_printf(log, ", %s\n",
                util_format_short_name(tex->buffer.b.b.format));

   /* ac_surface_print_info wants a FILE*; capture its output into a
    * malloc'ed string via open_memstream so it can be forwarded to `log`.
    */
   f = open_memstream(&surf_info, &surf_info_size);
   if (!f)
      return;
   ac_surface_print_info(f, &sscreen->info, &tex->surface);
   fclose(f);
   u_log_printf(log, "%s", surf_info);
   free(surf_info);

   /* Everything below uses the legacy (pre-GFX9) surface union members. */
   if (sscreen->info.gfx_level >= GFX9) {
      return;
   }

   if (!tex->is_depth && tex->surface.meta_offset) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++)
         u_log_printf(log,
                      "    DCCLevel[%i]: enabled=%u, offset=%u, "
                      "fast_clear_size=%u\n",
                      i, i < tex->surface.num_meta_levels, tex->surface.u.legacy.color.dcc_level[i].dcc_offset,
                      tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size);
   }

   for (i = 0; i <= tex->buffer.b.b.last_level; i++)
      u_log_printf(log,
                   "    Level[%i]: offset=%" PRIu64 ", slice_size=%" PRIu64 ", "
                   "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                   "mode=%u, tiling_index = %u\n",
                   i, (uint64_t)tex->surface.u.legacy.level[i].offset_256B * 256,
                   (uint64_t)tex->surface.u.legacy.level[i].slice_size_dw * 4,
                   u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                   u_minify(tex->buffer.b.b.depth0, i), tex->surface.u.legacy.level[i].nblk_x,
                   tex->surface.u.legacy.level[i].nblk_y, tex->surface.u.legacy.level[i].mode,
                   tex->surface.u.legacy.tiling_index[i]);

   if (tex->surface.has_stencil) {
      for (i = 0; i <= tex->buffer.b.b.last_level; i++) {
         u_log_printf(log,
                      "    StencilLevel[%i]: offset=%" PRIu64 ", "
                      "slice_size=%" PRIu64 ", npix_x=%u, "
                      "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                      "mode=%u, tiling_index = %u\n",
                      i, (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].offset_256B * 256,
                      (uint64_t)tex->surface.u.legacy.zs.stencil_level[i].slice_size_dw * 4,
                      u_minify(tex->buffer.b.b.width0, i), u_minify(tex->buffer.b.b.height0, i),
                      u_minify(tex->buffer.b.b.depth0, i),
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_x,
                      tex->surface.u.legacy.zs.stencil_level[i].nblk_y,
                      tex->surface.u.legacy.zs.stencil_level[i].mode,
                      tex->surface.u.legacy.zs.stencil_tiling_index[i]);
      }
   }
}
981 
print_debug_tex(struct si_screen * sscreen,struct si_texture * tex)982 static void print_debug_tex(struct si_screen *sscreen, struct si_texture *tex)
983 {
984    if (sscreen->debug_flags & DBG(TEX)) {
985       puts("Texture:");
986       struct u_log_context log;
987       u_log_context_init(&log);
988       si_print_texture_info(sscreen, tex, &log);
989       u_log_new_page_print(&log, stdout);
990       fflush(stdout);
991       u_log_context_destroy(&log);
992    }
993 }
994 
/**
 * Common function for si_texture_create and si_texture_from_handle.
 *
 * \param screen         screen
 * \param base           resource template
 * \param surface        radeon_surf
 * \param plane0         if a non-zero plane is being created, this is the first plane
 * \param imported_buf   from si_texture_from_handle
 * \param offset         offset for non-zero planes or imported buffers
 * \param pitch_in_bytes row pitch override in bytes, forwarded to
 *                       ac_surface_override_offset_stride (divided by bpe);
 *                       0 presumably keeps the computed pitch — TODO confirm
 * \param alloc_size     the size to allocate if plane0 != NULL
 * \param alignment      alignment for the allocation
 */
static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
                                                   const struct pipe_resource *base,
                                                   const struct radeon_surf *surface,
                                                   const struct si_texture *plane0,
                                                   struct pb_buffer_lean *imported_buf,
                                                   uint64_t offset, unsigned pitch_in_bytes,
                                                   uint64_t alloc_size, unsigned alignment)
{
   struct si_texture *tex;
   struct si_resource *resource;
   struct si_screen *sscreen = (struct si_screen *)screen;

   /* Reject mipmapped, 3D, and cube textures on chips that lack border
    * color support for them; callers should never get here with these.
    */
   if (!sscreen->info.has_3d_cube_border_color_mipmap &&
       (base->last_level > 0 ||
        base->target == PIPE_TEXTURE_3D ||
        base->target == PIPE_TEXTURE_CUBE)) {
      assert(0);
      return NULL;
   }

   tex = CALLOC_STRUCT_CL(si_texture);
   if (!tex)
      goto error;

   resource = &tex->buffer;
   resource->b.b = *base;
   pipe_reference_init(&resource->b.b.reference, 1);
   resource->b.b.screen = screen;

   /* don't include stencil-only formats which we don't support for rendering */
   tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
   tex->surface = *surface;

   /* Apply the caller-provided offset/pitch overrides (non-zero planes and
    * imported buffers) to our copy of the surface.
    */
   if (!ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
                                          tex->buffer.b.b.array_size,
                                          tex->buffer.b.b.last_level + 1,
                                          offset, pitch_in_bytes / tex->surface.bpe))
      goto error;

   if (plane0) {
      /* The buffer is shared with the first plane. */
      resource->bo_size = plane0->buffer.bo_size;
      resource->bo_alignment_log2 = plane0->buffer.bo_alignment_log2;
      resource->flags = plane0->buffer.flags;
      resource->domains = plane0->buffer.domains;

      radeon_bo_reference(sscreen->ws, &resource->buf, plane0->buffer.buf);
      resource->gpu_address = plane0->buffer.gpu_address;
   } else if (!(surface->flags & RADEON_SURF_IMPORTED)) {
      if (base->flags & PIPE_RESOURCE_FLAG_SPARSE)
         resource->b.b.flags |= PIPE_RESOURCE_FLAG_UNMAPPABLE;
      if (base->bind & PIPE_BIND_PRIME_BLIT_DST)
         resource->b.b.flags |= SI_RESOURCE_FLAG_GL2_BYPASS;

      /* Create the backing buffer. */
      si_init_resource_fields(sscreen, resource, alloc_size, alignment);

      /* GFX12: Image descriptors always set COMPRESSION_EN=1, so this is the only thing that
       * disables DCC in the driver.
       */
      if (sscreen->info.gfx_level >= GFX12 &&
          resource->domains & RADEON_DOMAIN_VRAM &&
          surface->u.gfx9.gfx12_enable_dcc)
         resource->flags |= RADEON_FLAG_GFX12_ALLOW_DCC;

      if (!si_alloc_resource(sscreen, resource))
         goto error;
   } else {
      /* Imported buffer: adopt the existing BO and query its properties
       * from the winsys instead of allocating.
       */
      resource->buf = imported_buf;
      resource->gpu_address = sscreen->ws->buffer_get_virtual_address(resource->buf);
      resource->bo_size = imported_buf->size;
      resource->bo_alignment_log2 = imported_buf->alignment_log2;
      resource->domains = sscreen->ws->buffer_get_initial_domain(resource->buf);
      if (sscreen->ws->buffer_get_flags)
         resource->flags = sscreen->ws->buffer_get_flags(resource->buf);
   }

   if (sscreen->debug_flags & DBG(VM)) {
      fprintf(stderr,
              "VM start=0x%" PRIX64 "  end=0x%" PRIX64
              " | Texture %ix%ix%i, %i levels, %i samples, %s | Flags: ",
              tex->buffer.gpu_address, tex->buffer.gpu_address + tex->buffer.buf->size,
              base->width0, base->height0, util_num_layers(base, 0), base->last_level + 1,
              base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
      si_res_print_flags(tex->buffer.flags);
      fprintf(stderr, "\n");
   }

   /* GFX12 takes a short path: no HTILE/CMASK/DCC metadata clears below. */
   if (sscreen->info.gfx_level >= GFX12) {
      print_debug_tex(sscreen, tex);
      if (tex->is_depth) {
         /* Z24 is no longer supported. We should use Z32_FLOAT instead. */
         if (base->format == PIPE_FORMAT_Z16_UNORM) {
            tex->db_render_format = base->format;
         } else {
            tex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
            tex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
                                  base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
         }

         tex->db_compatible = true;
         tex->can_sample_z = true;
         tex->can_sample_s = true;
      }

      /* Always set BO metadata - required for programming DCC fields for GFX12 SDMA in the kernel.
       * If the texture is suballocated, this will overwrite the metadata for all suballocations,
       * but there is nothing we can do about that.
       */
      si_set_tex_bo_metadata(sscreen, tex);
      return tex;
   }

   /* Everything below is for GFX6-11. */

   /* Use 1.0 as the default clear value to get optimal ZRANGE_PRECISION if we don't
    * get a fast clear.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(tex->depth_clear_value); i++)
      tex->depth_clear_value[i] = 1.0;

   /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
    * setting, so we have to enable it if we enabled it at allocation.
    *
    * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be
    * enabled on demand.
    */
   tex->tc_compatible_htile = (sscreen->info.gfx_level == GFX8 &&
                               tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ||
                              /* Mipmapping always starts TC-compatible. */
                              (sscreen->info.gfx_level >= GFX8 &&
                               tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE &&
                               tex->buffer.b.b.last_level > 0);

   print_debug_tex(sscreen, tex);

   /* TC-compatible HTILE:
    * - GFX8 only supports Z32_FLOAT.
    * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
   if (tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
      if (sscreen->info.gfx_level >= GFX9 && base->format == PIPE_FORMAT_Z16_UNORM)
         tex->db_render_format = base->format;
      else {
         tex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
         tex->upgraded_depth = base->format != PIPE_FORMAT_Z32_FLOAT &&
                               base->format != PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;
      }
   } else {
      tex->db_render_format = base->format;
   }

   /* Applies to GCN. */
   tex->last_msaa_resolve_target_micro_mode = tex->surface.micro_tile_mode;

   if (tex->is_depth) {
      tex->htile_stencil_disabled = !tex->surface.has_stencil;

      if (sscreen->info.gfx_level >= GFX9) {
         tex->can_sample_z = true;
         tex->can_sample_s = true;

         /* Stencil texturing with HTILE doesn't work
          * with mipmapping on Navi10-14. */
         if (sscreen->info.gfx_level == GFX10 && base->last_level > 0)
            tex->htile_stencil_disabled = true;
      } else {
         tex->can_sample_z = !tex->surface.u.legacy.depth_adjusted;
         tex->can_sample_s = !tex->surface.u.legacy.stencil_adjusted;

         /* GFX8 must keep stencil enabled because it can't use Z-only TC-compatible
          * HTILE because of a hw bug. This has only a small effect on performance
          * because we lose a little bit of Z precision in order to make space for
          * stencil in HTILE.
          */
         if (sscreen->info.gfx_level == GFX8 &&
             tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE)
            tex->htile_stencil_disabled = false;
      }

      tex->db_compatible = surface->flags & RADEON_SURF_ZBUFFER;
   } else {
      if (tex->surface.cmask_offset) {
         assert(sscreen->info.gfx_level < GFX11);
         tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
         tex->cmask_buffer = &tex->buffer;
      }
   }

   /* Prepare metadata clears.  */
   struct si_clear_info clears[4];
   unsigned num_clears = 0;

   if (tex->cmask_buffer) {
      /* Initialize the cmask to 0xCC (= compressed state). */
      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->cmask_buffer->b.b,
                           tex->surface.cmask_offset, tex->surface.cmask_size,
                           0xCCCCCCCC);
   }
   if (tex->is_depth && tex->surface.meta_offset) {
      uint32_t clear_value = 0;

      if (sscreen->info.gfx_level >= GFX9 || tex->tc_compatible_htile)
         clear_value = 0x0000030F;

      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                           tex->surface.meta_size, clear_value);
   }

   /* Initialize DCC only if the texture is not being imported. */
   if (!(surface->flags & RADEON_SURF_IMPORTED) && !tex->is_depth && tex->surface.meta_offset) {
      /* Clear DCC to black for all tiles with DCC enabled.
       *
       * This fixes corruption in 3DMark Slingshot Extreme, which
       * uses uninitialized textures, causing corruption.
       */
      if (tex->surface.num_meta_levels == tex->buffer.b.b.last_level + 1 &&
          tex->buffer.b.b.nr_samples <= 2) {
         /* Simple case - all tiles have DCC enabled. */
         assert(num_clears < ARRAY_SIZE(clears));
         si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                              tex->surface.meta_size, DCC_CLEAR_0000);
      } else if (sscreen->info.gfx_level >= GFX9) {
         /* Clear to uncompressed. Clearing this to black is complicated. */
         assert(num_clears < ARRAY_SIZE(clears));
         si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                              tex->surface.meta_size, DCC_UNCOMPRESSED);
      } else {
         /* GFX8: Initialize mipmap levels and multisamples separately. */
         if (tex->buffer.b.b.nr_samples >= 2) {
            /* Clearing this to black is complicated. */
            assert(num_clears < ARRAY_SIZE(clears));
            si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset,
                                 tex->surface.meta_size, DCC_UNCOMPRESSED);
         } else {
            /* Clear the enabled mipmap levels to black. */
            unsigned size = 0;

            for (unsigned i = 0; i < tex->surface.num_meta_levels; i++) {
               if (!tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size)
                  break;

               size = tex->surface.u.legacy.color.dcc_level[i].dcc_offset +
                      tex->surface.u.legacy.color.dcc_level[i].dcc_fast_clear_size;
            }

            /* Mipmap levels with DCC. */
            if (size) {
               assert(num_clears < ARRAY_SIZE(clears));
               si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset, size,
                                    DCC_CLEAR_0000);
            }
            /* Mipmap levels without DCC. */
            if (size != tex->surface.meta_size) {
               assert(num_clears < ARRAY_SIZE(clears));
               si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.meta_offset + size,
                                    tex->surface.meta_size - size, DCC_UNCOMPRESSED);
            }
         }
      }
   }

   /* Initialize displayable DCC that requires the retile blit. */
   if (tex->surface.display_dcc_offset && !(surface->flags & RADEON_SURF_IMPORTED)) {
      /* Uninitialized DCC can hang the display hw.
       * Clear to white to indicate that. */
      assert(num_clears < ARRAY_SIZE(clears));
      si_init_buffer_clear(&clears[num_clears++], &tex->buffer.b.b, tex->surface.display_dcc_offset,
                           tex->surface.u.gfx9.color.display_dcc_size,
                           sscreen->info.gfx_level >= GFX11 ? GFX11_DCC_CLEAR_1111_UNORM
                                                            : GFX8_DCC_CLEAR_1111);
   }

   /* Execute the clears. */
   if (num_clears) {
      si_execute_clears(si_get_aux_context(&sscreen->aux_context.general), clears, num_clears, 0,
                        false);
      si_put_aux_context_flush(&sscreen->aux_context.general);
   }

   /* Initialize the CMASK base register value. */
   tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;

   return tex;

error:
   FREE_CL(tex);
   return NULL;
}
1297 
/* Pick the surface mode (linear / 1D tiled / 2D tiled) for a resource
 * template. All predicates below are side-effect free, so they are merged
 * into combined conditions where convenient; the decision order is the
 * same as always: MSAA → forced linear → GFX8 TC-HTILE → linear
 * candidates → small-texture 1D → default 2D.
 */
static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
                                              const struct pipe_resource *templ,
                                              bool tc_compatible_htile)
{
   const struct util_format_description *fmt_desc = util_format_description(templ->format);
   const bool is_zs = util_format_is_depth_or_stencil(templ->format) &&
                      !(templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH);

   /* MSAA resources must be 2D tiled. */
   if (templ->nr_samples > 1)
      return RADEON_SURF_MODE_2D;

   /* Transfer resources should be linear. */
   if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)
      return RADEON_SURF_MODE_LINEAR_ALIGNED;

   /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
    * which requires 2D tiling.
    */
   if (sscreen->info.gfx_level == GFX8 && tc_compatible_htile)
      return RADEON_SURF_MODE_2D;

   /* Handle common candidates for the linear mode.
    * Compressed textures and DB surfaces must always be tiled.
    */
   if (!is_zs && !util_format_is_compressed(templ->format)) {
      const bool tiling_disabled =
         (sscreen->debug_flags & DBG(NO_TILING)) ||
         (templ->bind & PIPE_BIND_SCANOUT && sscreen->debug_flags & DBG(NO_DISPLAY_TILING));

      if (tiling_disabled ||
          /* Tiling doesn't work with the 422 (SUBSAMPLED) formats. */
          fmt_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ||
          /* Cursors are linear on AMD GCN.
           * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
          (templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)) ||
          /* Textures with a very small height are recommended to be linear. */
          templ->target == PIPE_TEXTURE_1D || templ->target == PIPE_TEXTURE_1D_ARRAY ||
          /* Only very thin and long 2D textures should benefit from linear_aligned. */
          templ->height0 <= 2 ||
          /* Textures likely to be mapped often. */
          templ->usage == PIPE_USAGE_STAGING || templ->usage == PIPE_USAGE_STREAM)
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* Make small textures 1D tiled. */
   if (templ->width0 <= 16 || templ->height0 <= 16 || (sscreen->debug_flags & DBG(NO_2D_TILING)))
      return RADEON_SURF_MODE_1D;

   /* The allocator will switch to 1D if needed. */
   return RADEON_SURF_MODE_2D;
}
1359 
/* Allocate a texture with the given DRM format modifier
 * (DRM_FORMAT_MOD_INVALID means "let the driver pick the tiling").
 * Multi-plane formats like NV12 are allocated as a chain of si_textures
 * sharing a single buffer; plane 0 is returned and owns the chain.
 */
static struct pipe_resource *
si_texture_create_with_modifier(struct pipe_screen *screen,
                                const struct pipe_resource *templ,
                                uint64_t modifier)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   bool is_zs = util_format_is_depth_or_stencil(templ->format);

   if (templ->nr_samples >= 2) {
      /* This is hackish (overwriting the const pipe_resource template),
       * but should be harmless and gallium frontends can also see
       * the overridden number of samples in the created pipe_resource.
       */
      if (is_zs && sscreen->eqaa_force_z_samples) {
         ((struct pipe_resource *)templ)->nr_samples =
            ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_z_samples;
      } else if (!is_zs && sscreen->eqaa_force_color_samples) {
         ((struct pipe_resource *)templ)->nr_samples = sscreen->eqaa_force_coverage_samples;
         ((struct pipe_resource *)templ)->nr_storage_samples = sscreen->eqaa_force_color_samples;
      }
   }

   /* FORCE_LINEAR is treated like flushed depth: it also disables
    * TC-compatible HTILE below. */
   bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
                           templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR;
   bool tc_compatible_htile =
      sscreen->info.has_tc_compatible_htile &&
      /* There are issues with TC-compatible HTILE on Tonga (and
       * Iceland is the same design), and documented bug workarounds
       * don't help. For example, this fails:
       *   piglit/bin/tex-miplevel-selection 'texture()' 2DShadow -auto
       */
      sscreen->info.family != CHIP_TONGA && sscreen->info.family != CHIP_ICELAND &&
      (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
      !(sscreen->debug_flags & DBG(NO_HYPERZ)) && !is_flushed_depth &&
      is_zs;
   enum radeon_surf_mode tile_mode = si_choose_tiling(sscreen, templ, tc_compatible_htile);

   /* This allocates textures with multiple planes like NV12 in 1 buffer. */
   enum
   {
      SI_TEXTURE_MAX_PLANES = 3
   };
   struct radeon_surf surface[SI_TEXTURE_MAX_PLANES] = {};
   struct pipe_resource plane_templ[SI_TEXTURE_MAX_PLANES];
   uint64_t plane_offset[SI_TEXTURE_MAX_PLANES] = {};
   uint64_t total_size = 0;
   unsigned max_alignment = 0;
   unsigned num_planes = util_format_get_num_planes(templ->format);
   assert(num_planes <= SI_TEXTURE_MAX_PLANES);

   /* Compute texture or plane layouts and offsets. */
   for (unsigned i = 0; i < num_planes; i++) {
      plane_templ[i] = *templ;
      plane_templ[i].format = util_format_get_plane_format(templ->format, i);
      plane_templ[i].width0 = util_format_get_plane_width(templ->format, i, templ->width0);
      plane_templ[i].height0 = util_format_get_plane_height(templ->format, i, templ->height0);

      /* Multi-plane allocations need PIPE_BIND_SHARED, because we can't
       * reallocate the storage to add PIPE_BIND_SHARED, because it's
       * shared by 3 pipe_resources.
       */
      if (num_planes > 1)
         plane_templ[i].bind |= PIPE_BIND_SHARED;
      /* Setting metadata on suballocated buffers is impossible. So use PIPE_BIND_CUSTOM to
       * request a non-suballocated buffer.
       */
      if (!is_zs && sscreen->debug_flags & DBG(EXTRA_METADATA))
         plane_templ[i].bind |= PIPE_BIND_CUSTOM;

      if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, modifier,
                          false, plane_templ[i].bind & PIPE_BIND_SCANOUT,
                          is_flushed_depth, tc_compatible_htile))
         return NULL;

      plane_templ[i].nr_sparse_levels = surface[i].first_mip_tail_level;

      /* Pack each plane right after the previous one, honoring its alignment. */
      plane_offset[i] = align64(total_size, 1 << surface[i].surf_alignment_log2);
      total_size = plane_offset[i] + surface[i].total_size;
      max_alignment = MAX2(max_alignment, 1 << surface[i].surf_alignment_log2);
   }

   struct si_texture *plane0 = NULL, *last_plane = NULL;

   /* Create one si_texture per plane; planes after the first are linked
    * through pipe_resource::next. */
   for (unsigned i = 0; i < num_planes; i++) {
      struct si_texture *tex =
         si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL,
                                  plane_offset[i], 0, total_size, max_alignment);
      if (!tex) {
         /* Releasing plane 0 releases the whole chain built so far. */
         si_texture_reference(&plane0, NULL);
         return NULL;
      }

      tex->plane_index = i;
      tex->num_planes = num_planes;

      if (!plane0) {
         plane0 = last_plane = tex;
      } else {
         last_plane->buffer.b.b.next = &tex->buffer.b.b;
         last_plane = tex;
      }
      if (i == 0 && !is_zs && tex->surface.fmask_size == 0 &&
          sscreen->debug_flags & DBG(EXTRA_METADATA))
         si_set_tex_bo_metadata(sscreen, tex);
   }

   if (num_planes >= 2)
      plane0->multi_plane_format = templ->format;

   return (struct pipe_resource *)plane0;
}
1471 
si_texture_create(struct pipe_screen * screen,const struct pipe_resource * templ)1472 struct pipe_resource *si_texture_create(struct pipe_screen *screen,
1473                                         const struct pipe_resource *templ)
1474 {
1475    return si_texture_create_with_modifier(screen, templ, DRM_FORMAT_MOD_INVALID);
1476 }
1477 
/* Commit or decommit (per "commit") the physical pages backing the given box
 * of one level of a sparse (PRT) texture. Returns false if the winsys
 * page-table update fails. GFX9+ only (uses the gfx9 PRT surface layout).
 */
bool si_texture_commit(struct si_context *ctx, struct si_resource *res, unsigned level,
                       struct pipe_box *box, bool commit)
{
   struct si_texture *tex = (struct si_texture *)res;
   struct radeon_surf *surface = &tex->surface;
   enum pipe_format format = res->b.b.format;
   unsigned blks = util_format_get_blocksize(format);
   unsigned samples = MAX2(1, res->b.b.nr_samples);

   assert(ctx->gfx_level >= GFX9);

   /* Byte size of one row of PRT tiles in this level. */
   unsigned row_pitch = surface->u.gfx9.prt_level_pitch[level] *
      surface->prt_tile_height * surface->prt_tile_depth * blks * samples;
   /* Byte size of one tile-depth slab of slices. */
   uint64_t depth_pitch = surface->u.gfx9.surf_slice_size * surface->prt_tile_depth;

   /* Box origin converted from texels to tile coordinates. */
   unsigned x = box->x / surface->prt_tile_width;
   unsigned y = box->y / surface->prt_tile_height;
   unsigned z = box->z / surface->prt_tile_depth;

   /* Box extents in whole tiles, rounding up to cover partial tiles. */
   unsigned w = DIV_ROUND_UP(box->width, surface->prt_tile_width);
   unsigned h = DIV_ROUND_UP(box->height, surface->prt_tile_height);
   unsigned d = DIV_ROUND_UP(box->depth, surface->prt_tile_depth);

   /* Align to tile block base, for levels in mip tail whose offset is inside
    * a tile block.
    */
   uint64_t level_base = ROUND_DOWN_TO(surface->u.gfx9.prt_level_offset[level],
                                       RADEON_SPARSE_PAGE_SIZE);
   uint64_t commit_base = level_base +
      x * RADEON_SPARSE_PAGE_SIZE + y * (uint64_t)row_pitch + z * depth_pitch;

   /* Tiles within a row are contiguous, so each buffer_commit call covers
    * a whole row of w pages. */
   uint64_t size = (uint64_t)w * RADEON_SPARSE_PAGE_SIZE;
   for (int i = 0; i < d; i++) {
      uint64_t base = commit_base + i * depth_pitch;
      for (int j = 0; j < h; j++) {
         uint64_t offset = base + j * row_pitch;
         if (!ctx->ws->buffer_commit(ctx->ws, res->buf, offset, size, commit))
            return false;
      }
   }

   return true;
}
1521 
/* pipe_screen::query_dmabuf_modifiers: report the modifiers supported for
 * "format". With max == 0 only *count is written; otherwise up to "max"
 * modifiers (and optional external_only flags) are filled in.
 */
static void si_query_dmabuf_modifiers(struct pipe_screen *screen,
                                      enum pipe_format format,
                                      int max,
                                      uint64_t *modifiers,
                                      unsigned int *external_only,
                                      int *count)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct ac_modifier_options options = {
      .dcc = !(sscreen->debug_flags & (DBG(NO_DCC) | DBG(NO_EXPORTED_DCC))),
      /* Do not support DCC with retiling yet. This needs explicit
       * resource flushes, but the app has no way to promise doing
       * flushes with modifiers. */
      .dcc_retile = !(sscreen->debug_flags & DBG(NO_DCC)),
   };
   unsigned num_mods = max;

   ac_get_supported_modifiers(&sscreen->info, &options, format, &num_mods,
                              max ? modifiers : NULL);

   if (max && external_only) {
      /* YUV formats can only be sampled externally. */
      bool is_yuv = util_format_is_yuv(format);

      for (unsigned i = 0; i < num_mods; ++i)
         external_only[i] = is_yuv;
   }

   *count = num_mods;
}
1545 
1546 static bool
si_is_dmabuf_modifier_supported(struct pipe_screen * screen,uint64_t modifier,enum pipe_format format,bool * external_only)1547 si_is_dmabuf_modifier_supported(struct pipe_screen *screen,
1548                                uint64_t modifier,
1549                                enum pipe_format format,
1550                                bool *external_only)
1551 {
1552    int allowed_mod_count;
1553    si_query_dmabuf_modifiers(screen, format, 0, NULL, NULL, &allowed_mod_count);
1554 
1555    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1556    if (!allowed_modifiers)
1557       return false;
1558 
1559    unsigned *external_array = NULL;
1560    if (external_only) {
1561       external_array = (unsigned *)calloc(allowed_mod_count, sizeof(unsigned));
1562       if (!external_array) {
1563          free(allowed_modifiers);
1564          return false;
1565       }
1566    }
1567 
1568    si_query_dmabuf_modifiers(screen, format, allowed_mod_count, allowed_modifiers,
1569                             external_array, &allowed_mod_count);
1570 
1571    bool supported = false;
1572    for (int i = 0; i < allowed_mod_count && !supported; ++i) {
1573       if (allowed_modifiers[i] != modifier)
1574          continue;
1575 
1576       supported = true;
1577       if (external_only)
1578          *external_only = external_array[i];
1579    }
1580 
1581    free(allowed_modifiers);
1582    free(external_array);
1583    return supported;
1584 }
1585 
1586 static unsigned
si_get_dmabuf_modifier_planes(struct pipe_screen * pscreen,uint64_t modifier,enum pipe_format format)1587 si_get_dmabuf_modifier_planes(struct pipe_screen *pscreen, uint64_t modifier,
1588                              enum pipe_format format)
1589 {
1590    unsigned planes = util_format_get_num_planes(format);
1591 
1592    if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) < AMD_FMT_MOD_TILE_VER_GFX12) {
1593       if (IS_AMD_FMT_MOD(modifier) && planes == 1) {
1594          if (AMD_FMT_MOD_GET(DCC_RETILE, modifier))
1595             return 3;
1596          else if (AMD_FMT_MOD_GET(DCC, modifier))
1597             return 2;
1598          else
1599             return 1;
1600       }
1601    }
1602 
1603    return planes;
1604 }
1605 
1606 static bool
si_modifier_supports_resource(struct pipe_screen * screen,uint64_t modifier,const struct pipe_resource * templ)1607 si_modifier_supports_resource(struct pipe_screen *screen,
1608                               uint64_t modifier,
1609                               const struct pipe_resource *templ)
1610 {
1611    struct si_screen *sscreen = (struct si_screen *)screen;
1612    uint32_t max_width, max_height;
1613 
1614    if (((templ->bind & PIPE_BIND_LINEAR) || sscreen->debug_flags & DBG(NO_TILING)) &&
1615        modifier != DRM_FORMAT_MOD_LINEAR)
1616       return false;
1617 
1618    /* Protected content doesn't support DCC on GFX12. */
1619    if (sscreen->info.gfx_level >= GFX12 && templ->bind & PIPE_BIND_PROTECTED &&
1620        IS_AMD_FMT_MOD(modifier) &&
1621        AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12 &&
1622        AMD_FMT_MOD_GET(DCC, modifier))
1623       return false;
1624 
1625    ac_modifier_max_extent(&sscreen->info, modifier, &max_width, &max_height);
1626    return templ->width0 <= max_width && templ->height0 <= max_height;
1627 }
1628 
1629 static struct pipe_resource *
si_texture_create_with_modifiers(struct pipe_screen * screen,const struct pipe_resource * templ,const uint64_t * modifiers,int modifier_count)1630 si_texture_create_with_modifiers(struct pipe_screen *screen,
1631                                  const struct pipe_resource *templ,
1632                                  const uint64_t *modifiers,
1633                                  int modifier_count)
1634 {
1635    /* Buffers with modifiers make zero sense. */
1636    assert(templ->target != PIPE_BUFFER);
1637 
1638    /* Select modifier. */
1639    int allowed_mod_count;
1640    si_query_dmabuf_modifiers(screen, templ->format, 0, NULL, NULL, &allowed_mod_count);
1641 
1642    uint64_t *allowed_modifiers = (uint64_t *)calloc(allowed_mod_count, sizeof(uint64_t));
1643    if (!allowed_modifiers) {
1644       return NULL;
1645    }
1646 
1647    /* This does not take external_only into account. We assume it is the same for all modifiers. */
1648    si_query_dmabuf_modifiers(screen, templ->format, allowed_mod_count, allowed_modifiers, NULL, &allowed_mod_count);
1649 
1650    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1651 
1652    /* Try to find the first allowed modifier that is in the application provided
1653     * list. We assume that the allowed modifiers are ordered in descending
1654     * preference in the list provided by si_query_dmabuf_modifiers. */
1655    for (int i = 0; i < allowed_mod_count; ++i) {
1656       bool found = false;
1657       for (int j = 0; j < modifier_count && !found; ++j)
1658          if (modifiers[j] == allowed_modifiers[i] && si_modifier_supports_resource(screen, modifiers[j], templ))
1659             found = true;
1660 
1661       if (found) {
1662          modifier = allowed_modifiers[i];
1663          break;
1664       }
1665    }
1666 
1667    free(allowed_modifiers);
1668 
1669    if (modifier == DRM_FORMAT_MOD_INVALID) {
1670       return NULL;
1671    }
1672    return si_texture_create_with_modifier(screen, templ, modifier);
1673 }
1674 
si_texture_is_aux_plane(const struct pipe_resource * resource)1675 static bool si_texture_is_aux_plane(const struct pipe_resource *resource)
1676 {
1677    return resource->flags & SI_RESOURCE_AUX_PLANE;
1678 }
1679 
/* Wrap an existing winsys buffer (e.g. an imported dma-buf) in a si_texture.
 * "offset"/"stride" locate the plane inside the buffer; "dedicated" means the
 * buffer is a dedicated allocation whose BO metadata (tiling info) can be
 * trusted. Returns NULL on any validation or allocation failure.
 */
static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
                                                           const struct pipe_resource *templ,
                                                           struct pb_buffer_lean *buf, unsigned stride,
                                                           uint64_t offset, uint64_t modifier,
                                                           unsigned usage, bool dedicated)
{
   struct radeon_surf surface = {};
   struct radeon_bo_metadata metadata = {};
   struct si_texture *tex;
   int r;

   /* Ignore metadata for non-zero planes. */
   if (offset != 0)
      dedicated = false;

   if (dedicated) {
      sscreen->ws->buffer_get_metadata(sscreen->ws, buf, &metadata, &surface);
   } else {
      /**
       * The bo metadata is unset for un-dedicated images. So we fall
       * back to linear. See answer to question 5 of the
       * VK_KHX_external_memory spec for some details.
       *
       * It is possible that this case isn't going to work if the
       * surface pitch isn't correctly aligned by default.
       *
       * In order to support it correctly we require multi-image
       * metadata to be synchronized between radv and radeonsi. The
       * semantics of associating multiple image metadata to a memory
       * object on the vulkan export side are not concretely defined
       * either.
       *
       * All the use cases we are aware of at the moment for memory
       * objects use dedicated allocations. So lets keep the initial
       * implementation simple.
       *
       * A possible alternative is to attempt to reconstruct the
       * tiling information when the TexParameter TEXTURE_TILING_EXT
       * is set.
       */
      metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   r = si_init_surface(sscreen, &surface, templ, metadata.mode, modifier, true,
                       surface.flags & RADEON_SURF_SCANOUT, false, false);
   if (r)
      return NULL;

   /* This is a hack to skip alignment checking for 3D textures */
   if (templ->target == PIPE_TEXTURE_3D)
      stride = 0;

   tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
                                  offset, stride, 0, 0);
   if (!tex)
      return NULL;

   tex->buffer.b.is_shared = true;
   tex->buffer.external_usage = usage;
   tex->num_planes = 1;
   if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
      tex->buffer.b.b.bind |= PIPE_BIND_PROTECTED;

   /* Account for multiple planes with lowered yuv import. */
   struct pipe_resource *next_plane = tex->buffer.b.b.next;
   while (next_plane && !si_texture_is_aux_plane(next_plane)) {
      struct si_texture *next_tex = (struct si_texture *)next_plane;
      ++next_tex->num_planes;
      ++tex->num_planes;
      next_plane = next_plane->next;
   }

   /* Validate any trailing aux planes: each must reference the same buffer
    * and match the offset/stride the surface layout expects; otherwise
    * reject the import.
    */
   unsigned nplanes = ac_surface_get_nplanes(&tex->surface);
   unsigned plane = 1;
   while (next_plane) {
      struct si_auxiliary_texture *ptex = (struct si_auxiliary_texture *)next_plane;
      if (plane >= nplanes || ptex->buffer != tex->buffer.buf ||
          ptex->offset != ac_surface_get_plane_offset(sscreen->info.gfx_level,
                                                      &tex->surface, plane, 0) ||
          ptex->stride != ac_surface_get_plane_stride(sscreen->info.gfx_level,
                                                      &tex->surface, plane, 0)) {
         si_texture_reference(&tex, NULL);
         return NULL;
      }
      ++plane;
      next_plane = next_plane->next;
   }

   /* A single-plane import must provide every aux plane the layout needs. */
   if (plane != nplanes && tex->num_planes == 1) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* Apply the tiling/DCC metadata the exporter stored in the BO. */
   if (!ac_surface_apply_umd_metadata(&sscreen->info, &tex->surface,
                                      tex->buffer.b.b.nr_storage_samples,
                                      tex->buffer.b.b.last_level + 1,
                                      metadata.size_metadata,
                                      metadata.metadata)) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* Make sure the texture actually fits inside the imported buffer. */
   if (ac_surface_get_plane_offset(sscreen->info.gfx_level, &tex->surface, 0, 0) +
        tex->surface.total_size > buf->size) {
      si_texture_reference(&tex, NULL);
      return NULL;
   }

   /* Displayable DCC requires an explicit flush. */
   if (dedicated && offset == 0 && !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
       si_displayable_dcc_needs_explicit_flush(tex)) {
      /* TODO: do we need to decompress DCC? */
      if (si_texture_discard_dcc(sscreen, tex)) {
         /* Update BO metadata after disabling DCC. */
         si_set_tex_bo_metadata(sscreen, tex);
      }
   }

   assert(tex->surface.tile_swizzle == 0);
   return &tex->buffer.b.b;
}
1801 
/* pipe_screen::resource_from_handle: import a buffer or texture from an
 * external handle (dma-buf fd, KMS handle, ...). Aux planes beyond the
 * format's own planes get a lightweight si_auxiliary_texture wrapper.
 */
static struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
                                                    const struct pipe_resource *templ,
                                                    struct winsys_handle *whandle, unsigned usage)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct pb_buffer_lean *buf = NULL;

   buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle,
                                         sscreen->info.max_alignment,
                                         templ->bind & PIPE_BIND_PRIME_BLIT_DST);
   if (!buf)
      return NULL;

   if (templ->target == PIPE_BUFFER)
      return si_buffer_from_winsys_buffer(screen, templ, buf, 0);

   /* Planes past the format's own count are auxiliary (metadata) planes. */
   if (whandle->plane >= util_format_get_num_planes(whandle->format)) {
      struct si_auxiliary_texture *tex = CALLOC_STRUCT_CL(si_auxiliary_texture);
      /* NOTE(review): "buf" appears to be leaked on this failure path —
       * confirm whether the winsys buffer needs to be released here. */
      if (!tex)
         return NULL;
      tex->b.b = *templ;
      tex->b.b.flags |= SI_RESOURCE_AUX_PLANE;
      tex->stride = whandle->stride;
      tex->offset = whandle->offset;
      tex->buffer = buf;
      pipe_reference_init(&tex->b.b.reference, 1);
      tex->b.b.screen = screen;

      return &tex->b.b;
   }

   return si_texture_from_winsys_buffer(sscreen, templ, buf, whandle->stride, whandle->offset,
                                        whandle->modifier, usage, true);
}
1836 
si_init_flushed_depth_texture(struct pipe_context * ctx,struct pipe_resource * texture)1837 bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture)
1838 {
1839    struct si_texture *tex = (struct si_texture *)texture;
1840    struct pipe_resource resource;
1841    enum pipe_format pipe_format = texture->format;
1842 
1843    assert(!tex->flushed_depth_texture);
1844 
1845    if (!tex->can_sample_z && tex->can_sample_s) {
1846       switch (pipe_format) {
1847       case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1848          /* Save memory by not allocating the S plane. */
1849          pipe_format = PIPE_FORMAT_Z32_FLOAT;
1850          break;
1851       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1852       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1853          /* Save memory bandwidth by not copying the
1854           * stencil part during flush.
1855           *
1856           * This potentially increases memory bandwidth
1857           * if an application uses both Z and S texturing
1858           * simultaneously (a flushed Z24S8 texture
1859           * would be stored compactly), but how often
1860           * does that really happen?
1861           */
1862          pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1863          break;
1864       default:;
1865       }
1866    } else if (!tex->can_sample_s && tex->can_sample_z) {
1867       assert(util_format_has_stencil(util_format_description(pipe_format)));
1868 
1869       /* DB->CB copies to an 8bpp surface don't work. */
1870       pipe_format = PIPE_FORMAT_X24S8_UINT;
1871    }
1872 
1873    memset(&resource, 0, sizeof(resource));
1874    resource.target = texture->target;
1875    resource.format = pipe_format;
1876    resource.width0 = texture->width0;
1877    resource.height0 = texture->height0;
1878    resource.depth0 = texture->depth0;
1879    resource.array_size = texture->array_size;
1880    resource.last_level = texture->last_level;
1881    resource.nr_samples = texture->nr_samples;
1882    resource.nr_storage_samples = texture->nr_storage_samples;
1883    resource.usage = PIPE_USAGE_DEFAULT;
1884    resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1885    resource.flags = texture->flags | SI_RESOURCE_FLAG_FLUSHED_DEPTH;
1886 
1887    tex->flushed_depth_texture =
1888       (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1889    if (!tex->flushed_depth_texture) {
1890       PRINT_ERR("failed to create temporary texture to hold flushed depth\n");
1891       return false;
1892    }
1893    return true;
1894 }
1895 
1896 /**
1897  * Initialize the pipe_resource descriptor to be of the same size as the box,
1898  * which is supposed to hold a subregion of the texture "orig" at the given
1899  * mipmap level.
1900  */
si_init_temp_resource_from_box(struct pipe_resource * res,struct pipe_resource * orig,const struct pipe_box * box,unsigned level,unsigned usage,unsigned flags)1901 static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig,
1902                                            const struct pipe_box *box, unsigned level,
1903                                            unsigned usage, unsigned flags)
1904 {
1905    struct si_texture *tex = (struct si_texture *)orig;
1906    enum pipe_format orig_format = tex->multi_plane_format != PIPE_FORMAT_NONE ?
1907       tex->multi_plane_format : orig->format;
1908 
1909    memset(res, 0, sizeof(*res));
1910    res->format = orig_format;
1911    res->width0 = box->width;
1912    res->height0 = box->height;
1913    res->depth0 = 1;
1914    res->array_size = 1;
1915    res->usage = usage;
1916    res->flags = flags;
1917 
1918    if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig_format)) {
1919       /* Transfer resources are allocated with linear tiling, which is
1920        * not supported for compressed formats.
1921        */
1922       unsigned blocksize = util_format_get_blocksize(orig_format);
1923 
1924       if (blocksize == 8) {
1925          res->format = PIPE_FORMAT_R16G16B16A16_UINT;
1926       } else {
1927          assert(blocksize == 16);
1928          res->format = PIPE_FORMAT_R32G32B32A32_UINT;
1929       }
1930 
1931       res->width0 = util_format_get_nblocksx(orig_format, box->width);
1932       res->height0 = util_format_get_nblocksy(orig_format, box->height);
1933    }
1934 
1935    /* We must set the correct texture target and dimensions for a 3D box. */
1936    if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1937       res->target = PIPE_TEXTURE_2D_ARRAY;
1938       res->array_size = box->depth;
1939    } else {
1940       res->target = PIPE_TEXTURE_2D;
1941    }
1942 }
1943 
si_can_invalidate_texture(struct si_screen * sscreen,struct si_texture * tex,unsigned transfer_usage,const struct pipe_box * box)1944 static bool si_can_invalidate_texture(struct si_screen *sscreen, struct si_texture *tex,
1945                                       unsigned transfer_usage, const struct pipe_box *box)
1946 {
1947    return !tex->buffer.b.is_shared && !(tex->surface.flags & RADEON_SURF_IMPORTED) &&
1948           !(transfer_usage & PIPE_MAP_READ) && tex->buffer.b.b.last_level == 0 &&
1949           util_texrange_covers_whole_level(&tex->buffer.b.b, 0, box->x, box->y, box->z, box->width,
1950                                            box->height, box->depth);
1951 }
1952 
si_texture_invalidate_storage(struct si_context * sctx,struct si_texture * tex)1953 static void si_texture_invalidate_storage(struct si_context *sctx, struct si_texture *tex)
1954 {
1955    struct si_screen *sscreen = sctx->screen;
1956 
1957    /* There is no point in discarding depth and tiled buffers. */
1958    assert(!tex->is_depth);
1959    assert(tex->surface.is_linear);
1960 
1961    /* Reallocate the buffer in the same pipe_resource. */
1962    si_alloc_resource(sscreen, &tex->buffer);
1963 
1964    /* Initialize the CMASK base address (needed even without CMASK). */
1965    tex->cmask_base_address_reg = (tex->buffer.gpu_address + tex->surface.cmask_offset) >> 8;
1966 
1967    p_atomic_inc(&sscreen->dirty_tex_counter);
1968 
1969    sctx->num_alloc_tex_transfer_bytes += tex->surface.total_size;
1970 }
1971 
/* pipe_context::texture_map implementation. Maps the texture directly when it
 * is linear, CPU-visible, and idle; otherwise goes through a linear staging
 * texture (blitting in for reads; writes are blitted back on unmap).
 * Returns a CPU pointer into the mapped box, or NULL on failure.
 */
static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resource *texture,
                                     unsigned level, unsigned usage, const struct pipe_box *box,
                                     struct pipe_transfer **ptransfer)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture *tex = (struct si_texture *)texture;
   struct si_transfer *trans;
   struct si_resource *buf;
   uint64_t offset = 0;
   char *map;
   bool use_staging_texture = tex->buffer.flags & RADEON_FLAG_ENCRYPTED;
   /* Multisample textures are accessed through level 0 only. */
   unsigned real_level = texture->nr_samples > 1 ? 0 : level;

   assert(texture->target != PIPE_BUFFER);
   assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
   assert(box->width && box->height && box->depth);

   /* Auxiliary (metadata) planes can't be mapped. */
   if (tex->buffer.b.b.flags & SI_RESOURCE_AUX_PLANE)
      return NULL;

   /* Reading encrypted content back is not allowed. */
   if ((tex->buffer.flags & RADEON_FLAG_ENCRYPTED) && usage & PIPE_MAP_READ)
      return NULL;

   if (tex->is_depth || tex->buffer.flags & RADEON_FLAG_SPARSE) {
      /* Depth and sparse textures use staging unconditionally. */
      use_staging_texture = true;
   } else {
      /* Degrade the tile mode if we get too many transfers on APUs.
       * On dGPUs, the staging texture is always faster.
       * Only count uploads that are at least 4x4 pixels large.
       */
      if (!sctx->screen->info.has_dedicated_vram && real_level == 0 && box->width >= 4 &&
          box->height >= 4 && p_atomic_inc_return(&tex->num_level0_transfers) == 10) {
         bool can_invalidate = si_can_invalidate_texture(sctx->screen, tex, usage, box);

         si_reallocate_texture_inplace(sctx, tex, PIPE_BIND_LINEAR, can_invalidate);
      }

      /* Tiled textures need to be converted into a linear texture for CPU
       * access. The staging texture is always linear and is placed in GART.
       *
       * dGPU use a staging texture for VRAM, so that we don't map it and
       * don't relocate it to GTT.
       *
       * Reading from VRAM or GTT WC is slow, always use the staging
       * texture in this case.
       *
       * Use the staging texture for uploads if the underlying BO
       * is busy.
       */
      if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) ||
          (tex->buffer.domains & RADEON_DOMAIN_VRAM && sctx->screen->info.has_dedicated_vram))
         use_staging_texture = true;
      else if (usage & PIPE_MAP_READ)
         use_staging_texture =
            tex->buffer.domains & RADEON_DOMAIN_VRAM || tex->buffer.flags & RADEON_FLAG_GTT_WC;
      /* Write & linear only: */
      else if (si_cs_is_buffer_referenced(sctx, tex->buffer.buf, RADEON_USAGE_READWRITE) ||
               !sctx->ws->buffer_wait(sctx->ws, tex->buffer.buf, 0, RADEON_USAGE_READWRITE)) {
         /* It's busy. */
         if (si_can_invalidate_texture(sctx->screen, tex, usage, box))
            si_texture_invalidate_storage(sctx, tex);
         else
            use_staging_texture = true;
      }
   }

   trans = CALLOC_STRUCT(si_transfer);
   if (!trans)
      return NULL;
   pipe_resource_reference(&trans->b.b.resource, texture);
   trans->b.b.level = level;
   trans->b.b.usage = usage;
   trans->b.b.box = *box;

   if (use_staging_texture) {
      struct pipe_resource resource;
      struct si_texture *staging;
      unsigned bo_usage = usage & PIPE_MAP_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
      unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR | SI_RESOURCE_FLAG_DRIVER_INTERNAL;

      si_init_temp_resource_from_box(&resource, texture, box, real_level, bo_usage,
                                     bo_flags);

      /* Since depth-stencil textures don't support linear tiling,
       * blit from ZS to color and vice versa. u_blitter will do
       * the packing for these formats.
       */
      if (tex->is_depth)
         resource.format = util_blitter_get_color_format_for_zs(resource.format);

      /* Create the temporary texture. */
      staging = (struct si_texture *)ctx->screen->resource_create(ctx->screen, &resource);
      if (!staging) {
         PRINT_ERR("failed to create temporary texture to hold untiled copy\n");
         goto fail_trans;
      }
      trans->staging = &staging->buffer;

      /* Just get the strides. */
      si_texture_get_offset(sctx->screen, staging, 0, NULL, &trans->b.b.stride,
                            &trans->b.b.layer_stride);

      if (usage & PIPE_MAP_READ)
         si_copy_to_staging_texture(ctx, trans);
      else
         usage |= PIPE_MAP_UNSYNCHRONIZED;

      buf = trans->staging;
   } else {
      /* the resource is mapped directly */
      offset = si_texture_get_offset(sctx->screen, tex, real_level, box, &trans->b.b.stride,
                                     &trans->b.b.layer_stride);
      buf = &tex->buffer;
   }

   /* Always unmap texture CPU mappings on 32-bit architectures, so that
    * we don't run out of the CPU address space.
    */
   if (sizeof(void *) == 4)
      usage |= RADEON_MAP_TEMPORARY;

   if (!(map = si_buffer_map(sctx, buf, usage)))
      goto fail_trans;

   *ptransfer = &trans->b.b;
   return map + offset;

fail_trans:
   si_resource_reference(&trans->staging, NULL);
   pipe_resource_reference(&trans->b.b.resource, NULL);
   FREE(trans);
   return NULL;
}
2106 
/* Unmap a texture transfer: unmap the CPU pointer (32-bit only), copy a
 * staging write back to the real texture, release the staging buffer, and
 * free the transfer object.
 */
static void si_texture_transfer_unmap(struct pipe_context *ctx, struct pipe_transfer *transfer)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_transfer *stransfer = (struct si_transfer *)transfer;
   struct pipe_resource *texture = transfer->resource;
   struct si_texture *tex = (struct si_texture *)texture;

   /* Always unmap texture CPU mappings on 32-bit architectures, so that
    * we don't run out of the CPU address space.
    */
   if (sizeof(void *) == 4) {
      /* The mapping was made on the staging buffer if one was used,
       * otherwise directly on the texture's buffer. */
      struct si_resource *buf = stransfer->staging ? stransfer->staging : &tex->buffer;

      sctx->ws->buffer_unmap(sctx->ws, buf->buf);
   }

   /* Write transfers that went through a staging texture must be blitted
    * back into the real (tiled) texture now. */
   if ((transfer->usage & PIPE_MAP_WRITE) && stransfer->staging)
      si_copy_from_staging_texture(ctx, stransfer);

   if (stransfer->staging) {
      /* Account the staging allocation for the flush heuristic below. */
      sctx->num_alloc_tex_transfer_bytes += stransfer->staging->buf->size;
      si_resource_reference(&stransfer->staging, NULL);
   }

   /* Heuristic for {upload, draw, upload, draw, ..}:
    *
    * Flush the gfx IB if we've allocated too much texture storage.
    *
    * The idea is that we don't want to build IBs that use too much
    * memory and put pressure on the kernel memory manager and we also
    * want to make temporary and invalidated buffers go idle ASAP to
    * decrease the total memory usage or make them reusable. The memory
    * usage will be slightly higher than given here because of the buffer
    * cache in the winsys.
    *
    * The result is that the kernel memory manager is never a bottleneck.
    */
   if (sctx->num_alloc_tex_transfer_bytes > (uint64_t)sctx->screen->info.gart_size_kb * 1024 / 4) {
      si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
      sctx->num_alloc_tex_transfer_bytes = 0;
   }

   pipe_resource_reference(&transfer->resource, NULL);
   FREE(transfer);
}
2152 
2153 /* Return if it's allowed to reinterpret one format as another with DCC enabled.
2154  */
vi_dcc_formats_compatible(struct si_screen * sscreen,enum pipe_format format1,enum pipe_format format2)2155 bool vi_dcc_formats_compatible(struct si_screen *sscreen, enum pipe_format format1,
2156                                enum pipe_format format2)
2157 {
2158    const struct util_format_description *desc1, *desc2;
2159 
2160    /* All formats are compatible on GFX11. */
2161    if (sscreen->info.gfx_level >= GFX11)
2162       return true;
2163 
2164    /* No format change - exit early. */
2165    if (format1 == format2)
2166       return true;
2167 
2168    format1 = ac_simplify_cb_format(format1);
2169    format2 = ac_simplify_cb_format(format2);
2170 
2171    /* Check again after format adjustments. */
2172    if (format1 == format2)
2173       return true;
2174 
2175    desc1 = util_format_description(format1);
2176    desc2 = util_format_description(format2);
2177 
2178    if (desc1->layout != UTIL_FORMAT_LAYOUT_PLAIN || desc2->layout != UTIL_FORMAT_LAYOUT_PLAIN)
2179       return false;
2180 
2181    /* Float and non-float are totally incompatible. */
2182    if ((desc1->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) !=
2183        (desc2->channel[0].type == UTIL_FORMAT_TYPE_FLOAT))
2184       return false;
2185 
2186    /* Channel sizes must match across DCC formats.
2187     * Comparing just the first 2 channels should be enough.
2188     */
2189    if (desc1->channel[0].size != desc2->channel[0].size ||
2190        (desc1->nr_channels >= 2 && desc1->channel[1].size != desc2->channel[1].size))
2191       return false;
2192 
2193    /* Everything below is not needed if the driver never uses the DCC
2194     * clear code with the value of 1.
2195     */
2196 
2197    /* If the clear values are all 1 or all 0, this constraint can be
2198     * ignored. */
2199    if (ac_alpha_is_on_msb(&sscreen->info, format1) != ac_alpha_is_on_msb(&sscreen->info, format2))
2200       return false;
2201 
2202    /* Channel types must match if the clear value of 1 is used.
2203     * The type categories are only float, signed, unsigned.
2204     * NORM and INT are always compatible.
2205     */
2206    if (desc1->channel[0].type != desc2->channel[0].type ||
2207        (desc1->nr_channels >= 2 && desc1->channel[1].type != desc2->channel[1].type))
2208       return false;
2209 
2210    return true;
2211 }
2212 
vi_dcc_formats_are_incompatible(struct pipe_resource * tex,unsigned level,enum pipe_format view_format)2213 bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level,
2214                                      enum pipe_format view_format)
2215 {
2216    struct si_texture *stex = (struct si_texture *)tex;
2217 
2218    return vi_dcc_enabled(stex, level) &&
2219           !vi_dcc_formats_compatible(si_screen(tex->screen), tex->format, view_format);
2220 }
2221 
2222 /* This can't be merged with the above function, because
2223  * vi_dcc_formats_compatible should be called only when DCC is enabled. */
vi_disable_dcc_if_incompatible_format(struct si_context * sctx,struct pipe_resource * tex,unsigned level,enum pipe_format view_format)2224 void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, struct pipe_resource *tex,
2225                                            unsigned level, enum pipe_format view_format)
2226 {
2227    struct si_texture *stex = (struct si_texture *)tex;
2228 
2229    if (vi_dcc_formats_are_incompatible(tex, level, view_format))
2230       if (!si_texture_disable_dcc(sctx, stex))
2231          si_decompress_dcc(sctx, stex);
2232 }
2233 
si_create_surface(struct pipe_context * pipe,struct pipe_resource * tex,const struct pipe_surface * templ)2234 static struct pipe_surface *si_create_surface(struct pipe_context *pipe, struct pipe_resource *tex,
2235                                               const struct pipe_surface *templ)
2236 {
2237    unsigned level = templ->u.tex.level;
2238    unsigned width = u_minify(tex->width0, level);
2239    unsigned height = u_minify(tex->height0, level);
2240    unsigned width0 = tex->width0;
2241    unsigned height0 = tex->height0;
2242 
2243    if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
2244       const struct util_format_description *tex_desc = util_format_description(tex->format);
2245       const struct util_format_description *templ_desc = util_format_description(templ->format);
2246 
2247       assert(tex_desc->block.bits == templ_desc->block.bits);
2248 
2249       /* Adjust size of surface if and only if the block width or
2250        * height is changed. */
2251       if (tex_desc->block.width != templ_desc->block.width ||
2252           tex_desc->block.height != templ_desc->block.height) {
2253          unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
2254          unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
2255 
2256          width = nblks_x * templ_desc->block.width;
2257          height = nblks_y * templ_desc->block.height;
2258 
2259          width0 = util_format_get_nblocksx(tex->format, width0);
2260          height0 = util_format_get_nblocksy(tex->format, height0);
2261       }
2262    }
2263 
2264    struct si_surface *surface = CALLOC_STRUCT(si_surface);
2265 
2266    if (!surface)
2267       return NULL;
2268 
2269    assert(templ->u.tex.first_layer <= util_max_layer(tex, templ->u.tex.level));
2270    assert(templ->u.tex.last_layer <= util_max_layer(tex, templ->u.tex.level));
2271 
2272    pipe_reference_init(&surface->base.reference, 1);
2273    pipe_resource_reference(&surface->base.texture, tex);
2274    surface->base.context = pipe;
2275    surface->base.format = templ->format;
2276    surface->base.width = width;
2277    surface->base.height = height;
2278    surface->base.u = templ->u;
2279 
2280    surface->width0 = width0;
2281    surface->height0 = height0;
2282 
2283    surface->dcc_incompatible =
2284       tex->target != PIPE_BUFFER &&
2285       vi_dcc_formats_are_incompatible(tex, templ->u.tex.level, templ->format);
2286    return &surface->base;
2287 }
2288 
si_surface_destroy(struct pipe_context * pipe,struct pipe_surface * surface)2289 static void si_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surface)
2290 {
2291    pipe_resource_reference(&surface->texture, NULL);
2292    FREE(surface);
2293 }
2294 
2295 static struct pipe_memory_object *
si_memobj_from_handle(struct pipe_screen * screen,struct winsys_handle * whandle,bool dedicated)2296 si_memobj_from_handle(struct pipe_screen *screen, struct winsys_handle *whandle, bool dedicated)
2297 {
2298    struct si_screen *sscreen = (struct si_screen *)screen;
2299    struct si_memory_object *memobj = CALLOC_STRUCT(si_memory_object);
2300    struct pb_buffer_lean *buf = NULL;
2301 
2302    if (!memobj)
2303       return NULL;
2304 
2305    buf = sscreen->ws->buffer_from_handle(sscreen->ws, whandle, sscreen->info.max_alignment, false);
2306    if (!buf) {
2307       free(memobj);
2308       return NULL;
2309    }
2310 
2311    memobj->b.dedicated = dedicated;
2312    memobj->buf = buf;
2313    memobj->stride = whandle->stride;
2314 
2315    return (struct pipe_memory_object *)memobj;
2316 }
2317 
si_memobj_destroy(struct pipe_screen * screen,struct pipe_memory_object * _memobj)2318 static void si_memobj_destroy(struct pipe_screen *screen, struct pipe_memory_object *_memobj)
2319 {
2320    struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
2321 
2322    radeon_bo_reference(((struct si_screen*)screen)->ws, &memobj->buf, NULL);
2323    free(memobj);
2324 }
2325 
/* Create a buffer or texture resource backed by an imported memory object at
 * the given byte offset. */
static struct pipe_resource *si_resource_from_memobj(struct pipe_screen *screen,
                                                    const struct pipe_resource *templ,
                                                    struct pipe_memory_object *_memobj,
                                                    uint64_t offset)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   struct si_memory_object *memobj = (struct si_memory_object *)_memobj;
   struct pipe_resource *res;

   if (templ->target == PIPE_BUFFER) {
      res = si_buffer_from_winsys_buffer(screen, templ, memobj->buf, offset);
   } else {
      res = si_texture_from_winsys_buffer(sscreen, templ, memobj->buf, memobj->stride, offset,
                                          DRM_FORMAT_MOD_INVALID,
                                          PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE |
                                             PIPE_HANDLE_USAGE_SHADER_WRITE,
                                          memobj->b.dedicated);
   }

   if (!res)
      return NULL;

   /* si_texture_from_winsys_buffer doesn't increment refcount of
    * memobj->buf, so increment it here.
    */
   struct pb_buffer_lean *buf = NULL;
   radeon_bo_reference(sscreen->ws, &buf, memobj->buf);
   return res;
}
2354 
si_check_resource_capability(struct pipe_screen * screen,struct pipe_resource * resource,unsigned bind)2355 static bool si_check_resource_capability(struct pipe_screen *screen, struct pipe_resource *resource,
2356                                          unsigned bind)
2357 {
2358    struct si_texture *tex = (struct si_texture *)resource;
2359 
2360    /* Buffers only support the linear flag. */
2361    if (resource->target == PIPE_BUFFER)
2362       return (bind & ~PIPE_BIND_LINEAR) == 0;
2363 
2364    if (bind & PIPE_BIND_LINEAR && !tex->surface.is_linear)
2365       return false;
2366 
2367    if (bind & PIPE_BIND_SCANOUT && !tex->surface.is_displayable)
2368       return false;
2369 
2370    /* TODO: PIPE_BIND_CURSOR - do we care? */
2371    return true;
2372 }
2373 
/* Report the virtual page dimensions used for sparse (PRT) textures.
 * Returns the number of supported page sizes (0 or 1) and optionally writes
 * the x/y/z page dimensions for the given format. */
static int si_get_sparse_texture_virtual_page_size(struct pipe_screen *screen,
                                                   enum pipe_texture_target target,
                                                   bool multi_sample,
                                                   enum pipe_format format,
                                                   unsigned offset, unsigned size,
                                                   int *x, int *y, int *z)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   /* Only one page size is supported, so only offset 0 is valid. */
   if (offset != 0)
      return 0;

   /* Page dimensions indexed by log2(bytes per block). */
   static const int page_size_2d[][3] = {
      { 256, 256, 1 }, /* 8bpp   */
      { 256, 128, 1 }, /* 16bpp  */
      { 128, 128, 1 }, /* 32bpp  */
      { 128, 64,  1 }, /* 64bpp  */
      { 64,  64,  1 }, /* 128bpp */
   };
   static const int page_size_3d[][3] = {
      { 64,  32,  32 }, /* 8bpp   */
      { 32,  32,  32 }, /* 16bpp  */
      { 32,  32,  16 }, /* 32bpp  */
      { 32,  16,  16 }, /* 64bpp  */
      { 16,  16,  16 }, /* 128bpp */
   };

   const int (*page_sizes)[3];

   /* Supported targets. */
   switch (target) {
   case PIPE_TEXTURE_3D:
      page_sizes = page_size_3d;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      page_sizes = page_size_2d;
      break;
   default:
      return 0;
   }

   /* ARB_sparse_texture2 needs to query supported virtual page x/y/z without
    * knowing the actual sample count. So a fixed virtual page x/y/z must be
    * returned for all sample counts, which means the virtual page size can
    * not be fixed to 64KB.
    *
    * Only enabled for GFX9. GFX10+ removed MS texture support. By specification
    * ARB_sparse_texture2 needs MS texture support, but this is relaxed by just
    * returning no page size for GFX10+ to keep the shader query capability.
    */
   if (multi_sample && sscreen->info.gfx_level != GFX9)
      return 0;

   /* Unsupported formats. */
   /* TODO: support these formats. */
   if (util_format_is_depth_or_stencil(format) ||
       util_format_get_num_planes(format) > 1 ||
       util_format_is_compressed(format))
      return 0;

   int blk_size = util_format_get_blocksize(format);

   /* We don't support any non-power-of-two bpp formats, so
    * pipe_screen->is_format_supported() should already filter out these formats.
    */
   assert(util_is_power_of_two_nonzero(blk_size));

   if (size) {
      unsigned idx = util_logbase2(blk_size);

      if (x)
         *x = page_sizes[idx][0];
      if (y)
         *y = page_sizes[idx][1];
      if (z)
         *z = page_sizes[idx][2];
   }

   return 1;
}
2454 
si_init_screen_texture_functions(struct si_screen * sscreen)2455 void si_init_screen_texture_functions(struct si_screen *sscreen)
2456 {
2457    sscreen->b.resource_from_handle = si_texture_from_handle;
2458    sscreen->b.resource_get_handle = si_texture_get_handle;
2459    sscreen->b.resource_get_param = si_resource_get_param;
2460    sscreen->b.resource_get_info = si_texture_get_info;
2461    sscreen->b.resource_from_memobj = si_resource_from_memobj;
2462    sscreen->b.memobj_create_from_handle = si_memobj_from_handle;
2463    sscreen->b.memobj_destroy = si_memobj_destroy;
2464    sscreen->b.check_resource_capability = si_check_resource_capability;
2465    sscreen->b.get_sparse_texture_virtual_page_size =
2466       si_get_sparse_texture_virtual_page_size;
2467 
2468    /* By not setting it the frontend will fall back to non-modifier create,
2469     * which works around some applications using modifiers that are not
2470     * allowed in combination with lack of error reporting in
2471     * gbm_dri_surface_create */
2472    if (sscreen->info.gfx_level >= GFX9 && sscreen->info.kernel_has_modifiers) {
2473       sscreen->b.resource_create_with_modifiers = si_texture_create_with_modifiers;
2474       sscreen->b.query_dmabuf_modifiers = si_query_dmabuf_modifiers;
2475       sscreen->b.is_dmabuf_modifier_supported = si_is_dmabuf_modifier_supported;
2476       sscreen->b.get_dmabuf_modifier_planes = si_get_dmabuf_modifier_planes;
2477    }
2478 }
2479 
si_init_context_texture_functions(struct si_context * sctx)2480 void si_init_context_texture_functions(struct si_context *sctx)
2481 {
2482    sctx->b.texture_map = si_texture_transfer_map;
2483    sctx->b.texture_unmap = si_texture_transfer_unmap;
2484    sctx->b.create_surface = si_create_surface;
2485    sctx->b.surface_destroy = si_surface_destroy;
2486 }
2487