/*
 * Copyright © 2017 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#define FD_BO_NO_HARDPIN 1

#include "pipe/p_state.h"

#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd6_image.h"
#include "fd6_pack.h"
#include "fd6_resource.h"
#include "fd6_screen.h"
#include "fd6_texture.h"

static const uint8_t swiz_identity[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                                         PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W};

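/* Return the GPU virtual address of the resource's backing BO at the
 * given byte offset, or zero if no resource is bound:
 */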
static uint64_t
rsc_iova(struct pipe_resource *prsc, unsigned offset)
{
   if (!prsc)
      return 0;
   return fd_bo_get_iova(fd_resource(prsc)->bo) + offset;
}

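/* SSBO descriptors are plain R32_UINT buffer views covering the bound
 * range of the buffer:
 */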
static void
fd6_ssbo_descriptor(struct fd_context *ctx,
                    const struct pipe_shader_buffer *buf, uint32_t *descriptor)
{
   fdl6_buffer_view_init(
      descriptor,
      PIPE_FORMAT_R32_UINT,
      swiz_identity, rsc_iova(buf->buffer, buf->buffer_offset),
      buf->buffer_size);
}

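/* Build the descriptor for an image view: PIPE_BUFFER images become
 * buffer views, everything else gets a full fdl6 storage-image view:
 */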
static void
fd6_image_descriptor(struct fd_context *ctx, const struct pipe_image_view *buf,
                     uint32_t *descriptor)
{
   if (buf->resource->target == PIPE_BUFFER) {
      uint32_t size = fd_clamp_buffer_size(buf->format, buf->u.buf.size,
                                           A4XX_MAX_TEXEL_BUFFER_ELEMENTS_UINT);

      fdl6_buffer_view_init(descriptor, buf->format, swiz_identity,
                            rsc_iova(buf->resource, buf->u.buf.offset),
                            size);
   } else {
      struct fdl_view_args args = {
         .chip = ctx->screen->gen,

         .iova = rsc_iova(buf->resource, 0),

         .base_miplevel = buf->u.tex.level,
         .level_count = 1,

         .base_array_layer = buf->u.tex.first_layer,
         .layer_count = buf->u.tex.last_layer - buf->u.tex.first_layer + 1,

         .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z,
                  PIPE_SWIZZLE_W},
         .format = buf->format,

         .type = fdl_type_from_pipe_target(buf->resource->target),
         .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                            FDL_CHROMA_LOCATION_COSITED_EVEN},
         .ubwc_fc_mutable = false,
      };

      /* fdl6_view makes the storage descriptor treat cubes like a 2D array (so
       * you can reference a specific layer), but we need to do that for the
       * texture descriptor as well to get our layer.
       */
      if (args.type == FDL_VIEW_TYPE_CUBE)
         args.type = FDL_VIEW_TYPE_2D;

      struct fdl6_view view;
      struct fd_resource *rsc = fd_resource(buf->resource);
      const struct fdl_layout *layouts[3] = { &rsc->layout, NULL, NULL };
      fdl6_view_init(&view, layouts, &args,
                     ctx->screen->info->a6xx.has_z24uint_s8uint);

      memcpy(descriptor, view.storage_descriptor, sizeof(view.storage_descriptor));
   }
}

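/* Get the descriptor set used for a given shader stage.  Compute has its
 * own dedicated set, the gfx stages map to sets via
 * ir3_shader_descriptor_set():
 */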
static struct fd6_descriptor_set *
descriptor_set(struct fd_context *ctx, enum pipe_shader_type shader)
   assert_dt
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);

   if (shader == PIPE_SHADER_COMPUTE)
      return &fd6_ctx->cs_descriptor_set;

   unsigned idx = ir3_shader_descriptor_set(shader);
   assert(idx < ARRAY_SIZE(fd6_ctx->descriptor_sets));
   return &fd6_ctx->descriptor_sets[idx];
}

static void
clear_descriptor(struct fd6_descriptor_set *set, unsigned slot)
{
   /* The 2nd dword of the descriptor contains the width and height,
    * so a non-zero value means the slot was previously valid and
    * must be cleared.  We can't leave dangling descriptors, as the
    * shader could use variable indexing into the set of IBOs to
    * get at them.  See piglit arb_shader_image_load_store-invalid.
    */
   if (!set->descriptor[slot][1])
      return;

   fd6_descriptor_set_invalidate(set);

   memset(set->descriptor[slot], 0, sizeof(set->descriptor[slot]));
}

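/* Rebuild the image descriptor if the backing storage has changed since
 * it was last written, which is detected via the resource seqno:
 */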
static void
validate_image_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
                          unsigned slot, struct pipe_image_view *img)
{
   struct fd_resource *rsc = fd_resource(img->resource);

   if (!rsc || (rsc->seqno == set->seqno[slot]))
      return;

   fd6_descriptor_set_invalidate(set);

   fd6_image_descriptor(ctx, img, set->descriptor[slot]);
   set->seqno[slot] = rsc->seqno;
}

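/* As validate_image_descriptor(), but for SSBO descriptors: */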
static void
validate_buffer_descriptor(struct fd_context *ctx, struct fd6_descriptor_set *set,
                           unsigned slot, struct pipe_shader_buffer *buf)
{
   struct fd_resource *rsc = fd_resource(buf->buffer);

   if (!rsc || (rsc->seqno == set->seqno[slot]))
      return;

   fd6_descriptor_set_invalidate(set);

   fd6_ssbo_descriptor(ctx, buf, set->descriptor[slot]);
   set->seqno[slot] = rsc->seqno;
}

/* Build bindless descriptor state, returns ownership of state reference */
template <chip CHIP>
struct fd_ringbuffer *
fd6_build_bindless_state(struct fd_context *ctx, enum pipe_shader_type shader,
                         bool append_fb_read)
{
   struct fd_shaderbuf_stateobj *bufso = &ctx->shaderbuf[shader];
   struct fd_shaderimg_stateobj *imgso = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(
      ctx->batch->submit, 16 * 4, FD_RINGBUFFER_STREAMING);

   /* Don't re-use a previous descriptor set if appending the
    * fb-read descriptor, as that can change across batches.
    * The normal descriptor slots are safe to re-use even if
    * the state is dirtied due to batch flush, but the fb-read
    * slot is not.
    */
   if (unlikely(append_fb_read))
      fd6_descriptor_set_invalidate(set);

   /*
    * Re-validate the descriptor slots, i.e. in the case that
    * the resource gets rebound due to use with a non-UBWC-
    * compatible view format, etc.
    *
    * While we are at it, attach the BOs to the ring.
    */

   u_foreach_bit (b, bufso->enabled_mask) {
      struct pipe_shader_buffer *buf = &bufso->sb[b];
      unsigned idx = b + IR3_BINDLESS_SSBO_OFFSET;
      validate_buffer_descriptor(ctx, set, idx, buf);
   }

   u_foreach_bit (b, imgso->enabled_mask) {
      struct pipe_image_view *img = &imgso->si[b];
      unsigned idx = b + IR3_BINDLESS_IMAGE_OFFSET;
      validate_image_descriptor(ctx, set, idx, img);
   }

   if (!set->bo) {
      set->bo = fd_bo_new(
            ctx->dev, sizeof(set->descriptor),
            /* Use the same flags as the ringbuffer so this hits the same
             * heap, since those BOs will already have the FD_RELOC_DUMP
             * flag set:
             */
            FD_BO_GPUREADONLY | FD_BO_CACHED_COHERENT,
            "%s bindless", _mesa_shader_stage_to_abbrev(shader));
      fd_bo_mark_for_dump(set->bo);

      uint32_t *desc_buf = (uint32_t *)fd_bo_map(set->bo);

      memcpy(desc_buf, set->descriptor, sizeof(set->descriptor));

      if (unlikely(append_fb_read)) {
         /* Reserve A6XX_MAX_RENDER_TARGETS image slots for fb-read */
         unsigned idx = IR3_BINDLESS_DESC_COUNT - 1 - A6XX_MAX_RENDER_TARGETS;

         for (int i = 0; i < ctx->batch->framebuffer.nr_cbufs; i++) {
            /* This is patched with the appropriate descriptor for the GMEM
             * or sysmem rendering path in fd6_gmem:
             */
            struct fd_cs_patch patch = {
               .cs = &desc_buf[(idx + i) * FDL6_TEX_CONST_DWORDS],
               .val = i,
            };
            util_dynarray_append(&ctx->batch->fb_read_patches,
                                 __typeof__(patch), patch);
         }
      }
   }

   /*
    * Build stateobj emitting reg writes to configure the descriptor
    * set and CP_LOAD_STATE packets to preload the state.
    *
    * Note that unless the app is using the max # of SSBOs there will
    * be a gap between the IBO descriptors used for SSBOs and for images,
    * so emit this as two CP_LOAD_STATE packets:
    */

   unsigned idx = ir3_shader_descriptor_set(shader);

   fd_ringbuffer_attach_bo(ring, set->bo);

   if (shader == PIPE_SHADER_COMPUTE) {
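      /* Invalidate the CS bindless state, one bit per bindless base
       * (a6xx has five bindless bases, a7xx has eight):
       */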
      OUT_REG(ring,
         HLSQ_INVALIDATE_CMD(
            CHIP,
            .cs_bindless = CHIP == A6XX ? 0x1f : 0xff,
         )
      );
      OUT_REG(ring, SP_CS_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));
      OUT_REG(ring, A6XX_HLSQ_CS_BINDLESS_BASE_DESCRIPTOR(
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                  .state_type  = ST6_IBO,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_CS_SHADER,
                  .num_unit    = util_last_bit(bufso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6_FRAG,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                  .state_type  = ST6_IBO,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_CS_SHADER,
                  .num_unit    = util_last_bit(imgso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }
   } else {
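      /* As above, but for the gfx bindless state: */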
      OUT_REG(ring,
         HLSQ_INVALIDATE_CMD(
            CHIP,
            .gfx_bindless = CHIP == A6XX ? 0x1f : 0xff,
         )
      );
      OUT_REG(ring, SP_BINDLESS_BASE_DESCRIPTOR(CHIP,
            idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
      ));
      if (CHIP == A6XX) {
         OUT_REG(ring, A6XX_HLSQ_BINDLESS_BASE_DESCRIPTOR(
               idx, .desc_size = BINDLESS_DESCRIPTOR_64B, .bo = set->bo,
         ));
      }

      if (bufso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_SSBO_OFFSET,
                  .state_type  = ST6_SHADER,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_IBO,
                  .num_unit    = util_last_bit(bufso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_SSBO_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }

      if (imgso->enabled_mask) {
         OUT_PKT(ring, CP_LOAD_STATE6,
            CP_LOAD_STATE6_0(
                  .dst_off     = IR3_BINDLESS_IMAGE_OFFSET,
                  .state_type  = ST6_SHADER,
                  .state_src   = SS6_BINDLESS,
                  .state_block = SB6_IBO,
                  .num_unit    = util_last_bit(imgso->enabled_mask),
            ),
            CP_LOAD_STATE6_EXT_SRC_ADDR(
                  /* This isn't actually an address: */
                  .qword = (idx << 28) |
                     IR3_BINDLESS_IMAGE_OFFSET * FDL6_TEX_CONST_DWORDS,
            ),
         );
      }
   }

   return ring;
}
FD_GENX(fd6_build_bindless_state);

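/* pipe_context::set_shader_buffers() hook, which in addition to the common
 * state tracking eagerly updates the SSBO descriptors:
 */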
static void
fd6_set_shader_buffers(struct pipe_context *pctx, enum pipe_shader_type shader,
                       unsigned start, unsigned count,
                       const struct pipe_shader_buffer *buffers,
                       unsigned writable_bitmask)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_shaderbuf_stateobj *so = &ctx->shaderbuf[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   fd_set_shader_buffers(pctx, shader, start, count, buffers, writable_bitmask);

   for (unsigned i = 0; i < count; i++) {
      unsigned n = i + start;
      unsigned slot = n + IR3_BINDLESS_SSBO_OFFSET;
      struct pipe_shader_buffer *buf = &so->sb[n];

      /* invalidate descriptor: */
      set->seqno[slot] = 0;

      if (!buf->buffer) {
         clear_descriptor(set, slot);
         continue;
      }

      /* update descriptor: */
      validate_buffer_descriptor(ctx, set, slot, buf);
   }
}

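/* pipe_context::set_shader_images() hook.  In addition to updating the
 * image descriptors, this demotes UBWC-compressed resources that get
 * bound for coherent/volatile access:
 */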
static void
fd6_set_shader_images(struct pipe_context *pctx, enum pipe_shader_type shader,
                      unsigned start, unsigned count,
                      unsigned unbind_num_trailing_slots,
                      const struct pipe_image_view *images)
   in_dt
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd_shaderimg_stateobj *so = &ctx->shaderimg[shader];
   struct fd6_descriptor_set *set = descriptor_set(ctx, shader);

   fd_set_shader_images(pctx, shader, start, count, unbind_num_trailing_slots,
                        images);

   for (unsigned i = 0; i < count; i++) {
      unsigned n = i + start;
      unsigned slot = n + IR3_BINDLESS_IMAGE_OFFSET;
      struct pipe_image_view *buf = &so->si[n];

      /* invalidate descriptor: */
      set->seqno[slot] = 0;

      if (!buf->resource) {
         clear_descriptor(set, slot);
         continue;
      }

      struct fd_resource *rsc = fd_resource(buf->resource);

      if (buf->shader_access & (PIPE_IMAGE_ACCESS_COHERENT |
                                PIPE_IMAGE_ACCESS_VOLATILE)) {
         /* UBWC compression cannot be used with coherent/volatile access
          * due to the extra caching (CCU) involved:
          */
         if (rsc->layout.ubwc) {
            bool linear =
                  fd6_check_valid_format(rsc, buf->format) == DEMOTE_TO_LINEAR;

            perf_debug_ctx(ctx,
                           "%" PRSC_FMT ": demoted to %suncompressed due to coherent/volatile use as %s",
                           PRSC_ARGS(&rsc->b.b), linear ? "linear+" : "",
                           util_format_short_name(buf->format));

            fd_resource_uncompress(ctx, rsc, linear);
         }
      } else {
         fd6_validate_format(ctx, rsc, buf->format);
      }

      /* update descriptor: */
      validate_image_descriptor(ctx, set, slot, buf);
   }

   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
      unsigned slot = i + start + count + IR3_BINDLESS_IMAGE_OFFSET;

      set->seqno[slot] = 0;
      clear_descriptor(set, slot);
   }
}

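/* Install the a6xx-specific shader buffer/image binding hooks: */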
void
fd6_image_init(struct pipe_context *pctx)
{
   pctx->set_shader_buffers = fd6_set_shader_buffers;
   pctx->set_shader_images = fd6_set_shader_images;
}