xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/panfrost/pan_afbc_cso.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2023 Amazon.com, Inc. or its affiliates
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "pan_afbc_cso.h"
25 #include "nir/pipe_nir.h"
26 #include "nir_builder.h"
27 #include "pan_context.h"
28 #include "pan_resource.h"
29 #include "pan_screen.h"
30 
/* Declares a local `info_ubo` in the enclosing scope: a nir_var_mem_ubo
 * variable created on the shader of builder `b`, typed as an array of uints
 * holding sizeof(struct panfrost_afbc_<name>_info) / 4 elements, with its
 * driver location set to 0.
 *
 * `b` is a nir_builder object (not a pointer). The expansion does not end
 * with a semicolon; the call site supplies it. */
#define panfrost_afbc_add_info_ubo(name, b)                                    \
   nir_variable *info_ubo = nir_variable_create(                               \
      (b).shader, nir_var_mem_ubo,                                             \
      glsl_array_type(glsl_uint_type(),                                        \
                      sizeof(struct panfrost_afbc_##name##_info) / 4, 0),      \
      "info_ubo");                                                             \
   info_ubo->data.driver_location = 0
38 
/* Emits a single-component UBO load of `field` from
 * struct panfrost_afbc_<name>_info: buffer index 0, byte offset
 * offsetof(struct, field), bit size 8 * sizeof(field).
 *
 * `b` is a nir_builder pointer expression; it is parenthesized at every use
 * so that any pointer-valued expression can be passed safely. */
#define panfrost_afbc_get_info_field(name, b, field)                           \
   nir_load_ubo(                                                               \
      (b), 1, sizeof(((struct panfrost_afbc_##name##_info *)0)->field) * 8,    \
      nir_imm_int((b), 0),                                                     \
      nir_imm_int((b), offsetof(struct panfrost_afbc_##name##_info, field)),   \
      .align_mul = 4, .range = ~0)
45 
46 static nir_def *
read_afbc_header(nir_builder * b,nir_def * buf,nir_def * idx)47 read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx)
48 {
49    nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
50    return nir_load_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16,
51                           AFBC_HEADER_BYTES_PER_TILE / 4, 32);
52 }
53 
54 static void
write_afbc_header(nir_builder * b,nir_def * buf,nir_def * idx,nir_def * hdr)55 write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
56 {
57    nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
58    nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
59 }
60 
61 static nir_def *
get_morton_index(nir_builder * b,nir_def * idx,nir_def * src_stride,nir_def * dst_stride)62 get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
63                  nir_def *dst_stride)
64 {
65    nir_def *x = nir_umod(b, idx, dst_stride);
66    nir_def *y = nir_udiv(b, idx, dst_stride);
67 
68    nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
69    offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
70 
71    x = nir_iand_imm(b, x, 0x7);
72    x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
73    x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
74    y = nir_iand_imm(b, y, 0x7);
75    y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
76    y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
77    nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
78 
79    return nir_iadd(b, offset, tile_idx);
80 }
81 
82 static nir_def *
get_superblock_size(nir_builder * b,unsigned arch,nir_def * hdr,nir_def * uncompressed_size)83 get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
84                     nir_def *uncompressed_size)
85 {
86    nir_def *size = nir_imm_int(b, 0);
87 
88    unsigned body_base_ptr_len = 32;
89    unsigned nr_subblocks = 16;
90    unsigned sz_len = 6; /* bits */
91    nir_def *words[4];
92    nir_def *mask = nir_imm_int(b, (1 << sz_len) - 1);
93    nir_def *is_solid_color = nir_imm_bool(b, false);
94 
95    for (int i = 0; i < 4; i++)
96       words[i] = nir_channel(b, hdr, i);
97 
98    /* Sum up all of the subblock sizes */
99    for (int i = 0; i < nr_subblocks; i++) {
100       nir_def *subblock_size;
101       unsigned bitoffset = body_base_ptr_len + (i * sz_len);
102       unsigned start = bitoffset / 32;
103       unsigned end = (bitoffset + (sz_len - 1)) / 32;
104       unsigned offset = bitoffset % 32;
105 
106       /* Handle differently if the size field is split between two words
107        * of the header */
108       if (start != end) {
109          subblock_size = nir_ior(b, nir_ushr_imm(b, words[start], offset),
110                                  nir_ishl_imm(b, words[end], 32 - offset));
111          subblock_size = nir_iand(b, subblock_size, mask);
112       } else {
113          subblock_size =
114             nir_ubitfield_extract_imm(b, words[start], offset, sz_len);
115       }
116       subblock_size = nir_bcsel(b, nir_ieq_imm(b, subblock_size, 1),
117                                 uncompressed_size, subblock_size);
118       size = nir_iadd(b, size, subblock_size);
119 
120       /* When the first subblock size is set to zero, the whole superblock is
121        * filled with a solid color specified in the header */
122       if (arch >= 7 && i == 0)
123          is_solid_color = nir_ieq_imm(b, size, 0);
124    }
125 
126    return (arch >= 7)
127              ? nir_bcsel(b, is_solid_color, nir_imm_zero(b, 1, 32), size)
128              : size;
129 }
130 
131 static nir_def *
get_packed_offset(nir_builder * b,nir_def * metadata,nir_def * idx,nir_def ** out_size)132 get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
133                   nir_def **out_size)
134 {
135    nir_def *metadata_offset =
136       nir_u2u64(b, nir_imul_imm(b, idx, sizeof(struct pan_afbc_block_info)));
137    nir_def *range_ptr = nir_iadd(b, metadata, metadata_offset);
138    nir_def *entry = nir_load_global(b, range_ptr, 4,
139                                     sizeof(struct pan_afbc_block_info) / 4, 32);
140    nir_def *offset =
141       nir_channel(b, entry, offsetof(struct pan_afbc_block_info, offset) / 4);
142 
143    if (out_size)
144       *out_size =
145          nir_channel(b, entry, offsetof(struct pan_afbc_block_info, size) / 4);
146 
147    return nir_u2u64(b, offset);
148 }
149 
150 #define MAX_LINE_SIZE 16
151 
152 static void
copy_superblock(nir_builder * b,nir_def * dst,nir_def * dst_idx,nir_def * hdr_sz,nir_def * src,nir_def * src_idx,nir_def * metadata,nir_def * meta_idx,unsigned align)153 copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
154                 nir_def *src, nir_def *src_idx, nir_def *metadata,
155                 nir_def *meta_idx, unsigned align)
156 {
157    nir_def *hdr = read_afbc_header(b, src, src_idx);
158    nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
159    nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);
160 
161    nir_def *size;
162    nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
163    nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
164    nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);
165 
166    /* Replace the `base_body_ptr` field if not zero (solid color) */
167    nir_def *hdr2 =
168       nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
169    hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
170    write_afbc_header(b, dst, dst_idx, hdr);
171 
172    nir_variable *offset_var =
173       nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
174    nir_store_var(b, offset_var, nir_imm_int(b, 0), 1);
175    nir_loop *loop = nir_push_loop(b);
176    {
177       nir_def *offset = nir_load_var(b, offset_var);
178       nir_if *loop_check = nir_push_if(b, nir_uge(b, offset, size));
179       nir_jump(b, nir_jump_break);
180       nir_push_else(b, loop_check);
181       unsigned line_sz = align <= MAX_LINE_SIZE ? align : MAX_LINE_SIZE;
182       for (unsigned i = 0; i < align / line_sz; ++i) {
183          nir_def *src_line = nir_iadd(b, src_bodyptr, nir_u2u64(b, offset));
184          nir_def *dst_line = nir_iadd(b, dst_bodyptr, nir_u2u64(b, offset));
185          nir_store_global(
186             b, dst_line, line_sz,
187             nir_load_global(b, src_line, line_sz, line_sz / 4, 32), ~0);
188          offset = nir_iadd_imm(b, offset, line_sz);
189       }
190       nir_store_var(b, offset_var, offset, 0x1);
191       nir_pop_if(b, loop_check);
192    }
193    nir_pop_loop(b, loop);
194 }
195 
196 #define panfrost_afbc_size_get_info_field(b, field)                            \
197    panfrost_afbc_get_info_field(size, b, field)
198 
199 static nir_shader *
panfrost_afbc_create_size_shader(struct panfrost_screen * screen,unsigned bpp,unsigned align)200 panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp,
201                                  unsigned align)
202 {
203    struct panfrost_device *dev = pan_device(&screen->base);
204 
205    nir_builder b = nir_builder_init_simple_shader(
206       MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(),
207       "panfrost_afbc_size(bpp=%d)", bpp);
208 
209    panfrost_afbc_add_info_ubo(size, b);
210 
211    nir_def *coord = nir_load_global_invocation_id(&b, 32);
212    nir_def *block_idx = nir_channel(&b, coord, 0);
213    nir_def *src = panfrost_afbc_size_get_info_field(&b, src);
214    nir_def *metadata = panfrost_afbc_size_get_info_field(&b, metadata);
215    nir_def *uncompressed_size = nir_imm_int(&b, 4 * 4 * bpp / 8); /* bytes */
216 
217    nir_def *hdr = read_afbc_header(&b, src, block_idx);
218    nir_def *size = get_superblock_size(&b, dev->arch, hdr, uncompressed_size);
219    size = nir_iand(&b, nir_iadd(&b, size, nir_imm_int(&b, align - 1)),
220                    nir_inot(&b, nir_imm_int(&b, align - 1)));
221 
222    nir_def *offset = nir_u2u64(
223       &b,
224       nir_iadd(&b,
225                nir_imul_imm(&b, block_idx, sizeof(struct pan_afbc_block_info)),
226                nir_imm_int(&b, offsetof(struct pan_afbc_block_info, size))));
227    nir_store_global(&b, nir_iadd(&b, metadata, offset), 4, size, 0x1);
228 
229    return b.shader;
230 }
231 
232 #define panfrost_afbc_pack_get_info_field(b, field)                            \
233    panfrost_afbc_get_info_field(pack, b, field)
234 
235 static nir_shader *
panfrost_afbc_create_pack_shader(struct panfrost_screen * screen,unsigned align,bool tiled)236 panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align,
237                                  bool tiled)
238 {
239    nir_builder b = nir_builder_init_simple_shader(
240       MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(),
241       "panfrost_afbc_pack");
242 
243    panfrost_afbc_add_info_ubo(pack, b);
244 
245    nir_def *coord = nir_load_global_invocation_id(&b, 32);
246    nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
247    nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
248    nir_def *dst_idx = nir_channel(&b, coord, 0);
249    nir_def *src_idx =
250       tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
251    nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
252    nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
253    nir_def *header_size =
254       nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
255    nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);
256 
257    copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
258                    src_idx, align);
259 
260    return b.shader;
261 }
262 
263 struct pan_afbc_shader_data *
panfrost_afbc_get_shaders(struct panfrost_context * ctx,struct panfrost_resource * rsrc,unsigned align)264 panfrost_afbc_get_shaders(struct panfrost_context *ctx,
265                           struct panfrost_resource *rsrc, unsigned align)
266 {
267    struct pipe_context *pctx = &ctx->base;
268    struct panfrost_screen *screen = pan_screen(ctx->base.screen);
269    bool tiled = rsrc->image.layout.modifier & AFBC_FORMAT_MOD_TILED;
270    struct pan_afbc_shader_key key = {
271       .bpp = util_format_get_blocksizebits(rsrc->base.format),
272       .align = align,
273       .tiled = tiled,
274    };
275 
276    pthread_mutex_lock(&ctx->afbc_shaders.lock);
277    struct hash_entry *he =
278       _mesa_hash_table_search(ctx->afbc_shaders.shaders, &key);
279    struct pan_afbc_shader_data *shader = he ? he->data : NULL;
280    pthread_mutex_unlock(&ctx->afbc_shaders.lock);
281 
282    if (shader)
283       return shader;
284 
285    shader = rzalloc(ctx->afbc_shaders.shaders, struct pan_afbc_shader_data);
286    shader->key = key;
287    _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader);
288 
289 #define COMPILE_SHADER(name, ...)                                              \
290    {                                                                           \
291       nir_shader *nir =                                                        \
292          panfrost_afbc_create_##name##_shader(screen, __VA_ARGS__);            \
293       nir->info.num_ubos = 1;                                                  \
294       shader->name##_cso = pipe_shader_from_nir(pctx, nir);                    \
295    }
296 
297    COMPILE_SHADER(size, key.bpp, key.align);
298    COMPILE_SHADER(pack, key.align, key.tiled);
299 
300 #undef COMPILE_SHADER
301 
302    pthread_mutex_lock(&ctx->afbc_shaders.lock);
303    _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader);
304    pthread_mutex_unlock(&ctx->afbc_shaders.lock);
305 
306    return shader;
307 }
308 
309 DERIVE_HASH_TABLE(pan_afbc_shader_key);
310 
311 void
panfrost_afbc_context_init(struct panfrost_context * ctx)312 panfrost_afbc_context_init(struct panfrost_context *ctx)
313 {
314    ctx->afbc_shaders.shaders = pan_afbc_shader_key_table_create(NULL);
315    pthread_mutex_init(&ctx->afbc_shaders.lock, NULL);
316 }
317 
318 void
panfrost_afbc_context_destroy(struct panfrost_context * ctx)319 panfrost_afbc_context_destroy(struct panfrost_context *ctx)
320 {
321    _mesa_hash_table_destroy(ctx->afbc_shaders.shaders, NULL);
322    pthread_mutex_destroy(&ctx->afbc_shaders.lock);
323 }
324