1 /*
2 * Copyright (C) 2023 Amazon.com, Inc. or its affiliates
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "pan_afbc_cso.h"
25 #include "nir/pipe_nir.h"
26 #include "nir_builder.h"
27 #include "pan_context.h"
28 #include "pan_resource.h"
29 #include "pan_screen.h"
30
/*
 * Declare the uniform buffer holding a `struct panfrost_afbc_<name>_info`.
 *
 * Creates a UBO variable called "info_ubo" in the shader owned by the
 * nir_builder `b` (passed by value, not by pointer), typed as an array of
 * 32-bit uints covering the whole info struct, and assigns it driver
 * location 0. The expansion introduces a local named `info_ubo` in the
 * calling scope and deliberately carries no trailing semicolon: the call
 * site supplies it.
 */
#define panfrost_afbc_add_info_ubo(name, b)                                    \
   nir_variable *info_ubo = nir_variable_create(                               \
      (b).shader, nir_var_mem_ubo,                                             \
      glsl_array_type(glsl_uint_type(),                                        \
                      sizeof(struct panfrost_afbc_##name##_info) / 4, 0),      \
      "info_ubo");                                                             \
   info_ubo->data.driver_location = 0

/*
 * Emit a UBO load of one field of `struct panfrost_afbc_<name>_info`.
 *
 * `b` is a nir_builder pointer. The load reads a single component whose bit
 * size is derived from the C type of `field`, from buffer index 0 at the
 * byte offset of `field` inside the struct.
 */
#define panfrost_afbc_get_info_field(name, b, field)                           \
   nir_load_ubo(                                                               \
      (b), 1, sizeof(((struct panfrost_afbc_##name##_info *)0)->field) * 8,    \
      nir_imm_int((b), 0),                                                     \
      nir_imm_int((b), offsetof(struct panfrost_afbc_##name##_info, field)),   \
      .align_mul = 4, .range = ~0)
45
46 static nir_def *
read_afbc_header(nir_builder * b,nir_def * buf,nir_def * idx)47 read_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx)
48 {
49 nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
50 return nir_load_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16,
51 AFBC_HEADER_BYTES_PER_TILE / 4, 32);
52 }
53
54 static void
write_afbc_header(nir_builder * b,nir_def * buf,nir_def * idx,nir_def * hdr)55 write_afbc_header(nir_builder *b, nir_def *buf, nir_def *idx, nir_def *hdr)
56 {
57 nir_def *offset = nir_imul_imm(b, idx, AFBC_HEADER_BYTES_PER_TILE);
58 nir_store_global(b, nir_iadd(b, buf, nir_u2u64(b, offset)), 16, hdr, 0xF);
59 }
60
61 static nir_def *
get_morton_index(nir_builder * b,nir_def * idx,nir_def * src_stride,nir_def * dst_stride)62 get_morton_index(nir_builder *b, nir_def *idx, nir_def *src_stride,
63 nir_def *dst_stride)
64 {
65 nir_def *x = nir_umod(b, idx, dst_stride);
66 nir_def *y = nir_udiv(b, idx, dst_stride);
67
68 nir_def *offset = nir_imul(b, nir_iand_imm(b, y, ~0x7), src_stride);
69 offset = nir_iadd(b, offset, nir_ishl_imm(b, nir_ushr_imm(b, x, 3), 6));
70
71 x = nir_iand_imm(b, x, 0x7);
72 x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 2)), 0x13);
73 x = nir_iand_imm(b, nir_ior(b, x, nir_ishl_imm(b, x, 1)), 0x15);
74 y = nir_iand_imm(b, y, 0x7);
75 y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 2)), 0x13);
76 y = nir_iand_imm(b, nir_ior(b, y, nir_ishl_imm(b, y, 1)), 0x15);
77 nir_def *tile_idx = nir_ior(b, x, nir_ishl_imm(b, y, 1));
78
79 return nir_iadd(b, offset, tile_idx);
80 }
81
82 static nir_def *
get_superblock_size(nir_builder * b,unsigned arch,nir_def * hdr,nir_def * uncompressed_size)83 get_superblock_size(nir_builder *b, unsigned arch, nir_def *hdr,
84 nir_def *uncompressed_size)
85 {
86 nir_def *size = nir_imm_int(b, 0);
87
88 unsigned body_base_ptr_len = 32;
89 unsigned nr_subblocks = 16;
90 unsigned sz_len = 6; /* bits */
91 nir_def *words[4];
92 nir_def *mask = nir_imm_int(b, (1 << sz_len) - 1);
93 nir_def *is_solid_color = nir_imm_bool(b, false);
94
95 for (int i = 0; i < 4; i++)
96 words[i] = nir_channel(b, hdr, i);
97
98 /* Sum up all of the subblock sizes */
99 for (int i = 0; i < nr_subblocks; i++) {
100 nir_def *subblock_size;
101 unsigned bitoffset = body_base_ptr_len + (i * sz_len);
102 unsigned start = bitoffset / 32;
103 unsigned end = (bitoffset + (sz_len - 1)) / 32;
104 unsigned offset = bitoffset % 32;
105
106 /* Handle differently if the size field is split between two words
107 * of the header */
108 if (start != end) {
109 subblock_size = nir_ior(b, nir_ushr_imm(b, words[start], offset),
110 nir_ishl_imm(b, words[end], 32 - offset));
111 subblock_size = nir_iand(b, subblock_size, mask);
112 } else {
113 subblock_size =
114 nir_ubitfield_extract_imm(b, words[start], offset, sz_len);
115 }
116 subblock_size = nir_bcsel(b, nir_ieq_imm(b, subblock_size, 1),
117 uncompressed_size, subblock_size);
118 size = nir_iadd(b, size, subblock_size);
119
120 /* When the first subblock size is set to zero, the whole superblock is
121 * filled with a solid color specified in the header */
122 if (arch >= 7 && i == 0)
123 is_solid_color = nir_ieq_imm(b, size, 0);
124 }
125
126 return (arch >= 7)
127 ? nir_bcsel(b, is_solid_color, nir_imm_zero(b, 1, 32), size)
128 : size;
129 }
130
131 static nir_def *
get_packed_offset(nir_builder * b,nir_def * metadata,nir_def * idx,nir_def ** out_size)132 get_packed_offset(nir_builder *b, nir_def *metadata, nir_def *idx,
133 nir_def **out_size)
134 {
135 nir_def *metadata_offset =
136 nir_u2u64(b, nir_imul_imm(b, idx, sizeof(struct pan_afbc_block_info)));
137 nir_def *range_ptr = nir_iadd(b, metadata, metadata_offset);
138 nir_def *entry = nir_load_global(b, range_ptr, 4,
139 sizeof(struct pan_afbc_block_info) / 4, 32);
140 nir_def *offset =
141 nir_channel(b, entry, offsetof(struct pan_afbc_block_info, offset) / 4);
142
143 if (out_size)
144 *out_size =
145 nir_channel(b, entry, offsetof(struct pan_afbc_block_info, size) / 4);
146
147 return nir_u2u64(b, offset);
148 }
149
150 #define MAX_LINE_SIZE 16
151
152 static void
copy_superblock(nir_builder * b,nir_def * dst,nir_def * dst_idx,nir_def * hdr_sz,nir_def * src,nir_def * src_idx,nir_def * metadata,nir_def * meta_idx,unsigned align)153 copy_superblock(nir_builder *b, nir_def *dst, nir_def *dst_idx, nir_def *hdr_sz,
154 nir_def *src, nir_def *src_idx, nir_def *metadata,
155 nir_def *meta_idx, unsigned align)
156 {
157 nir_def *hdr = read_afbc_header(b, src, src_idx);
158 nir_def *src_body_base_ptr = nir_u2u64(b, nir_channel(b, hdr, 0));
159 nir_def *src_bodyptr = nir_iadd(b, src, src_body_base_ptr);
160
161 nir_def *size;
162 nir_def *dst_offset = get_packed_offset(b, metadata, meta_idx, &size);
163 nir_def *dst_body_base_ptr = nir_iadd(b, dst_offset, hdr_sz);
164 nir_def *dst_bodyptr = nir_iadd(b, dst, dst_body_base_ptr);
165
166 /* Replace the `base_body_ptr` field if not zero (solid color) */
167 nir_def *hdr2 =
168 nir_vector_insert_imm(b, hdr, nir_u2u32(b, dst_body_base_ptr), 0);
169 hdr = nir_bcsel(b, nir_ieq_imm(b, src_body_base_ptr, 0), hdr, hdr2);
170 write_afbc_header(b, dst, dst_idx, hdr);
171
172 nir_variable *offset_var =
173 nir_local_variable_create(b->impl, glsl_uint_type(), "offset");
174 nir_store_var(b, offset_var, nir_imm_int(b, 0), 1);
175 nir_loop *loop = nir_push_loop(b);
176 {
177 nir_def *offset = nir_load_var(b, offset_var);
178 nir_if *loop_check = nir_push_if(b, nir_uge(b, offset, size));
179 nir_jump(b, nir_jump_break);
180 nir_push_else(b, loop_check);
181 unsigned line_sz = align <= MAX_LINE_SIZE ? align : MAX_LINE_SIZE;
182 for (unsigned i = 0; i < align / line_sz; ++i) {
183 nir_def *src_line = nir_iadd(b, src_bodyptr, nir_u2u64(b, offset));
184 nir_def *dst_line = nir_iadd(b, dst_bodyptr, nir_u2u64(b, offset));
185 nir_store_global(
186 b, dst_line, line_sz,
187 nir_load_global(b, src_line, line_sz, line_sz / 4, 32), ~0);
188 offset = nir_iadd_imm(b, offset, line_sz);
189 }
190 nir_store_var(b, offset_var, offset, 0x1);
191 nir_pop_if(b, loop_check);
192 }
193 nir_pop_loop(b, loop);
194 }
195
196 #define panfrost_afbc_size_get_info_field(b, field) \
197 panfrost_afbc_get_info_field(size, b, field)
198
199 static nir_shader *
panfrost_afbc_create_size_shader(struct panfrost_screen * screen,unsigned bpp,unsigned align)200 panfrost_afbc_create_size_shader(struct panfrost_screen *screen, unsigned bpp,
201 unsigned align)
202 {
203 struct panfrost_device *dev = pan_device(&screen->base);
204
205 nir_builder b = nir_builder_init_simple_shader(
206 MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(),
207 "panfrost_afbc_size(bpp=%d)", bpp);
208
209 panfrost_afbc_add_info_ubo(size, b);
210
211 nir_def *coord = nir_load_global_invocation_id(&b, 32);
212 nir_def *block_idx = nir_channel(&b, coord, 0);
213 nir_def *src = panfrost_afbc_size_get_info_field(&b, src);
214 nir_def *metadata = panfrost_afbc_size_get_info_field(&b, metadata);
215 nir_def *uncompressed_size = nir_imm_int(&b, 4 * 4 * bpp / 8); /* bytes */
216
217 nir_def *hdr = read_afbc_header(&b, src, block_idx);
218 nir_def *size = get_superblock_size(&b, dev->arch, hdr, uncompressed_size);
219 size = nir_iand(&b, nir_iadd(&b, size, nir_imm_int(&b, align - 1)),
220 nir_inot(&b, nir_imm_int(&b, align - 1)));
221
222 nir_def *offset = nir_u2u64(
223 &b,
224 nir_iadd(&b,
225 nir_imul_imm(&b, block_idx, sizeof(struct pan_afbc_block_info)),
226 nir_imm_int(&b, offsetof(struct pan_afbc_block_info, size))));
227 nir_store_global(&b, nir_iadd(&b, metadata, offset), 4, size, 0x1);
228
229 return b.shader;
230 }
231
232 #define panfrost_afbc_pack_get_info_field(b, field) \
233 panfrost_afbc_get_info_field(pack, b, field)
234
235 static nir_shader *
panfrost_afbc_create_pack_shader(struct panfrost_screen * screen,unsigned align,bool tiled)236 panfrost_afbc_create_pack_shader(struct panfrost_screen *screen, unsigned align,
237 bool tiled)
238 {
239 nir_builder b = nir_builder_init_simple_shader(
240 MESA_SHADER_COMPUTE, screen->vtbl.get_compiler_options(),
241 "panfrost_afbc_pack");
242
243 panfrost_afbc_add_info_ubo(pack, b);
244
245 nir_def *coord = nir_load_global_invocation_id(&b, 32);
246 nir_def *src_stride = panfrost_afbc_pack_get_info_field(&b, src_stride);
247 nir_def *dst_stride = panfrost_afbc_pack_get_info_field(&b, dst_stride);
248 nir_def *dst_idx = nir_channel(&b, coord, 0);
249 nir_def *src_idx =
250 tiled ? get_morton_index(&b, dst_idx, src_stride, dst_stride) : dst_idx;
251 nir_def *src = panfrost_afbc_pack_get_info_field(&b, src);
252 nir_def *dst = panfrost_afbc_pack_get_info_field(&b, dst);
253 nir_def *header_size =
254 nir_u2u64(&b, panfrost_afbc_pack_get_info_field(&b, header_size));
255 nir_def *metadata = panfrost_afbc_pack_get_info_field(&b, metadata);
256
257 copy_superblock(&b, dst, dst_idx, header_size, src, src_idx, metadata,
258 src_idx, align);
259
260 return b.shader;
261 }
262
263 struct pan_afbc_shader_data *
panfrost_afbc_get_shaders(struct panfrost_context * ctx,struct panfrost_resource * rsrc,unsigned align)264 panfrost_afbc_get_shaders(struct panfrost_context *ctx,
265 struct panfrost_resource *rsrc, unsigned align)
266 {
267 struct pipe_context *pctx = &ctx->base;
268 struct panfrost_screen *screen = pan_screen(ctx->base.screen);
269 bool tiled = rsrc->image.layout.modifier & AFBC_FORMAT_MOD_TILED;
270 struct pan_afbc_shader_key key = {
271 .bpp = util_format_get_blocksizebits(rsrc->base.format),
272 .align = align,
273 .tiled = tiled,
274 };
275
276 pthread_mutex_lock(&ctx->afbc_shaders.lock);
277 struct hash_entry *he =
278 _mesa_hash_table_search(ctx->afbc_shaders.shaders, &key);
279 struct pan_afbc_shader_data *shader = he ? he->data : NULL;
280 pthread_mutex_unlock(&ctx->afbc_shaders.lock);
281
282 if (shader)
283 return shader;
284
285 shader = rzalloc(ctx->afbc_shaders.shaders, struct pan_afbc_shader_data);
286 shader->key = key;
287 _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader);
288
289 #define COMPILE_SHADER(name, ...) \
290 { \
291 nir_shader *nir = \
292 panfrost_afbc_create_##name##_shader(screen, __VA_ARGS__); \
293 nir->info.num_ubos = 1; \
294 shader->name##_cso = pipe_shader_from_nir(pctx, nir); \
295 }
296
297 COMPILE_SHADER(size, key.bpp, key.align);
298 COMPILE_SHADER(pack, key.align, key.tiled);
299
300 #undef COMPILE_SHADER
301
302 pthread_mutex_lock(&ctx->afbc_shaders.lock);
303 _mesa_hash_table_insert(ctx->afbc_shaders.shaders, &shader->key, shader);
304 pthread_mutex_unlock(&ctx->afbc_shaders.lock);
305
306 return shader;
307 }
308
309 DERIVE_HASH_TABLE(pan_afbc_shader_key);
310
311 void
panfrost_afbc_context_init(struct panfrost_context * ctx)312 panfrost_afbc_context_init(struct panfrost_context *ctx)
313 {
314 ctx->afbc_shaders.shaders = pan_afbc_shader_key_table_create(NULL);
315 pthread_mutex_init(&ctx->afbc_shaders.lock, NULL);
316 }
317
318 void
panfrost_afbc_context_destroy(struct panfrost_context * ctx)319 panfrost_afbc_context_destroy(struct panfrost_context *ctx)
320 {
321 _mesa_hash_table_destroy(ctx->afbc_shaders.shaders, NULL);
322 pthread_mutex_destroy(&ctx->afbc_shaders.lock);
323 }
324