/*
 * Copyright © 2024 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "util/u_dynarray.h"

#include "nir_builder.h"

#include "vk_blend.h"
#include "vk_format.h"
#include "vk_graphics_state.h"
#include "vk_log.h"

#include "pan_shader.h"

#include "panvk_blend.h"
#include "panvk_device.h"
#include "panvk_shader.h"

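/* Derives hash/equality helpers and pan_blend_shader_key_table_create() for
 * hash tables keyed on the contents of struct pan_blend_shader_key.
 */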
DERIVE_HASH_TABLE(pan_blend_shader_key);

VkResult
panvk_per_arch(blend_shader_cache_init)(struct panvk_device *dev)
{
   struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;

   simple_mtx_init(&cache->lock, mtx_plain);

   struct panvk_pool_properties bin_pool_props = {
      .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
      .slab_size = 16 * 1024,
      .label = "blend shaders",
      .owns_bos = true,
      .prealloc = false,
      .needs_locking = false,
   };
   panvk_pool_init(&cache->bin_pool, dev, NULL, &bin_pool_props);

   cache->ht = pan_blend_shader_key_table_create(NULL);
   if (!cache->ht) {
      panvk_pool_cleanup(&cache->bin_pool);
      simple_mtx_destroy(&cache->lock);
      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "couldn't create blend shader hash table");
   }

   return VK_SUCCESS;
}

void
panvk_per_arch(blend_shader_cache_cleanup)(struct panvk_device *dev)
{
   struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;

   hash_table_foreach_remove(cache->ht, he)
      vk_free(&dev->vk.alloc, he->data);

   _mesa_hash_table_destroy(cache->ht, NULL);
   panvk_pool_cleanup(&cache->bin_pool);
   simple_mtx_destroy(&cache->lock);
}

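/* NIR instruction pass that lowers load_blend_const_color_rgba intrinsics to
 * push-constant loads of the blend constants stored in
 * struct panvk_graphics_sysvals.
 */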
static bool
lower_load_blend_const(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
      return false;

   b->cursor = nir_before_instr(instr);

   unsigned offset = offsetof(struct panvk_graphics_sysvals, blend.constants);
   nir_def *blend_consts = nir_load_push_constant(
      b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0),
      /* Push constants are placed first, and then come the sysvals. */
      .base = offset + 256,
      .range = intr->def.num_components * intr->def.bit_size / 8);

   nir_def_rewrite_uses(&intr->def, blend_consts);
   return true;
}

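/* Look up a cached blend shader matching the blend state for render target
 * rt, compiling and uploading a new one on a cache miss. The cache lock must
 * be held by the caller.
 */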
static VkResult
get_blend_shader_locked(struct panvk_device *dev,
                        const struct pan_blend_state *state,
                        nir_alu_type src0_type, nir_alu_type src1_type,
                        unsigned rt, mali_ptr *shader_addr)
{
   struct panvk_physical_device *pdev =
      to_panvk_physical_device(dev->vk.physical);
   struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
   struct pan_blend_shader_key key = {
      .format = state->rts[rt].format,
      .src0_type = src0_type,
      .src1_type = src1_type,
      .rt = rt,
      .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0,
      .logicop_enable = state->logicop_enable,
      .logicop_func = state->logicop_func,
      .nr_samples = state->rts[rt].nr_samples,
      .equation = state->rts[rt].equation,
   };

   assert(state->logicop_enable ||
          !pan_blend_is_opaque(state->rts[rt].equation));
   assert(state->rts[rt].equation.color_mask != 0);
   simple_mtx_assert_locked(&dev->blend_shader_cache.lock);

   struct hash_entry *he = _mesa_hash_table_search(cache->ht, &key);
   struct panvk_blend_shader *shader = he ? he->data : NULL;

   if (shader)
      goto out;

   shader = vk_zalloc(&dev->vk.alloc, sizeof(*shader), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!shader)
      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "couldn't allocate blend shader object");

   nir_shader *nir =
      GENX(pan_blend_create_shader)(state, src0_type, src1_type, rt);

   NIR_PASS_V(nir, nir_shader_instructions_pass, lower_load_blend_const,
              nir_metadata_control_flow, NULL);

   /* Compile the NIR shader */
   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->kmod.props.gpu_prod_id,
      .no_ubo_to_push = true,
      .is_blend = true,
      .blend =
         {
            .nr_samples = key.nr_samples,
            .bifrost_blend_desc =
               GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false),
         },
   };

   pan_shader_preprocess(nir, inputs.gpu_id);

   enum pipe_format rt_formats[8] = {0};
   rt_formats[rt] = key.format;
   NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), rt_formats);

   struct pan_shader_info info;
   struct util_dynarray binary;

   util_dynarray_init(&binary, nir);
   GENX(pan_shader_compile)(nir, &inputs, &binary, &info);

   shader->key = key;
   shader->binary = pan_pool_upload_aligned(&cache->bin_pool.base, binary.data,
                                            binary.size, 128);

   ralloc_free(nir);

   _mesa_hash_table_insert(cache->ht, &shader->key, shader);

out:
   *shader_addr = shader->binary;
   return VK_SUCCESS;
}

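/* Thread-safe wrapper around get_blend_shader_locked(). */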
static VkResult
get_blend_shader(struct panvk_device *dev, const struct pan_blend_state *state,
                 nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt,
                 mali_ptr *shader_addr)
{
   struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
   VkResult result;

   simple_mtx_lock(&cache->lock);
   result = get_blend_shader_locked(dev, state, src0_type, src1_type, rt,
                                    shader_addr);
   simple_mtx_unlock(&cache->lock);

   return result;
}

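/* Fill one hardware BLEND descriptor for render target rt_idx, either
 * pointing it at a compiled blend shader or programming the fixed-function
 * blend unit.
 */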
static void
emit_blend_desc(const struct pan_shader_info *fs_info, mali_ptr fs_code,
                const struct pan_blend_state *state, unsigned rt_idx,
                mali_ptr blend_shader, uint16_t constant,
                struct mali_blend_packed *bd)
{
   const struct pan_blend_rt_state *rt = &state->rts[rt_idx];

   pan_pack(bd, BLEND, cfg) {
      if (!state->rt_count || !rt->equation.color_mask) {
         cfg.enable = false;
         cfg.internal.mode = MALI_BLEND_MODE_OFF;
         continue;
      }

      cfg.srgb = util_format_is_srgb(rt->format);
      cfg.load_destination = pan_blend_reads_dest(rt->equation);
      cfg.round_to_fb_precision = true;
      cfg.constant = constant;

      if (blend_shader) {
         /* Blend and fragment shaders must be in the same 4G region. */
         assert((blend_shader >> 32) == (fs_code >> 32));
         /* Blend shader must be 16-byte aligned. */
         assert((blend_shader & 15) == 0);
         /* Fragment shader return address must be 8-byte aligned. */
         assert((fs_code & 7) == 0);

         cfg.internal.mode = MALI_BLEND_MODE_SHADER;
         cfg.internal.shader.pc = (uint32_t)blend_shader;

#if PAN_ARCH <= 7
         uint32_t ret_offset = fs_info->bifrost.blend[rt_idx].return_offset;

         /* If ret_offset is zero, we assume the BLEND is a terminal
          * instruction and set return_value to zero, so the blend shader
          * jumps to address zero, which terminates the thread.
          */
         cfg.internal.shader.return_value =
            ret_offset ? fs_code + ret_offset : 0;
#endif
      } else {
         bool opaque = pan_blend_is_opaque(rt->equation);

         cfg.internal.mode =
            opaque ? MALI_BLEND_MODE_OPAQUE : MALI_BLEND_MODE_FIXED_FUNCTION;

         pan_blend_to_fixed_function_equation(rt->equation, &cfg.equation);

         /* If we want the conversion to work properly, num_comps must be
          * set to 4.
          */
         cfg.internal.fixed_function.num_comps = 4;
         cfg.internal.fixed_function.conversion.memory_format =
            GENX(panfrost_dithered_format_from_pipe_format)(rt->format, false);

#if PAN_ARCH >= 7
         if (cfg.internal.mode == MALI_BLEND_MODE_FIXED_FUNCTION &&
             (cfg.internal.fixed_function.conversion.memory_format & 0xff) ==
                MALI_RGB_COMPONENT_ORDER_RGB1) {
            /* Fixed-function blending doesn't accept RGB1 as the component
             * order; force the order to RGBA instead.
             */
            cfg.internal.fixed_function.conversion.memory_format &= ~0xff;
            cfg.internal.fixed_function.conversion.memory_format |=
               MALI_RGB_COMPONENT_ORDER_RGBA;
         }
#endif

         cfg.internal.fixed_function.rt = rt_idx;

#if PAN_ARCH <= 7
         if (fs_info->fs.untyped_color_outputs) {
            nir_alu_type type = fs_info->bifrost.blend[rt_idx].type;

            cfg.internal.fixed_function.conversion.register_format =
               GENX(pan_fixup_blend_type)(type, rt->format);
         } else {
            cfg.internal.fixed_function.conversion.register_format =
               fs_info->bifrost.blend[rt_idx].format;
         }

         if (!opaque) {
            cfg.internal.fixed_function.alpha_zero_nop =
               pan_blend_alpha_zero_nop(rt->equation);
            cfg.internal.fixed_function.alpha_one_store =
               pan_blend_alpha_one_store(rt->equation);
         }
#endif
      }
   }
}

static uint16_t
get_ff_blend_constant(const struct pan_blend_state *state, unsigned rt_idx,
                      unsigned const_idx)
{
   const struct pan_blend_rt_state *rt = &state->rts[rt_idx];

   /* On Bifrost, the blend constant is expressed as a UNORM value whose
    * width matches the widest channel of the target format. The value is
    * then shifted so that the used bits sit in the MSBs of the 16-bit
    * hardware constant.
    */
   const struct util_format_description *format_desc =
      util_format_description(rt->format);
   unsigned chan_size = 0;
   for (unsigned c = 0; c < format_desc->nr_channels; c++)
      chan_size = MAX2(format_desc->channel[c].size, chan_size);
   float factor = ((1 << chan_size) - 1) << (16 - chan_size);
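   /* For example, with an 8-bit UNORM channel, chan_size is 8, so
    * factor = 255 << 8 = 0xff00, and a blend constant of 1.0 maps to 0xff00.
    */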

   return (uint16_t)(state->constants[const_idx] * factor);
}

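/* Return true if render target rt_idx can't be handled by fixed-function
 * blending and needs a blend shader. When fixed-function blending is usable
 * and a blend constant is involved, *ff_blend_constant is updated with the
 * shared hardware constant.
 */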
static bool
blend_needs_shader(const struct pan_blend_state *state, unsigned rt_idx,
                   unsigned *ff_blend_constant)
{
   const struct pan_blend_rt_state *rt = &state->rts[rt_idx];

   /* LogicOp requires a blend shader, unless it's a NOOP, in which case we
    * just disable blending.
    */
   if (state->logicop_enable)
      return state->logicop_func != PIPE_LOGICOP_NOOP;

   /* If the output is opaque, we don't need a blend shader, no matter the
    * format.
    */
   if (pan_blend_is_opaque(rt->equation))
      return false;

   /* Not all formats can be blended by fixed-function hardware. */
   if (!GENX(panfrost_blendable_format_from_pipe_format)(rt->format)->internal)
      return true;

   unsigned constant_mask = pan_blend_constant_mask(rt->equation);

   /* v6 doesn't support blend constants in FF blend equations. */
   if (constant_mask && PAN_ARCH == 6)
      return true;

   if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
      return true;

   /* v7+ only uses the constant from RT 0. If we're not RT 0, all previous
    * RTs using FF with a blend constant need to have the same constant,
    * otherwise we need a blend shader.
    */
   unsigned blend_const = ~0;
   if (constant_mask) {
      blend_const =
         get_ff_blend_constant(state, rt_idx, ffs(constant_mask) - 1);

      if (*ff_blend_constant != ~0 && blend_const != *ff_blend_constant)
         return true;
   }

   bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
   if (!pan_blend_can_fixed_function(rt->equation, supports_2src))
      return true;

   /* Update the fixed-function blend constant, if we use it. */
   if (blend_const != ~0)
      *ff_blend_constant = blend_const;

   return false;
}

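/* Translate the Vulkan color blend state into one BLEND descriptor per color
 * attachment, compiling blend shaders for render targets that can't use
 * fixed-function blending.
 */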
VkResult
panvk_per_arch(blend_emit_descs)(
   struct panvk_device *dev, const struct vk_color_blend_state *cb,
   const VkFormat *color_attachment_formats, uint8_t *color_attachment_samples,
   const struct pan_shader_info *fs_info, mali_ptr fs_code,
   struct mali_blend_packed *bds, struct panvk_blend_info *blend_info)
{
   struct pan_blend_state bs = {
      .logicop_enable = cb->logic_op_enable,
      .logicop_func = vk_logic_op_to_pipe(cb->logic_op),
      .rt_count = cb->attachment_count,
      .constants =
         {
            cb->blend_constants[0],
            cb->blend_constants[1],
            cb->blend_constants[2],
            cb->blend_constants[3],
         },
   };
   mali_ptr blend_shaders[8] = {};
   /* All bits set to one encodes an unused fixed-function blend constant. */
   unsigned ff_blend_constant = ~0;

   memset(blend_info, 0, sizeof(*blend_info));
   for (uint8_t i = 0; i < cb->attachment_count; i++) {
      struct pan_blend_rt_state *rt = &bs.rts[i];

      if (!(cb->color_write_enables & BITFIELD_BIT(i))) {
         rt->equation.color_mask = 0;
         continue;
      }

      if (bs.logicop_enable && bs.logicop_func == PIPE_LOGICOP_NOOP) {
         rt->equation.color_mask = 0;
         continue;
      }

      if (color_attachment_formats[i] == VK_FORMAT_UNDEFINED) {
         rt->equation.color_mask = 0;
         continue;
      }

      if (!cb->attachments[i].write_mask) {
         rt->equation.color_mask = 0;
         continue;
      }

      rt->format = vk_format_to_pipe_format(color_attachment_formats[i]);
      rt->nr_samples = color_attachment_samples[i];
      rt->equation.blend_enable = cb->attachments[i].blend_enable;
      rt->equation.color_mask = cb->attachments[i].write_mask;
      rt->equation.rgb_func =
         vk_blend_op_to_pipe(cb->attachments[i].color_blend_op);
      rt->equation.rgb_src_factor =
         vk_blend_factor_to_pipe(cb->attachments[i].src_color_blend_factor);
      rt->equation.rgb_dst_factor =
         vk_blend_factor_to_pipe(cb->attachments[i].dst_color_blend_factor);
      rt->equation.alpha_func =
         vk_blend_op_to_pipe(cb->attachments[i].alpha_blend_op);
      rt->equation.alpha_src_factor =
         vk_blend_factor_to_pipe(cb->attachments[i].src_alpha_blend_factor);
      rt->equation.alpha_dst_factor =
         vk_blend_factor_to_pipe(cb->attachments[i].dst_alpha_blend_factor);

      bool dest_has_alpha = util_format_has_alpha(rt->format);
      if (!dest_has_alpha) {
         rt->equation.rgb_src_factor =
            util_blend_dst_alpha_to_one(rt->equation.rgb_src_factor);
         rt->equation.rgb_dst_factor =
            util_blend_dst_alpha_to_one(rt->equation.rgb_dst_factor);

         rt->equation.alpha_src_factor =
            util_blend_dst_alpha_to_one(rt->equation.alpha_src_factor);
         rt->equation.alpha_dst_factor =
            util_blend_dst_alpha_to_one(rt->equation.alpha_dst_factor);
      }

      blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation);

      if (blend_needs_shader(&bs, i, &ff_blend_constant)) {
         nir_alu_type src0_type = fs_info->bifrost.blend[i].type;
         nir_alu_type src1_type = fs_info->bifrost.blend_src1_type;

         VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type, i,
                                            &blend_shaders[i]);
         if (result != VK_SUCCESS)
            return result;

         blend_info->shader_loads_blend_const |=
            pan_blend_constant_mask(rt->equation) != 0;
         blend_info->needs_shader = true;
      }
   }

   /* Set the blend constant to zero if it's not used by any of the blend
    * ops.
    */
   if (ff_blend_constant == ~0)
      ff_blend_constant = 0;

   /* Now that we've collected all the information, we can emit. */
   for (uint8_t i = 0; i < MAX2(cb->attachment_count, 1); i++) {
      emit_blend_desc(fs_info, fs_code, &bs, i, blend_shaders[i],
                      ff_blend_constant, &bds[i]);
   }

   return VK_SUCCESS;
}