xref: /aosp_15_r20/external/mesa3d/src/panfrost/vulkan/panvk_vX_blend.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "util/u_dynarray.h"
7 
8 #include "nir_builder.h"
9 
10 #include "vk_blend.h"
11 #include "vk_format.h"
12 #include "vk_graphics_state.h"
13 #include "vk_log.h"
14 
15 #include "pan_shader.h"
16 
17 #include "panvk_blend.h"
18 #include "panvk_device.h"
19 #include "panvk_shader.h"
20 
/* Instantiates hash-table helpers keyed on struct pan_blend_shader_key.
 * NOTE(review): presumably this is what provides
 * pan_blend_shader_key_table_create(), which the blend shader cache init
 * in this file calls -- confirm against the macro definition.
 */
21 DERIVE_HASH_TABLE(pan_blend_shader_key);
22 
23 VkResult
panvk_per_arch(blend_shader_cache_init)24 panvk_per_arch(blend_shader_cache_init)(struct panvk_device *dev)
25 {
26    struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
27 
28    simple_mtx_init(&cache->lock, mtx_plain);
29 
30    struct panvk_pool_properties bin_pool_props = {
31       .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
32       .slab_size = 16 * 1024,
33       .label = "blend shaders",
34       .owns_bos = true,
35       .prealloc = false,
36       .needs_locking = false,
37    };
38    panvk_pool_init(&cache->bin_pool, dev, NULL, &bin_pool_props);
39 
40    cache->ht = pan_blend_shader_key_table_create(NULL);
41    if (!cache->ht) {
42       panvk_pool_cleanup(&cache->bin_pool);
43       simple_mtx_destroy(&cache->lock);
44       return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
45                        "couldn't create blend shader hash table");
46    }
47 
48    return VK_SUCCESS;
49 }
50 
51 void
panvk_per_arch(blend_shader_cache_cleanup)52 panvk_per_arch(blend_shader_cache_cleanup)(struct panvk_device *dev)
53 {
54    struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
55 
56    hash_table_foreach_remove(cache->ht, he)
57       vk_free(&dev->vk.alloc, he->data);
58 
59    _mesa_hash_table_destroy(cache->ht, NULL);
60    panvk_pool_cleanup(&cache->bin_pool);
61    simple_mtx_destroy(&cache->lock);
62 }
63 
64 static bool
lower_load_blend_const(nir_builder * b,nir_instr * instr,UNUSED void * data)65 lower_load_blend_const(nir_builder *b, nir_instr *instr, UNUSED void *data)
66 {
67    if (instr->type != nir_instr_type_intrinsic)
68       return false;
69 
70    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
71 
72    if (intr->intrinsic != nir_intrinsic_load_blend_const_color_rgba)
73       return false;
74 
75    b->cursor = nir_before_instr(instr);
76 
77    unsigned offset = offsetof(struct panvk_graphics_sysvals, blend.constants);
78    nir_def *blend_consts = nir_load_push_constant(
79       b, intr->def.num_components, intr->def.bit_size, nir_imm_int(b, 0),
80       /* Push constants are placed first, and then come the sysvals. */
81       .base = offset + 256,
82       .range = intr->def.num_components * intr->def.bit_size / 8);
83 
84    nir_def_rewrite_uses(&intr->def, blend_consts);
85    return true;
86 }
87 
88 static VkResult
get_blend_shader_locked(struct panvk_device * dev,const struct pan_blend_state * state,nir_alu_type src0_type,nir_alu_type src1_type,unsigned rt,mali_ptr * shader_addr)89 get_blend_shader_locked(struct panvk_device *dev,
90                         const struct pan_blend_state *state,
91                         nir_alu_type src0_type, nir_alu_type src1_type,
92                         unsigned rt, mali_ptr *shader_addr)
93 {
94    struct panvk_physical_device *pdev =
95       to_panvk_physical_device(dev->vk.physical);
96    struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
97    struct pan_blend_shader_key key = {
98       .format = state->rts[rt].format,
99       .src0_type = src0_type,
100       .src1_type = src1_type,
101       .rt = rt,
102       .has_constants = pan_blend_constant_mask(state->rts[rt].equation) != 0,
103       .logicop_enable = state->logicop_enable,
104       .logicop_func = state->logicop_func,
105       .nr_samples = state->rts[rt].nr_samples,
106       .equation = state->rts[rt].equation,
107    };
108 
109    assert(state->logicop_enable ||
110           !pan_blend_is_opaque(state->rts[rt].equation));
111    assert(state->rts[rt].equation.color_mask != 0);
112    simple_mtx_assert_locked(&dev->blend_shader_cache.lock);
113 
114    struct hash_entry *he = _mesa_hash_table_search(cache->ht, &key);
115    struct panvk_blend_shader *shader = he ? he->data : NULL;
116 
117    if (shader)
118       goto out;
119 
120    shader = vk_zalloc(&dev->vk.alloc, sizeof(*shader), 8,
121                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
122    if (!shader)
123       return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
124                        "couldn't allocate blend shader object");
125 
126    nir_shader *nir =
127       GENX(pan_blend_create_shader)(state, src0_type, src1_type, rt);
128 
129    NIR_PASS_V(nir, nir_shader_instructions_pass, lower_load_blend_const,
130               nir_metadata_control_flow, NULL);
131 
132    /* Compile the NIR shader */
133    struct panfrost_compile_inputs inputs = {
134       .gpu_id = pdev->kmod.props.gpu_prod_id,
135       .no_ubo_to_push = true,
136       .is_blend = true,
137       .blend =
138          {
139             .nr_samples = key.nr_samples,
140             .bifrost_blend_desc =
141                GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false),
142          },
143    };
144 
145    pan_shader_preprocess(nir, inputs.gpu_id);
146 
147    enum pipe_format rt_formats[8] = {0};
148    rt_formats[rt] = key.format;
149    NIR_PASS_V(nir, GENX(pan_inline_rt_conversion), rt_formats);
150 
151    struct pan_shader_info info;
152    struct util_dynarray binary;
153 
154    util_dynarray_init(&binary, nir);
155    GENX(pan_shader_compile)(nir, &inputs, &binary, &info);
156 
157    shader->key = key;
158    shader->binary = pan_pool_upload_aligned(&cache->bin_pool.base, binary.data,
159                                             binary.size, 128);
160 
161    ralloc_free(nir);
162 
163    _mesa_hash_table_insert(cache->ht, &shader->key, shader);
164 
165 out:
166    *shader_addr = shader->binary;
167    return VK_SUCCESS;
168 }
169 
170 static VkResult
get_blend_shader(struct panvk_device * dev,const struct pan_blend_state * state,nir_alu_type src0_type,nir_alu_type src1_type,unsigned rt,mali_ptr * shader_addr)171 get_blend_shader(struct panvk_device *dev, const struct pan_blend_state *state,
172                  nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt,
173                  mali_ptr *shader_addr)
174 {
175    struct panvk_blend_shader_cache *cache = &dev->blend_shader_cache;
176    VkResult result;
177 
178    simple_mtx_lock(&cache->lock);
179    result = get_blend_shader_locked(dev, state, src0_type, src1_type, rt,
180                                     shader_addr);
181    simple_mtx_unlock(&cache->lock);
182 
183    return result;
184 }
185 
186 static void
emit_blend_desc(const struct pan_shader_info * fs_info,mali_ptr fs_code,const struct pan_blend_state * state,unsigned rt_idx,mali_ptr blend_shader,uint16_t constant,struct mali_blend_packed * bd)187 emit_blend_desc(const struct pan_shader_info *fs_info, mali_ptr fs_code,
188                 const struct pan_blend_state *state, unsigned rt_idx,
189                 mali_ptr blend_shader, uint16_t constant,
190                 struct mali_blend_packed *bd)
191 {
192    const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
193 
194    pan_pack(bd, BLEND, cfg) {
195       if (!state->rt_count || !rt->equation.color_mask) {
196          cfg.enable = false;
197          cfg.internal.mode = MALI_BLEND_MODE_OFF;
198          continue;
199       }
200 
201       cfg.srgb = util_format_is_srgb(rt->format);
202       cfg.load_destination = pan_blend_reads_dest(rt->equation);
203       cfg.round_to_fb_precision = true;
204       cfg.constant = constant;
205 
206       if (blend_shader) {
207          /* Blend and fragment shaders must be in the same 4G region. */
208          assert((blend_shader >> 32) == (fs_code >> 32));
209          /* Blend shader must be 16-byte aligned. */
210          assert((blend_shader & 15) == 0);
211          /* Fragment shader return address must be 8-byte aligned. */
212          assert((fs_code & 7) == 0);
213 
214          cfg.internal.mode = MALI_BLEND_MODE_SHADER;
215          cfg.internal.shader.pc = (uint32_t)blend_shader;
216 
217 #if PAN_ARCH <= 7
218          uint32_t ret_offset = fs_info->bifrost.blend[rt_idx].return_offset;
219 
220          /* If ret_offset is zero, we assume the BLEND is a terminal
221           * instruction and set return_value to zero, to let the
222           * blend shader jump to address zero, which terminates the
223           * thread.
224           */
225          cfg.internal.shader.return_value =
226             ret_offset ? fs_code + ret_offset : 0;
227 #endif
228       } else {
229          bool opaque = pan_blend_is_opaque(rt->equation);
230 
231          cfg.internal.mode =
232             opaque ? MALI_BLEND_MODE_OPAQUE : MALI_BLEND_MODE_FIXED_FUNCTION;
233 
234          pan_blend_to_fixed_function_equation(rt->equation, &cfg.equation);
235 
236          /* If we want the conversion to work properly, num_comps must be set to
237           * 4.
238           */
239          cfg.internal.fixed_function.num_comps = 4;
240          cfg.internal.fixed_function.conversion.memory_format =
241             GENX(panfrost_dithered_format_from_pipe_format)(rt->format, false);
242 
243 #if PAN_ARCH >= 7
244          if (cfg.internal.mode == MALI_BLEND_MODE_FIXED_FUNCTION &&
245              (cfg.internal.fixed_function.conversion.memory_format & 0xff) ==
246                 MALI_RGB_COMPONENT_ORDER_RGB1) {
247             /* fixed function does not like RGB1 as the component order */
248             /* force this field to be the RGBA. */
249             cfg.internal.fixed_function.conversion.memory_format &= ~0xff;
250             cfg.internal.fixed_function.conversion.memory_format |=
251                MALI_RGB_COMPONENT_ORDER_RGBA;
252          }
253 #endif
254 
255          cfg.internal.fixed_function.rt = rt_idx;
256 
257 #if PAN_ARCH <= 7
258          if (fs_info->fs.untyped_color_outputs) {
259             nir_alu_type type = fs_info->bifrost.blend[rt_idx].type;
260 
261             cfg.internal.fixed_function.conversion.register_format =
262                GENX(pan_fixup_blend_type)(type, rt->format);
263          } else {
264             cfg.internal.fixed_function.conversion.register_format =
265                fs_info->bifrost.blend[rt_idx].format;
266          }
267 
268          if (!opaque) {
269             cfg.internal.fixed_function.alpha_zero_nop =
270                pan_blend_alpha_zero_nop(rt->equation);
271             cfg.internal.fixed_function.alpha_one_store =
272                pan_blend_alpha_one_store(rt->equation);
273          }
274 #endif
275       }
276    }
277 }
278 
279 static uint16_t
get_ff_blend_constant(const struct pan_blend_state * state,unsigned rt_idx,unsigned const_idx)280 get_ff_blend_constant(const struct pan_blend_state *state, unsigned rt_idx,
281                       unsigned const_idx)
282 {
283    const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
284 
285    /* On Bifrost, the blend constant is expressed with a UNORM of the
286     * size of the target format. The value is then shifted such that
287     * used bits are in the MSB.
288     */
289    const struct util_format_description *format_desc =
290       util_format_description(rt->format);
291    unsigned chan_size = 0;
292    for (unsigned c = 0; c < format_desc->nr_channels; c++)
293       chan_size = MAX2(format_desc->channel[c].size, chan_size);
294    float factor = ((1 << chan_size) - 1) << (16 - chan_size);
295 
296    return (uint16_t)(state->constants[const_idx] * factor);
297 }
298 
299 static bool
blend_needs_shader(const struct pan_blend_state * state,unsigned rt_idx,unsigned * ff_blend_constant)300 blend_needs_shader(const struct pan_blend_state *state, unsigned rt_idx,
301                    unsigned *ff_blend_constant)
302 {
303    const struct pan_blend_rt_state *rt = &state->rts[rt_idx];
304 
305    /* LogicOp requires a blend shader, unless it's a NOOP, in which case we just
306     * disable blending.
307     */
308    if (state->logicop_enable)
309       return state->logicop_func != PIPE_LOGICOP_NOOP;
310 
311    /* If the output is opaque, we don't need a blend shader, no matter the
312     * format.
313     */
314    if (pan_blend_is_opaque(rt->equation))
315       return false;
316 
317    /* Not all formats can be blended by fixed-function hardware */
318    if (!GENX(panfrost_blendable_format_from_pipe_format)(rt->format)->internal)
319       return true;
320 
321    unsigned constant_mask = pan_blend_constant_mask(rt->equation);
322 
323    /* v6 doesn't support blend constants in FF blend equations. */
324    if (constant_mask && PAN_ARCH == 6)
325       return true;
326 
327    if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
328       return true;
329 
330    /* v7+ only uses the constant from RT 0. If we're not RT0, all previous
331     * RTs using FF with a blend constant need to have the same constant,
332     * otherwise we need a blend shader.
333     */
334    unsigned blend_const = ~0;
335    if (constant_mask) {
336       blend_const =
337          get_ff_blend_constant(state, rt_idx, ffs(constant_mask) - 1);
338 
339       if (*ff_blend_constant != ~0 && blend_const != *ff_blend_constant)
340          return true;
341    }
342 
343    bool supports_2src = pan_blend_supports_2src(PAN_ARCH);
344    if (!pan_blend_can_fixed_function(rt->equation, supports_2src))
345       return true;
346 
347    /* Update the fixed function blend constant, if we use it. */
348    if (blend_const != ~0)
349       *ff_blend_constant = blend_const;
350 
351    return false;
352 }
353 
354 VkResult
panvk_per_arch(blend_emit_descs)355 panvk_per_arch(blend_emit_descs)(
356    struct panvk_device *dev, const struct vk_color_blend_state *cb,
357    const VkFormat *color_attachment_formats, uint8_t *color_attachment_samples,
358    const struct pan_shader_info *fs_info, mali_ptr fs_code,
359    struct mali_blend_packed *bds, struct panvk_blend_info *blend_info)
360 {
361    struct pan_blend_state bs = {
362       .logicop_enable = cb->logic_op_enable,
363       .logicop_func = vk_logic_op_to_pipe(cb->logic_op),
364       .rt_count = cb->attachment_count,
365       .constants =
366          {
367             cb->blend_constants[0],
368             cb->blend_constants[1],
369             cb->blend_constants[2],
370             cb->blend_constants[3],
371          },
372    };
373    mali_ptr blend_shaders[8] = {};
374    /* All bits set to one encodes unused fixed-function blend constant. */
375    unsigned ff_blend_constant = ~0;
376 
377    memset(blend_info, 0, sizeof(*blend_info));
378    for (uint8_t i = 0; i < cb->attachment_count; i++) {
379       struct pan_blend_rt_state *rt = &bs.rts[i];
380 
381       if (!(cb->color_write_enables & BITFIELD_BIT(i))) {
382          rt->equation.color_mask = 0;
383          continue;
384       }
385 
386       if (bs.logicop_enable && bs.logicop_func == PIPE_LOGICOP_NOOP) {
387          rt->equation.color_mask = 0;
388          continue;
389       }
390 
391       if (color_attachment_formats[i] == VK_FORMAT_UNDEFINED) {
392          rt->equation.color_mask = 0;
393          continue;
394       }
395 
396       if (!cb->attachments[i].write_mask) {
397          rt->equation.color_mask = 0;
398          continue;
399       }
400 
401       rt->format = vk_format_to_pipe_format(color_attachment_formats[i]);
402 
403       rt->nr_samples = color_attachment_samples[i];
404       rt->equation.blend_enable = cb->attachments[i].blend_enable;
405       rt->equation.color_mask = cb->attachments[i].write_mask;
406       rt->equation.rgb_func =
407          vk_blend_op_to_pipe(cb->attachments[i].color_blend_op);
408       rt->equation.rgb_src_factor =
409          vk_blend_factor_to_pipe(cb->attachments[i].src_color_blend_factor);
410       rt->equation.rgb_dst_factor =
411          vk_blend_factor_to_pipe(cb->attachments[i].dst_color_blend_factor);
412       rt->equation.alpha_func =
413          vk_blend_op_to_pipe(cb->attachments[i].alpha_blend_op);
414       rt->equation.alpha_src_factor =
415          vk_blend_factor_to_pipe(cb->attachments[i].src_alpha_blend_factor);
416       rt->equation.alpha_dst_factor =
417          vk_blend_factor_to_pipe(cb->attachments[i].dst_alpha_blend_factor);
418 
419       bool dest_has_alpha = util_format_has_alpha(rt->format);
420       if (!dest_has_alpha) {
421          rt->equation.rgb_src_factor =
422             util_blend_dst_alpha_to_one(rt->equation.rgb_src_factor);
423          rt->equation.rgb_dst_factor =
424             util_blend_dst_alpha_to_one(rt->equation.rgb_dst_factor);
425 
426          rt->equation.alpha_src_factor =
427             util_blend_dst_alpha_to_one(rt->equation.alpha_src_factor);
428          rt->equation.alpha_dst_factor =
429             util_blend_dst_alpha_to_one(rt->equation.alpha_dst_factor);
430       }
431 
432       blend_info->any_dest_read |= pan_blend_reads_dest(rt->equation);
433 
434       if (blend_needs_shader(&bs, i, &ff_blend_constant)) {
435          nir_alu_type src0_type = fs_info->bifrost.blend[i].type;
436          nir_alu_type src1_type = fs_info->bifrost.blend_src1_type;
437 
438          VkResult result = get_blend_shader(dev, &bs, src0_type, src1_type, i,
439                                             &blend_shaders[i]);
440          if (result != VK_SUCCESS)
441             return result;
442 
443          blend_info->shader_loads_blend_const |=
444             pan_blend_constant_mask(rt->equation) != 0;
445          blend_info->needs_shader = true;
446       }
447    }
448 
449    /* Set the blend constant to zero if it's not used by any of the blend ops. */
450    if (ff_blend_constant == ~0)
451       ff_blend_constant = 0;
452 
453    /* Now that we've collected all the information, we can emit. */
454    for (uint8_t i = 0; i < MAX2(cb->attachment_count, 1); i++) {
455       emit_blend_desc(fs_info, fs_code, &bs, i, blend_shaders[i],
456                       ff_blend_constant, &bds[i]);
457    }
458 
459    return VK_SUCCESS;
460 }
461