/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "meta/radv_meta.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_serialize.h"
#include "nir/nir_xfb_info.h"
#include "nir/radv_nir.h"
#include "spirv/nir_spirv.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_entrypoints.h"
#include "radv_formats.h"
#include "radv_physical_device.h"
#include "radv_pipeline_binary.h"
#include "radv_pipeline_cache.h"
#include "radv_rmv.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"

#include "util/u_debug.h"
#include "ac_binary.h"
#include "ac_formats.h"
#include "ac_nir.h"
#include "ac_shader_util.h"
#include "aco_interface.h"
#include "sid.h"

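/* Returns true when the static fragment shading rate state requests a fragment size other than
 * 1x1 or combiner ops other than KEEP, i.e. when VRS is enabled purely through static state.
 */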
static bool
radv_is_static_vrs_enabled(const struct vk_graphics_pipeline_state *state)
{
   if (!state->fsr)
      return false;

   return state->fsr->fragment_size.width != 1 || state->fsr->fragment_size.height != 1 ||
          state->fsr->combiner_ops[0] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR ||
          state->fsr->combiner_ops[1] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
}

static bool
radv_is_vrs_enabled(const struct vk_graphics_pipeline_state *state)
{
   return radv_is_static_vrs_enabled(state) || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_FSR);
}

static bool
radv_pipeline_has_ds_attachments(const struct vk_render_pass_state *rp)
{
   return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
}

static bool
radv_pipeline_has_color_attachments(const struct vk_render_pass_state *rp)
{
   for (uint32_t i = 0; i < rp->color_attachment_count; ++i) {
      if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}

/**
 * Get rid of DST in the blend factors by commuting the operands:
 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
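 *
 * For example, with expected_dst = DST_COLOR and replacement_src = SRC_COLOR:
 *    ADD(src * DST_COLOR, dst * ZERO) ---> ADD(src * ZERO, dst * SRC_COLOR)
 * SUBTRACT and REVERSE_SUBTRACT are swapped because commuting the operands reverses the
 * subtraction.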
 */
void
radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
                      VkBlendFactor replacement_src)
{
   if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
      *src_factor = VK_BLEND_FACTOR_ZERO;
      *dst_factor = replacement_src;

      /* Commuting the operands requires reversing subtractions. */
      if (*func == VK_BLEND_OP_SUBTRACT)
         *func = VK_BLEND_OP_REVERSE_SUBTRACT;
      else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
         *func = VK_BLEND_OP_SUBTRACT;
   }
}

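/* Selects the SPI_SHADER_COL_FORMAT export format for one color attachment, taking blending and
 * the need to export alpha into account.
 */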
static unsigned
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
                             bool blend_need_alpha)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct util_format_description *desc = vk_format_description(vk_format);
   bool use_rbplus = pdev->info.rbplus_allowed;
   struct ac_spi_color_formats formats = {0};
   unsigned format, ntype, swap;

   format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
   ntype = ac_get_cb_number_type(desc->format);
   swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false);

   ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);

   if (blend_enable && blend_need_alpha)
      return formats.blend_alpha;
   else if (blend_need_alpha)
      return formats.alpha;
   else if (blend_enable)
      return formats.blend;
   else
      return formats.normal;
}

static bool
format_is_int8(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);
   int channel = vk_format_get_first_non_void_channel(format);

   return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
}

static bool
format_is_int10(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (desc->nr_channels != 4)
      return false;
   for (unsigned i = 0; i < 4; i++) {
      if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
         return true;
   }
   return false;
}

static bool
format_is_float32(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);
   int channel = vk_format_get_first_non_void_channel(format);

   return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
}

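/* Packs SPI_SHADER_COL_FORMAT by dropping unused (zero) 4-bit entries so that the exported MRTs
 * are contiguous.
 */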
unsigned
radv_compact_spi_shader_col_format(uint32_t spi_shader_col_format)
{
   unsigned value = 0, num_mrts = 0;
   unsigned i, num_targets;

   /* Compute the number of MRTs. */
   num_targets = DIV_ROUND_UP(util_last_bit(spi_shader_col_format), 4);

   /* Remove holes in spi_shader_col_format. */
   for (i = 0; i < num_targets; i++) {
      unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;

      if (spi_format) {
         value |= spi_format << (num_mrts * 4);
         num_mrts++;
      }
   }

   return value;
}

/*
 * Ordered so that for each i,
 * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
 */
const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
   VK_FORMAT_R32_SFLOAT,         VK_FORMAT_R32G32_SFLOAT,           VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_SNORM,      VK_FORMAT_R16G16B16A16_UINT,
   VK_FORMAT_R16G16B16A16_SINT,  VK_FORMAT_R32G32B32A32_SFLOAT,     VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,      VK_FORMAT_A2R10G10B10_UINT_PACK32, VK_FORMAT_A2R10G10B10_SINT_PACK32,
};

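/* Maps a color attachment format to its meta fragment shader key; the result indexes
 * radv_fs_key_format_exemplars above.
 */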
unsigned
radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
{
   unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
   assert(col_format != V_028714_SPI_SHADER_32_AR);

   bool is_int8 = format_is_int8(format);
   bool is_int10 = format_is_int10(format);

   if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
      return 8;
   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
      return 9;
   else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
      return 10;
   else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
      return 11;
   else {
      if (col_format >= V_028714_SPI_SHADER_32_AR)
         --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */

      --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
      return col_format;
   }
}

static bool
radv_pipeline_needs_ps_epilog(const struct vk_graphics_pipeline_state *state,
                              VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
   /* Use a PS epilog when the fragment shader is compiled without the fragment output interface. */
   if ((state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) &&
       (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
       !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT))
      return true;

   /* These dynamic states need to compile PS epilogs on-demand. */
   if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
      return true;

   return false;
}

static bool
radv_pipeline_uses_vrs_attachment(const struct radv_graphics_pipeline *pipeline,
                                  const struct vk_graphics_pipeline_state *state)
{
   VkPipelineCreateFlags2KHR create_flags = pipeline->base.create_flags;
   if (state->rp)
      create_flags |= state->pipeline_flags;

   return (create_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0;
}

static void
radv_pipeline_init_multisample_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                     const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                     const struct vk_graphics_pipeline_state *state)
{
   struct radv_multisample_state *ms = &pipeline->ms;

   /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
    *
    * "Sample shading is enabled for a graphics pipeline:
    *
    * - If the interface of the fragment shader entry point of the
    *   graphics pipeline includes an input variable decorated
    *   with SampleId or SamplePosition. In this case
    *   minSampleShadingFactor takes the value 1.0.
    * - Else if the sampleShadingEnable member of the
    *   VkPipelineMultisampleStateCreateInfo structure specified
    *   when creating the graphics pipeline is set to VK_TRUE. In
    *   this case minSampleShadingFactor takes the value of
    *   VkPipelineMultisampleStateCreateInfo::minSampleShading.
    *
    * Otherwise, sample shading is considered disabled."
    */
   if (state->ms && state->ms->sample_shading_enable) {
      ms->sample_shading_enable = true;
      ms->min_sample_shading = state->ms->min_sample_shading;
   }
}

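/* Translates the tessellation output primitive into the corresponding hardware GS output
 * primitive type (triangle strip or line strip).
 */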
static uint32_t
radv_conv_tess_prim_to_gs_out(enum tess_primitive_mode prim)
{
   switch (prim) {
   case TESS_PRIMITIVE_TRIANGLES:
   case TESS_PRIMITIVE_QUADS:
      return V_028A6C_TRISTRIP;
   case TESS_PRIMITIVE_ISOLINES:
      return V_028A6C_LINESTRIP;
   default:
      assert(0);
      return 0;
   }
}

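/* Converts a Vulkan dynamic state enum into the corresponding RADV_DYNAMIC_* bit. */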
static uint64_t
radv_dynamic_state_mask(VkDynamicState state)
{
   switch (state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
   case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT:
      return RADV_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
   case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT:
      return RADV_DYNAMIC_SCISSOR;
   case VK_DYNAMIC_STATE_LINE_WIDTH:
      return RADV_DYNAMIC_LINE_WIDTH;
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
      return RADV_DYNAMIC_DEPTH_BIAS;
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
      return RADV_DYNAMIC_BLEND_CONSTANTS;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
      return RADV_DYNAMIC_DEPTH_BOUNDS;
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
      return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
      return RADV_DYNAMIC_STENCIL_WRITE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
      return RADV_DYNAMIC_STENCIL_REFERENCE;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
      return RADV_DYNAMIC_SAMPLE_LOCATIONS;
   case VK_DYNAMIC_STATE_LINE_STIPPLE_KHR:
      return RADV_DYNAMIC_LINE_STIPPLE;
   case VK_DYNAMIC_STATE_CULL_MODE:
      return RADV_DYNAMIC_CULL_MODE;
   case VK_DYNAMIC_STATE_FRONT_FACE:
      return RADV_DYNAMIC_FRONT_FACE;
   case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY:
      return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
   case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE:
      return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE:
      return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP:
      return RADV_DYNAMIC_DEPTH_COMPARE_OP;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE:
      return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE:
      return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_OP:
      return RADV_DYNAMIC_STENCIL_OP;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE:
      return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
      return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
   case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT:
      return RADV_DYNAMIC_PATCH_CONTROL_POINTS;
   case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE:
      return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE:
      return RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
   case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
      return RADV_DYNAMIC_LOGIC_OP;
   case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE:
      return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
      return RADV_DYNAMIC_COLOR_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT:
      return RADV_DYNAMIC_VERTEX_INPUT;
   case VK_DYNAMIC_STATE_POLYGON_MODE_EXT:
      return RADV_DYNAMIC_POLYGON_MODE;
   case VK_DYNAMIC_STATE_TESSELLATION_DOMAIN_ORIGIN_EXT:
      return RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;
   case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT:
      return RADV_DYNAMIC_LOGIC_OP_ENABLE;
   case VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT:
      return RADV_DYNAMIC_LINE_STIPPLE_ENABLE;
   case VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT:
      return RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE;
   case VK_DYNAMIC_STATE_SAMPLE_MASK_EXT:
      return RADV_DYNAMIC_SAMPLE_MASK;
   case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT:
      return RADV_DYNAMIC_DEPTH_CLIP_ENABLE;
   case VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT:
      return RADV_DYNAMIC_CONSERVATIVE_RAST_MODE;
   case VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT:
      return RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE;
   case VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT:
      return RADV_DYNAMIC_PROVOKING_VERTEX_MODE;
   case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT:
      return RADV_DYNAMIC_DEPTH_CLAMP_ENABLE;
   case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT:
      return RADV_DYNAMIC_COLOR_WRITE_MASK;
   case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT:
      return RADV_DYNAMIC_COLOR_BLEND_ENABLE;
   case VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT:
      return RADV_DYNAMIC_RASTERIZATION_SAMPLES;
   case VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT:
      return RADV_DYNAMIC_LINE_RASTERIZATION_MODE;
   case VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT:
      return RADV_DYNAMIC_COLOR_BLEND_EQUATION;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE;
   case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT:
      return RADV_DYNAMIC_DISCARD_RECTANGLE_MODE;
   case VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
      return RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT:
      return RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE;
   case VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT:
      return RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE;
   default:
      unreachable("Unhandled dynamic state");
   }
}

#define RADV_DYNAMIC_CB_STATES                                                                                         \
   (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE |                           \
    RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION |              \
    RADV_DYNAMIC_BLEND_CONSTANTS)

static bool
radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb)
{
   /* If we don't know then we have to assume that blend may be enabled. cb may also be NULL in this
    * case.
    */
   if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK))
      return true;

   /* If we have the blend enable state, then cb being NULL indicates no attachments are written. */
   if (cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         if (cb->attachments[i].write_mask && cb->attachments[i].blend_enable)
            return true;
      }
   }

   return false;
}

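/* Computes the set of dynamic state bits that are actually relevant for this pipeline, pruning
 * states that cannot take effect (e.g. when rasterization is disabled or the feature is unused).
 */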
static uint64_t
radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
                                   const struct vk_graphics_pipeline_state *state)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
   bool raster_enabled =
      !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
   uint64_t states = RADV_DYNAMIC_ALL;

   if (pdev->info.gfx_level < GFX10_3)
      states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;

   /* Disable dynamic states that are useless to mesh shading. */
   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
      if (!raster_enabled)
         return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;

      states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
                  RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
   }

   /* Disable dynamic states that are useless when rasterization is disabled. */
   if (!raster_enabled) {
      states = RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
               RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
               RADV_DYNAMIC_VERTEX_INPUT;

      if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
         states |= RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;

      return states;
   }

   if (!state->rs->depth_bias.enable && !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
      states &= ~RADV_DYNAMIC_DEPTH_BIAS;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) &&
       (!state->ds || !state->ds->depth.bounds_test.enable))
      states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) &&
       (!state->ds || !state->ds->stencil.test_enable))
      states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE |
                  RADV_DYNAMIC_STENCIL_OP);

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && !state->dr->rectangle_count)
      states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;

   if (!(pipeline->dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) &&
       (!state->ms || !state->ms->sample_locations_enable))
      states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;

   if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, state->cb))
      states &= ~RADV_DYNAMIC_BLEND_CONSTANTS;

   if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT))
      states &= ~(RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN);

   return states;
}

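/* Computes the IA_MULTI_VGT_PARAM value, including hardware bug workarounds that depend on the
 * enabled shader stages and the chip family.
 */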
struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};

   ia_multi_vgt_param.ia_switch_on_eoi = false;
   if (shaders[MESA_SHADER_FRAGMENT] && shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
      ia_multi_vgt_param.ia_switch_on_eoi = true;
   if (shaders[MESA_SHADER_GEOMETRY] && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
      ia_multi_vgt_param.ia_switch_on_eoi = true;
   if (shaders[MESA_SHADER_TESS_CTRL]) {
      const struct radv_shader *tes = radv_get_shader(shaders, MESA_SHADER_TESS_EVAL);

      /* SWITCH_ON_EOI must be set if PrimID is used. */
      if (shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || tes->info.uses_prim_id ||
          (tes->info.merged_shader_compiled_separately && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id))
         ia_multi_vgt_param.ia_switch_on_eoi = true;
   }

   ia_multi_vgt_param.partial_vs_wave = false;
   if (shaders[MESA_SHADER_TESS_CTRL]) {
      /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
      if ((pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_PITCAIRN ||
           pdev->info.family == CHIP_BONAIRE) &&
          shaders[MESA_SHADER_GEOMETRY])
         ia_multi_vgt_param.partial_vs_wave = true;
      /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
      if (pdev->info.has_distributed_tess) {
         if (shaders[MESA_SHADER_GEOMETRY]) {
            if (pdev->info.gfx_level <= GFX8)
               ia_multi_vgt_param.partial_es_wave = true;
         } else {
            ia_multi_vgt_param.partial_vs_wave = true;
         }
      }
   }

   if (shaders[MESA_SHADER_GEOMETRY]) {
      /* On these chips there is the possibility of a hang if the
       * pipeline uses a GS and partial_vs_wave is not set.
       *
       * This mostly does not hit 4-SE chips, as those typically set
       * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
       * with GS due to another workaround.
       *
       * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
       */
      if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_FIJI || pdev->info.family == CHIP_POLARIS10 ||
          pdev->info.family == CHIP_POLARIS11 || pdev->info.family == CHIP_POLARIS12 ||
          pdev->info.family == CHIP_VEGAM) {
         ia_multi_vgt_param.partial_vs_wave = true;
      }
   }

   ia_multi_vgt_param.base =
      /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
      S_028AA8_MAX_PRIMGRP_IN_WAVE(pdev->info.gfx_level == GFX8 ? 2 : 0) |
      S_030960_EN_INST_OPT_BASIC(pdev->info.gfx_level >= GFX9) | S_030960_EN_INST_OPT_ADV(pdev->info.gfx_level >= GFX9);

   return ia_multi_vgt_param;
}

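/* Returns the vertex binding stride for the given attribute binding, or 0 if the binding is not
 * found.
 */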
static uint32_t
radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t attrib_binding)
{
   for (uint32_t i = 0; i < vi->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *input_binding = &vi->pVertexBindingDescriptions[i];

      if (input_binding->binding == attrib_binding)
         return input_binding->stride;
   }

   return 0;
}

#define ALL_GRAPHICS_LIB_FLAGS                                                                                         \
   (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |                                                      \
    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |                                                   \
    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |                                                             \
    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)

static VkGraphicsPipelineLibraryFlagBitsEXT
shader_stage_to_pipeline_library_flags(VkShaderStageFlagBits stage)
{
   assert(util_bitcount(stage) == 1);
   switch (stage) {
   case VK_SHADER_STAGE_VERTEX_BIT:
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
   case VK_SHADER_STAGE_GEOMETRY_BIT:
   case VK_SHADER_STAGE_TASK_BIT_EXT:
   case VK_SHADER_STAGE_MESH_BIT_EXT:
      return VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT;
   case VK_SHADER_STAGE_FRAGMENT_BIT:
      return VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   default:
      unreachable("Invalid shader stage");
   }
}

static void
radv_graphics_pipeline_import_layout(struct radv_pipeline_layout *dst, const struct radv_pipeline_layout *src)
{
   for (uint32_t s = 0; s < src->num_sets; s++) {
      if (!src->set[s].layout)
         continue;

      radv_pipeline_layout_add_set(dst, s, src->set[s].layout);
   }

   dst->independent_sets |= src->independent_sets;
   dst->push_constant_size = MAX2(dst->push_constant_size, src->push_constant_size);
}

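/* Gathers the dynamic states and active shader stages declared in pCreateInfo and records the
 * last pre-rasterization (VGT) stage.
 */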
static void
radv_pipeline_import_graphics_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                   const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   /* Mark all states declared dynamic at pipeline creation. */
   if (pCreateInfo->pDynamicState) {
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         pipeline->dynamic_states |= radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }

   /* Mark all active stages at pipeline creation. */
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];

      pipeline->active_stages |= sinfo->stage;
   }

   if (pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_EXT) {
      pipeline->last_vgt_api_stage = MESA_SHADER_MESH;
   } else {
      pipeline->last_vgt_api_stage = util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
   }
}

static bool
radv_should_import_lib_binaries(const VkPipelineCreateFlags2KHR create_flags)
{
   return !(create_flags & (VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT |
                            VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT));
}

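/* Imports dynamic states, active stages and, when link-time optimization is not requested, the
 * already compiled shader binaries from a graphics pipeline library.
 */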
static void
radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                  struct radv_graphics_lib_pipeline *lib)
{
   bool import_binaries = false;

   /* There should be no common blocks between a lib we import and the current
    * pipeline we're building.
    */
   assert((pipeline->active_stages & lib->base.active_stages) == 0);

   pipeline->dynamic_states |= lib->base.dynamic_states;
   pipeline->active_stages |= lib->base.active_stages;

   /* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */
   if (lib->base.has_pipeline_binaries || radv_should_import_lib_binaries(pipeline->base.create_flags)) {
      import_binaries = true;
   }

   if (import_binaries) {
      /* Import the compiled shaders. */
      for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
         if (!lib->base.base.shaders[s])
            continue;

         pipeline->base.shaders[s] = radv_shader_ref(lib->base.base.shaders[s]);
      }

      /* Import the GS copy shader if present. */
      if (lib->base.base.gs_copy_shader) {
         assert(!pipeline->base.gs_copy_shader);
         pipeline->base.gs_copy_shader = radv_shader_ref(lib->base.base.gs_copy_shader);
      }
   }
}

static void
radv_pipeline_init_input_assembly_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
{
   pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param(device, pipeline->base.shaders);
}

static bool
radv_pipeline_uses_ds_feedback_loop(const struct radv_graphics_pipeline *pipeline,
                                    const struct vk_graphics_pipeline_state *state)
{
   VkPipelineCreateFlags2KHR create_flags = pipeline->base.create_flags;
   if (state->rp)
      create_flags |= state->pipeline_flags;

   return (create_flags & VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
}

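/* Computes the hardware viewport scale/translate from a VkViewport: x/y map the viewport rectangle
 * and z maps depth from [0, 1] to [minDepth, maxDepth].
 */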
void
radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3])
{
   float x = viewport->x;
   float y = viewport->y;
   float half_width = 0.5f * viewport->width;
   float half_height = 0.5f * viewport->height;
   double n = viewport->minDepth;
   double f = viewport->maxDepth;

   scale[0] = half_width;
   translate[0] = half_width + x;
   scale[1] = half_height;
   translate[1] = half_height + y;

   scale[2] = (f - n);
   translate[2] = n;
}

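/* Initializes pipeline->dynamic_state: sets the Vulkan defaults, then bakes in the static value
 * of every needed state that was not declared dynamic at pipeline creation.
 */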
static void
radv_pipeline_init_dynamic_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                 const struct vk_graphics_pipeline_state *state,
                                 const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   uint64_t needed_states = radv_pipeline_needed_dynamic_state(device, pipeline, state);
   struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
   uint64_t states = needed_states;

   /* Initialize non-zero values for default dynamic state. */
   dynamic->vk.rs.line.width = 1.0f;
   dynamic->vk.fsr.fragment_size.width = 1u;
   dynamic->vk.fsr.fragment_size.height = 1u;
   dynamic->vk.ds.depth.bounds_test.max = 1.0f;
   dynamic->vk.ds.stencil.front.compare_mask = ~0;
   dynamic->vk.ds.stencil.front.write_mask = ~0;
   dynamic->vk.ds.stencil.back.compare_mask = ~0;
   dynamic->vk.ds.stencil.back.write_mask = ~0;
   dynamic->vk.ms.rasterization_samples = VK_SAMPLE_COUNT_1_BIT;

   pipeline->needed_dynamic_state = needed_states;

   states &= ~pipeline->dynamic_states;

   /* Input assembly. */
   if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      dynamic->vk.ia.primitive_topology = radv_translate_prim(state->ia->primitive_topology);
   }

   if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
      dynamic->vk.ia.primitive_restart_enable = state->ia->primitive_restart_enable;
   }

   /* Tessellation. */
   if (states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
      dynamic->vk.ts.patch_control_points = state->ts->patch_control_points;
   }

   if (states & RADV_DYNAMIC_TESS_DOMAIN_ORIGIN) {
      dynamic->vk.ts.domain_origin = state->ts->domain_origin;
   }

   /* Viewport. */
   if (needed_states & RADV_DYNAMIC_VIEWPORT) {
      dynamic->vk.vp.viewport_count = state->vp->viewport_count;
      if (states & RADV_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->vk.vp.viewports, state->vp->viewports, state->vp->viewport_count);
         for (unsigned i = 0; i < dynamic->vk.vp.viewport_count; i++)
            radv_get_viewport_xform(&dynamic->vk.vp.viewports[i], dynamic->hw_vp.xform[i].scale,
                                    dynamic->hw_vp.xform[i].translate);
      }
   }

   if (needed_states & RADV_DYNAMIC_SCISSOR) {
      dynamic->vk.vp.scissor_count = state->vp->scissor_count;
      if (states & RADV_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->vk.vp.scissors, state->vp->scissors, state->vp->scissor_count);
      }
   }

   if (states & RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) {
      dynamic->vk.vp.depth_clip_negative_one_to_one = state->vp->depth_clip_negative_one_to_one;
   }

   /* Discard rectangles. */
   if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
      dynamic->vk.dr.rectangle_count = state->dr->rectangle_count;
      if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
         typed_memcpy(dynamic->vk.dr.rectangles, state->dr->rectangles, state->dr->rectangle_count);
      }
   }

   /* Rasterization. */
   if (states & RADV_DYNAMIC_LINE_WIDTH) {
      dynamic->vk.rs.line.width = state->rs->line.width;
   }

   if (states & RADV_DYNAMIC_DEPTH_BIAS) {
      dynamic->vk.rs.depth_bias.constant = state->rs->depth_bias.constant;
      dynamic->vk.rs.depth_bias.clamp = state->rs->depth_bias.clamp;
      dynamic->vk.rs.depth_bias.slope = state->rs->depth_bias.slope;
      dynamic->vk.rs.depth_bias.representation = state->rs->depth_bias.representation;
   }

   if (states & RADV_DYNAMIC_CULL_MODE) {
      dynamic->vk.rs.cull_mode = state->rs->cull_mode;
   }

   if (states & RADV_DYNAMIC_FRONT_FACE) {
      dynamic->vk.rs.front_face = state->rs->front_face;
   }

   if (states & RADV_DYNAMIC_LINE_STIPPLE) {
      dynamic->vk.rs.line.stipple.factor = state->rs->line.stipple.factor;
      dynamic->vk.rs.line.stipple.pattern = state->rs->line.stipple.pattern;
   }

   if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
      dynamic->vk.rs.depth_bias.enable = state->rs->depth_bias.enable;
   }

   if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      dynamic->vk.rs.rasterizer_discard_enable = state->rs->rasterizer_discard_enable;
   }

   if (states & RADV_DYNAMIC_POLYGON_MODE) {
      dynamic->vk.rs.polygon_mode = radv_translate_fill(state->rs->polygon_mode);
   }

   if (states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) {
      dynamic->vk.rs.line.stipple.enable = state->rs->line.stipple.enable;
   }

   if (states & RADV_DYNAMIC_DEPTH_CLIP_ENABLE) {
      dynamic->vk.rs.depth_clip_enable = state->rs->depth_clip_enable;
   }

   if (states & RADV_DYNAMIC_CONSERVATIVE_RAST_MODE) {
      dynamic->vk.rs.conservative_mode = state->rs->conservative_mode;
   }

   if (states & RADV_DYNAMIC_PROVOKING_VERTEX_MODE) {
      dynamic->vk.rs.provoking_vertex = state->rs->provoking_vertex;
   }

   if (states & RADV_DYNAMIC_DEPTH_CLAMP_ENABLE) {
      dynamic->vk.rs.depth_clamp_enable = state->rs->depth_clamp_enable;
   }

   if (states & RADV_DYNAMIC_LINE_RASTERIZATION_MODE) {
      dynamic->vk.rs.line.mode = state->rs->line.mode;
   }

   /* Fragment shading rate. */
   if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
      dynamic->vk.fsr = *state->fsr;
   }

   /* Multisample. */
   if (states & RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE) {
      dynamic->vk.ms.alpha_to_coverage_enable = state->ms->alpha_to_coverage_enable;
   }

   if (states & RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE) {
      dynamic->vk.ms.alpha_to_one_enable = state->ms->alpha_to_one_enable;
   }

   if (states & RADV_DYNAMIC_SAMPLE_MASK) {
      dynamic->vk.ms.sample_mask = state->ms->sample_mask & 0xffff;
   }

   if (states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) {
      dynamic->vk.ms.rasterization_samples = state->ms->rasterization_samples;
   }

   if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) {
      dynamic->vk.ms.sample_locations_enable = state->ms->sample_locations_enable;
   }

   if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
      unsigned count = state->ms->sample_locations->per_pixel * state->ms->sample_locations->grid_size.width *
                       state->ms->sample_locations->grid_size.height;

      dynamic->sample_location.per_pixel = state->ms->sample_locations->per_pixel;
      dynamic->sample_location.grid_size = state->ms->sample_locations->grid_size;
      dynamic->sample_location.count = count;
      typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, count);
   }

   /* Depth stencil. */
   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * radv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is created
    *    against does not use a depth/stencil attachment.
    */
   if (needed_states && radv_pipeline_has_ds_attachments(state->rp)) {
      if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
         dynamic->vk.ds.depth.bounds_test.min = state->ds->depth.bounds_test.min;
         dynamic->vk.ds.depth.bounds_test.max = state->ds->depth.bounds_test.max;
      }

      if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
         dynamic->vk.ds.stencil.front.compare_mask = state->ds->stencil.front.compare_mask;
         dynamic->vk.ds.stencil.back.compare_mask = state->ds->stencil.back.compare_mask;
      }

      if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
         dynamic->vk.ds.stencil.front.write_mask = state->ds->stencil.front.write_mask;
         dynamic->vk.ds.stencil.back.write_mask = state->ds->stencil.back.write_mask;
      }

      if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
         dynamic->vk.ds.stencil.front.reference = state->ds->stencil.front.reference;
         dynamic->vk.ds.stencil.back.reference = state->ds->stencil.back.reference;
      }

      if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
         dynamic->vk.ds.depth.test_enable = state->ds->depth.test_enable;
      }

      if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
         dynamic->vk.ds.depth.write_enable = state->ds->depth.write_enable;
      }

      if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
         dynamic->vk.ds.depth.compare_op = state->ds->depth.compare_op;
      }

      if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
         dynamic->vk.ds.depth.bounds_test.enable = state->ds->depth.bounds_test.enable;
      }

      if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
         dynamic->vk.ds.stencil.test_enable = state->ds->stencil.test_enable;
      }

      if (states & RADV_DYNAMIC_STENCIL_OP) {
         dynamic->vk.ds.stencil.front.op.compare = state->ds->stencil.front.op.compare;
         dynamic->vk.ds.stencil.front.op.fail = state->ds->stencil.front.op.fail;
         dynamic->vk.ds.stencil.front.op.pass = state->ds->stencil.front.op.pass;
         dynamic->vk.ds.stencil.front.op.depth_fail = state->ds->stencil.front.op.depth_fail;

         dynamic->vk.ds.stencil.back.op.compare = state->ds->stencil.back.op.compare;
         dynamic->vk.ds.stencil.back.op.fail = state->ds->stencil.back.op.fail;
         dynamic->vk.ds.stencil.back.op.pass = state->ds->stencil.back.op.pass;
         dynamic->vk.ds.stencil.back.op.depth_fail = state->ds->stencil.back.op.depth_fail;
      }
   }

   /* Color blend. */
   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   if (states & RADV_DYNAMIC_BLEND_CONSTANTS) {
      typed_memcpy(dynamic->vk.cb.blend_constants, state->cb->blend_constants, 4);
   }

   if (radv_pipeline_has_color_attachments(state->rp)) {
      if (states & RADV_DYNAMIC_LOGIC_OP) {
         if ((pipeline->dynamic_states & RADV_DYNAMIC_LOGIC_OP_ENABLE) || state->cb->logic_op_enable) {
            dynamic->vk.cb.logic_op = radv_translate_blend_logic_op(state->cb->logic_op);
         }
      }

      if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
         dynamic->vk.cb.color_write_enables = state->cb->color_write_enables;
      }

      if (states & RADV_DYNAMIC_LOGIC_OP_ENABLE) {
         dynamic->vk.cb.logic_op_enable = state->cb->logic_op_enable;
      }

      if (states & RADV_DYNAMIC_COLOR_WRITE_MASK) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            dynamic->vk.cb.attachments[i].write_mask = state->cb->attachments[i].write_mask;
         }
      }

      if (states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            dynamic->vk.cb.attachments[i].blend_enable = state->cb->attachments[i].blend_enable;
         }
      }

      if (states & RADV_DYNAMIC_COLOR_BLEND_EQUATION) {
         for (unsigned i = 0; i < state->cb->attachment_count; i++) {
            const struct vk_color_blend_attachment_state *att = &state->cb->attachments[i];

            dynamic->vk.cb.attachments[i].src_color_blend_factor = att->src_color_blend_factor;
            dynamic->vk.cb.attachments[i].dst_color_blend_factor = att->dst_color_blend_factor;
            dynamic->vk.cb.attachments[i].color_blend_op = att->color_blend_op;
            dynamic->vk.cb.attachments[i].src_alpha_blend_factor = att->src_alpha_blend_factor;
            dynamic->vk.cb.attachments[i].dst_alpha_blend_factor = att->dst_alpha_blend_factor;
            dynamic->vk.cb.attachments[i].alpha_blend_op = att->alpha_blend_op;
         }
      }
   }

   if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) {
      dynamic->vk.dr.enable = state->dr->rectangle_count > 0;
   }

   if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_MODE) {
      dynamic->vk.dr.mode = state->dr->mode;
   }

   if (states & RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE) {
      bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pipeline, state);

      dynamic->feedback_loop_aspects =
         uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_NONE;
   }

   for (uint32_t i = 0; i < MAX_RTS; i++) {
      dynamic->vk.cal.color_map[i] = state->cal ? state->cal->color_map[i] : i;
      dynamic->vk.ial.color_map[i] = state->ial ? state->ial->color_map[i] : i;
   }

   dynamic->vk.ial.depth_att = state->ial ? state->ial->depth_att : MESA_VK_ATTACHMENT_UNUSED;
   dynamic->vk.ial.stencil_att = state->ial ? state->ial->stencil_att : MESA_VK_ATTACHMENT_UNUSED;

   pipeline->dynamic_state.mask = states;
}

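/* Returns the shader object that implements the given API stage, falling back to the merged
 * shader (e.g. VS merged into TCS/GS, or TES merged into GS) when stages were combined.
 */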
struct radv_shader *
radv_get_shader(struct radv_shader *const *shaders, gl_shader_stage stage)
{
   if (stage == MESA_SHADER_VERTEX) {
      if (shaders[MESA_SHADER_VERTEX])
         return shaders[MESA_SHADER_VERTEX];
      if (shaders[MESA_SHADER_TESS_CTRL])
         return shaders[MESA_SHADER_TESS_CTRL];
      if (shaders[MESA_SHADER_GEOMETRY])
         return shaders[MESA_SHADER_GEOMETRY];
   } else if (stage == MESA_SHADER_TESS_EVAL) {
      if (!shaders[MESA_SHADER_TESS_CTRL])
         return NULL;
      if (shaders[MESA_SHADER_TESS_EVAL])
         return shaders[MESA_SHADER_TESS_EVAL];
      if (shaders[MESA_SHADER_GEOMETRY])
         return shaders[MESA_SHADER_GEOMETRY];
   }
   return shaders[stage];
}

static bool
radv_should_export_multiview(const struct radv_shader_stage *stage, const struct radv_graphics_state_key *gfx_state)
{
   /* Export the layer in the last VGT stage if multiview is used.
    * Also checks for NONE stage, which happens when we have depth-only rendering.
    * When the next stage is unknown (with GPL or ESO), the layer is exported unconditionally.
    */
   return gfx_state->has_multiview_view_index && radv_is_last_vgt_stage(stage) &&
          !(stage->nir->info.outputs_written & VARYING_BIT_LAYER);
}

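/* Drops an unused PSIZ output from the producer when the consumer does not read it and it cannot
 * affect rasterization (point primitives) or transform feedback.
 */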
static void
radv_remove_point_size(const struct radv_graphics_state_key *gfx_state, nir_shader *producer, nir_shader *consumer)
{
   if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || !(producer->info.outputs_written & VARYING_BIT_PSIZ))
      return;

   /* Do not remove PSIZ if the shader uses XFB because it might be stored. */
   if (producer->xfb_info)
      return;

   /* Do not remove PSIZ if the rasterization primitive uses points. */
   if (consumer->info.stage == MESA_SHADER_FRAGMENT &&
       ((producer->info.stage == MESA_SHADER_TESS_EVAL && producer->info.tess.point_mode) ||
        (producer->info.stage == MESA_SHADER_GEOMETRY && producer->info.gs.output_primitive == MESA_PRIM_POINTS) ||
        (producer->info.stage == MESA_SHADER_MESH && producer->info.mesh.primitive_type == MESA_PRIM_POINTS)))
      return;

   nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
   assert(var);

   /* Change PSIZ to a global variable which allows it to be DCE'd. */
   var->data.location = 0;
   var->data.mode = nir_var_shader_temp;

   producer->info.outputs_written &= ~VARYING_BIT_PSIZ;
   NIR_PASS_V(producer, nir_fixup_deref_modes);
   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   NIR_PASS(_, producer, nir_opt_dce);
}

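/* Removes fragment shader color exports that target unused attachments (SPI_SHADER_ZERO format),
 * except when a PS epilog is used and the final export formats are not known at compile time.
 */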
static void
radv_remove_color_exports(const struct radv_graphics_state_key *gfx_state, nir_shader *nir)
{
   uint8_t color_remap[MAX_RTS];
   bool fixup_derefs = false;

   /* Do not remove color exports when a PS epilog is used because the format isn't known and the color write mask can
    * be dynamic. */
   if (gfx_state->ps.has_epilog)
      return;

   /* Shader output locations to color attachment mappings. */
   memset(color_remap, MESA_VK_ATTACHMENT_UNUSED, sizeof(color_remap));
   for (uint32_t i = 0; i < MAX_RTS; i++) {
      if (gfx_state->ps.epilog.color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
         color_remap[gfx_state->ps.epilog.color_map[i]] = i;
   }

   nir_foreach_shader_out_variable (var, nir) {
      int idx = var->data.location;
      idx -= FRAG_RESULT_DATA0;

      if (idx < 0)
         continue;

      const uint8_t cb_idx = color_remap[idx];
      unsigned col_format = (gfx_state->ps.epilog.spi_shader_col_format >> (4 * cb_idx)) & 0xf;

      if (col_format == V_028714_SPI_SHADER_ZERO) {
         /* Remove the color export if it's unused or in presence of holes. */
         nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;
         fixup_derefs = true;
      }
   }

   if (fixup_derefs) {
      NIR_PASS_V(nir, nir_fixup_deref_modes);
      NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
      NIR_PASS(_, nir, nir_opt_dce);
   }
}

static void
merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
          tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;

   /* Copy the merged info back to the TCS */
   tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
   tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
}

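/* Performs cross-stage linking between two adjacent NIR shaders: dead varying removal, varying
 * compaction, output vectorization and related cleanups.
 */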
1172 static void
radv_link_shaders(const struct radv_device * device,struct radv_shader_stage * producer_stage,struct radv_shader_stage * consumer_stage,const struct radv_graphics_state_key * gfx_state)1173 radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
1174                   struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
1175 {
1176    const struct radv_physical_device *pdev = radv_device_physical(device);
1177    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
1178    nir_shader *producer = producer_stage->nir;
1179    nir_shader *consumer = consumer_stage->nir;
1180 
1181    if (consumer->info.stage == MESA_SHADER_FRAGMENT) {
1182       /* Lower the viewport index to zero when the last vertex stage doesn't export it. */
1183       if ((consumer->info.inputs_read & VARYING_BIT_VIEWPORT) &&
1184           !(producer->info.outputs_written & VARYING_BIT_VIEWPORT)) {
1185          NIR_PASS(_, consumer, radv_nir_lower_viewport_to_zero);
1186       }
1187    }
1188 
1189    if (producer_stage->key.optimisations_disabled || consumer_stage->key.optimisations_disabled)
1190       return;
1191 
1192    if (consumer->info.stage == MESA_SHADER_FRAGMENT && producer->info.has_transform_feedback_varyings) {
1193       nir_link_xfb_varyings(producer, consumer);
1194    }
1195 
1196    unsigned array_deref_of_vec_options =
1197       nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
1198       nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store;
1199 
1200    NIR_PASS(_, producer, nir_lower_array_deref_of_vec, nir_var_shader_out, NULL, array_deref_of_vec_options);
1201    NIR_PASS(_, consumer, nir_lower_array_deref_of_vec, nir_var_shader_in, NULL, array_deref_of_vec_options);
1202 
1203    nir_lower_io_arrays_to_elements(producer, consumer);
1204    nir_validate_shader(producer, "after nir_lower_io_arrays_to_elements");
1205    nir_validate_shader(consumer, "after nir_lower_io_arrays_to_elements");
1206 
1207    radv_nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
1208    radv_nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);
1209 
1210    /* Remove PSIZ from shaders when it's not needed.
1211     * This is typically produced by translation layers like Zink or D9VK.
1212     */
1213    if (gfx_state->enable_remove_point_size)
1214       radv_remove_point_size(gfx_state, producer, consumer);
1215 
1216    if (nir_link_opt_varyings(producer, consumer)) {
1217       nir_validate_shader(producer, "after nir_link_opt_varyings");
1218       nir_validate_shader(consumer, "after nir_link_opt_varyings");
1219 
1220       NIR_PASS(_, consumer, nir_opt_constant_folding);
1221       NIR_PASS(_, consumer, nir_opt_algebraic);
1222       NIR_PASS(_, consumer, nir_opt_dce);
1223    }
1224 
1225    NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1226    NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1227 
1228    nir_remove_unused_varyings(producer, consumer);
1229 
1230    nir_compact_varyings(producer, consumer, true);
1231 
1232    nir_validate_shader(producer, "after nir_compact_varyings");
1233    nir_validate_shader(consumer, "after nir_compact_varyings");
1234 
1235    if (producer->info.stage == MESA_SHADER_MESH) {
1236       /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */
1237       nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1238    }
1239 
1240    const bool has_geom_or_tess =
1241       consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL;
1242    const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9;
1243 
1244    if (producer->info.stage == MESA_SHADER_TESS_CTRL || producer->info.stage == MESA_SHADER_MESH ||
1245        (producer->info.stage == MESA_SHADER_VERTEX && has_geom_or_tess) ||
1246        (producer->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
1247       NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
1248 
1249       if (producer->info.stage == MESA_SHADER_TESS_CTRL)
1250          NIR_PASS(_, producer, nir_vectorize_tess_levels);
1251 
1252       NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
1253    }
1254 
1255    if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL ||
1256        consumer->info.stage == MESA_SHADER_TESS_EVAL) {
1257       NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
1258    }
1259 }
1260 
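/* Graphics stages in pipeline order. The task/mesh pair forms an alternative
 * pre-rasterization path to VS/TCS/TES/GS, and the fragment stage always comes last.
 * The linking and varying-optimization loops below walk this array forwards or backwards.
 */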
1261 static const gl_shader_stage graphics_shader_order[] = {
1262    MESA_SHADER_VERTEX,   MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY,
1263 
1264    MESA_SHADER_TASK,     MESA_SHADER_MESH,
1265 
1266    MESA_SHADER_FRAGMENT,
1267 };
1268 
1269 static void
1270 radv_link_vs(const struct radv_device *device, struct radv_shader_stage *vs_stage, struct radv_shader_stage *next_stage,
1271              const struct radv_graphics_state_key *gfx_state)
1272 {
1273    assert(vs_stage->nir->info.stage == MESA_SHADER_VERTEX);
1274 
1275    if (radv_should_export_multiview(vs_stage, gfx_state)) {
1276       NIR_PASS(_, vs_stage->nir, radv_nir_export_multiview);
1277    }
1278 
1279    if (next_stage) {
1280       assert(next_stage->nir->info.stage == MESA_SHADER_TESS_CTRL ||
1281              next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1282              next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1283 
1284       radv_link_shaders(device, vs_stage, next_stage, gfx_state);
1285    }
1286 }
1287 
1288 static void
1289 radv_link_tcs(const struct radv_device *device, struct radv_shader_stage *tcs_stage,
1290               struct radv_shader_stage *tes_stage, const struct radv_graphics_state_key *gfx_state)
1291 {
1292    if (!tes_stage)
1293       return;
1294 
1295    assert(tcs_stage->nir->info.stage == MESA_SHADER_TESS_CTRL);
1296    assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1297 
1298    radv_link_shaders(device, tcs_stage, tes_stage, gfx_state);
1299 
1300    /* Copy TCS info into the TES info */
1301    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
1302 }
1303 
1304 static void
1305 radv_link_tes(const struct radv_device *device, struct radv_shader_stage *tes_stage,
1306               struct radv_shader_stage *next_stage, const struct radv_graphics_state_key *gfx_state)
1307 {
1308    assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1309 
1310    if (radv_should_export_multiview(tes_stage, gfx_state)) {
1311       NIR_PASS(_, tes_stage->nir, radv_nir_export_multiview);
1312    }
1313 
1314    if (next_stage) {
1315       assert(next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1316              next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1317 
1318       radv_link_shaders(device, tes_stage, next_stage, gfx_state);
1319    }
1320 }
1321 
1322 static void
1323 radv_link_gs(const struct radv_device *device, struct radv_shader_stage *gs_stage, struct radv_shader_stage *fs_stage,
1324              const struct radv_graphics_state_key *gfx_state)
1325 {
1326    assert(gs_stage->nir->info.stage == MESA_SHADER_GEOMETRY);
1327 
1328    if (radv_should_export_multiview(gs_stage, gfx_state)) {
1329       NIR_PASS(_, gs_stage->nir, radv_nir_export_multiview);
1330    }
1331 
1332    if (fs_stage) {
1333       assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1334 
1335       radv_link_shaders(device, gs_stage, fs_stage, gfx_state);
1336    }
1337 }
1338 
1339 static void
1340 radv_link_task(const struct radv_device *device, struct radv_shader_stage *task_stage,
1341                struct radv_shader_stage *mesh_stage, const struct radv_graphics_state_key *gfx_state)
1342 {
1343    assert(task_stage->nir->info.stage == MESA_SHADER_TASK);
1344 
1345    if (mesh_stage) {
1346       assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1347 
1348       /* Linking task and mesh shaders shouldn't do anything for now, but keep the call for consistency. */
1349       radv_link_shaders(device, task_stage, mesh_stage, gfx_state);
1350    }
1351 }
1352 
1353 static void
1354 radv_link_mesh(const struct radv_device *device, struct radv_shader_stage *mesh_stage,
1355                struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1356 {
1357    assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1358 
1359    if (fs_stage) {
1360       assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1361 
1362       nir_foreach_shader_in_variable (var, fs_stage->nir) {
1363          /* These variables are per-primitive when used with a mesh shader. */
1364          if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || var->data.location == VARYING_SLOT_VIEWPORT ||
1365              var->data.location == VARYING_SLOT_LAYER) {
1366             var->data.per_primitive = true;
1367          }
1368       }
1369 
1370       radv_link_shaders(device, mesh_stage, fs_stage, gfx_state);
1371    }
1372 
1373    /* Lower the mesh shader draw ID to zero to prevent app bugs from triggering undefined behaviour. */
1374    if (mesh_stage->info.ms.has_task && BITSET_TEST(mesh_stage->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
1375       radv_nir_lower_draw_id_to_zero(mesh_stage->nir);
1376 }
1377 
1378 static void
1379 radv_link_fs(struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1380 {
1381    assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1382 
1383    /* Lower the view index to map onto the layer. */
1384    NIR_PASS(_, fs_stage->nir, radv_nir_lower_view_index);
1385 
1386    radv_remove_color_exports(gfx_state, fs_stage->nir);
1387 }
1388 
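/* Return true when a no-op fragment shader must be generated: either a complete graphics
 * pipeline without a fragment stage, or a pipeline library that includes the fragment-shader
 * subset but provides no fragment stage.
 */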
1389 static bool
1390 radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, const struct radv_graphics_state_key *gfx_state)
1391 {
1392    if (pipeline->base.type == RADV_PIPELINE_GRAPHICS &&
1393        !(radv_pipeline_to_graphics(&pipeline->base)->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1394       return true;
1395 
1396    if (pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB &&
1397        (gfx_state->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1398        !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1399       return true;
1400 
1401    return false;
1402 }
1403 
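/* Demote user varyings (VARYING_SLOT_VAR0 and above) that aren't marked always_active_io to
 * nir_var_shader_temp and DCE them, so that stores to outputs nothing consumes can be removed.
 */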
1404 static void
1405 radv_remove_varyings(nir_shader *nir)
1406 {
1407    /* We can't demote mesh outputs to nir_var_shader_temp yet, because
1408     * they don't support array derefs of vectors.
1409     */
1410    if (nir->info.stage == MESA_SHADER_MESH)
1411       return;
1412 
1413    bool fixup_derefs = false;
1414 
1415    nir_foreach_shader_out_variable (var, nir) {
1416       if (var->data.always_active_io)
1417          continue;
1418 
1419       if (var->data.location < VARYING_SLOT_VAR0)
1420          continue;
1421 
1422       nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
1423       var->data.location = 0;
1424       var->data.mode = nir_var_shader_temp;
1425       fixup_derefs = true;
1426    }
1427 
1428    if (fixup_derefs) {
1429       NIR_PASS_V(nir, nir_fixup_deref_modes);
1430       NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1431       NIR_PASS(_, nir, nir_opt_dce);
1432    }
1433 }
1434 
1435 static void
1436 radv_graphics_shaders_link(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
1437                            struct radv_shader_stage *stages)
1438 {
1439    /* Walk backwards to link */
1440    struct radv_shader_stage *next_stage = NULL;
1441    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1442       gl_shader_stage s = graphics_shader_order[i];
1443       if (!stages[s].nir)
1444          continue;
1445 
1446       switch (s) {
1447       case MESA_SHADER_VERTEX:
1448          radv_link_vs(device, &stages[s], next_stage, gfx_state);
1449          break;
1450       case MESA_SHADER_TESS_CTRL:
1451          radv_link_tcs(device, &stages[s], next_stage, gfx_state);
1452          break;
1453       case MESA_SHADER_TESS_EVAL:
1454          radv_link_tes(device, &stages[s], next_stage, gfx_state);
1455          break;
1456       case MESA_SHADER_GEOMETRY:
1457          radv_link_gs(device, &stages[s], next_stage, gfx_state);
1458          break;
1459       case MESA_SHADER_TASK:
1460          radv_link_task(device, &stages[s], next_stage, gfx_state);
1461          break;
1462       case MESA_SHADER_MESH:
1463          radv_link_mesh(device, &stages[s], next_stage, gfx_state);
1464          break;
1465       case MESA_SHADER_FRAGMENT:
1466          radv_link_fs(&stages[s], gfx_state);
1467          break;
1468       default:
1469          unreachable("Invalid graphics shader stage");
1470       }
1471 
1472       next_stage = &stages[s];
1473    }
1474 }
1475 
1476 /**
1477  * First pass of varying optimization.
1478  * This function is called for each shader pair from first to last.
1479  *
1480  * 1. Run some NIR passes in preparation.
1481  * 2. Optimize varyings.
1482  * 3. If either shader changed, run algebraic optimizations.
1483  */
1484 static void
1485 radv_graphics_shaders_link_varyings_first(struct radv_shader_stage *producer_stage,
1486                                           struct radv_shader_stage *consumer_stage)
1487 {
1488    nir_shader *producer = producer_stage->nir;
1489    nir_shader *consumer = consumer_stage->nir;
1490 
1491    /* It is expected by nir_opt_varyings that no undefined stores are present in the shader. */
1492    NIR_PASS(_, producer, nir_opt_undef);
1493 
1494    /* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */
1495    NIR_PASS(_, consumer, nir_opt_load_store_update_alignments);
1496 
1497    /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
1498    NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
1499    NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
1500 
1501    /* Eliminate useless vec->mov copies resulting from scalarization. */
1502    NIR_PASS(_, producer, nir_copy_prop);
1503 
1504    const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1505 
1506    /* Run algebraic optimizations on shaders that changed. */
1507    if (p & nir_progress_producer) {
1508       radv_optimize_nir_algebraic(producer, false, false);
1509    }
1510    if (p & nir_progress_consumer) {
1511       radv_optimize_nir_algebraic(consumer, false, false);
1512    }
1513 }
1514 
1515 /**
1516  * Second pass of varying optimization.
1517  * This function is called for each shader pair from last to first,
1518  * after the first pass had already been called for each pair.
1519  * Done because the previous pass might have enabled additional
1520  * opportunities for optimization.
1521  *
1522  * 1. Optimize varyings again.
1523  * 2. If either shader changed, run algebraic optimizations.
1524  * 3. Run some NIR passes to clean up the shaders.
1525  */
1526 static void
1527 radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_stage,
1528                                            struct radv_shader_stage *consumer_stage)
1529 {
1530    nir_shader *producer = producer_stage->nir;
1531    nir_shader *consumer = consumer_stage->nir;
1532 
1533    const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1534 
1535    /* Run algebraic optimizations on shaders that changed. */
1536    if (p & nir_progress_producer) {
1537       radv_optimize_nir_algebraic(producer, true, false);
1538    }
1539    if (p & nir_progress_consumer) {
1540       radv_optimize_nir_algebraic(consumer, true, false);
1541    }
1542 
1543    /* Re-vectorize I/O for stages that output to memory (LDS or VRAM).
1544     * Don't vectorize FS inputs, doing so just regresses shader stats without any benefit.
1545     * There is also no benefit from re-vectorizing the outputs of the last pre-rasterization
1546     * stage here, because ac_nir_lower_ngg/legacy already takes care of that.
1547     */
1548    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1549       NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out);
1550       NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
1551    }
1552 
1553    /* Recompute driver locations of PS inputs
1554     * because the backend compiler relies on their driver locations.
1555     */
1556    if (consumer->info.stage == MESA_SHADER_FRAGMENT)
1557       nir_recompute_io_bases(consumer, nir_var_shader_in);
1558 
1559    /* Gather shader info; at least the I/O info likely changed
1560     * and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
1561     */
1562    nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1563    nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
1564 
1565    /* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
1566    if (producer->xfb_info) {
1567       nir_gather_xfb_info_from_intrinsics(producer);
1568    }
1569 }
1570 
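/* The helpers below record how many I/O slots remain linked between each producer/consumer
 * pair after varying optimization. These counts are later used to lay out the linked I/O,
 * e.g. the per-vertex and per-patch slots that determine the TCS patch size in VRAM.
 */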
1571 static void
1572 radv_graphics_shaders_fill_linked_vs_io_info(struct radv_shader_stage *vs_stage,
1573                                              struct radv_shader_stage *consumer_stage)
1574 {
1575    const unsigned num_reserved_slots = util_bitcount64(consumer_stage->nir->info.inputs_read);
1576    vs_stage->info.vs.num_linked_outputs = num_reserved_slots;
1577    vs_stage->info.outputs_linked = true;
1578 
1579    switch (consumer_stage->stage) {
1580    case MESA_SHADER_TESS_CTRL: {
1581       consumer_stage->info.tcs.num_linked_inputs = num_reserved_slots;
1582       consumer_stage->info.inputs_linked = true;
1583       break;
1584    }
1585    case MESA_SHADER_GEOMETRY: {
1586       consumer_stage->info.gs.num_linked_inputs = num_reserved_slots;
1587       consumer_stage->info.inputs_linked = true;
1588       break;
1589    }
1590    default:
1591       unreachable("invalid next stage for VS");
1592    }
1593 }
1594 
1595 static void
1596 radv_graphics_shaders_fill_linked_tcs_tes_io_info(struct radv_shader_stage *tcs_stage,
1597                                                   struct radv_shader_stage *tes_stage)
1598 {
1599    assume(tes_stage->stage == MESA_SHADER_TESS_EVAL);
1600 
1601    /* Count the number of per-vertex output slots we need to reserve for the TCS and TES. */
1602    const uint64_t per_vertex_mask =
1603       tes_stage->nir->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1604    const unsigned num_reserved_slots = util_bitcount64(per_vertex_mask);
1605 
1606    /* Count the number of per-patch output slots we need to reserve for the TCS and TES.
1607     * This is necessary because we need it to determine the patch size in VRAM.
1608     */
1609    const uint64_t tess_lvl_mask =
1610       tes_stage->nir->info.inputs_read & (VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1611    const unsigned num_reserved_patch_slots =
1612       util_bitcount64(tess_lvl_mask) + util_bitcount64(tes_stage->nir->info.patch_inputs_read);
1613 
1614    tcs_stage->info.tcs.num_linked_outputs = num_reserved_slots;
1615    tcs_stage->info.tcs.num_linked_patch_outputs = num_reserved_patch_slots;
1616    tcs_stage->info.outputs_linked = true;
1617 
1618    tes_stage->info.tes.num_linked_inputs = num_reserved_slots;
1619    tes_stage->info.tes.num_linked_patch_inputs = num_reserved_patch_slots;
1620    tes_stage->info.inputs_linked = true;
1621 }
1622 
1623 static void
1624 radv_graphics_shaders_fill_linked_tes_gs_io_info(struct radv_shader_stage *tes_stage,
1625                                                  struct radv_shader_stage *gs_stage)
1626 {
1627    assume(gs_stage->stage == MESA_SHADER_GEOMETRY);
1628 
1629    const unsigned num_reserved_slots = util_bitcount64(gs_stage->nir->info.inputs_read);
1630    tes_stage->info.tes.num_linked_outputs = num_reserved_slots;
1631    tes_stage->info.outputs_linked = true;
1632    gs_stage->info.gs.num_linked_inputs = num_reserved_slots;
1633    gs_stage->info.inputs_linked = true;
1634 }
1635 
1636 static void
1637 radv_graphics_shaders_fill_linked_io_info(struct radv_shader_stage *producer_stage,
1638                                           struct radv_shader_stage *consumer_stage)
1639 {
1640    /* We don't need to fill this info for the last pre-rasterization stage. */
1641    if (consumer_stage->stage == MESA_SHADER_FRAGMENT)
1642       return;
1643 
1644    switch (producer_stage->stage) {
1645    case MESA_SHADER_VERTEX:
1646       radv_graphics_shaders_fill_linked_vs_io_info(producer_stage, consumer_stage);
1647       break;
1648 
1649    case MESA_SHADER_TESS_CTRL:
1650       radv_graphics_shaders_fill_linked_tcs_tes_io_info(producer_stage, consumer_stage);
1651       break;
1652 
1653    case MESA_SHADER_TESS_EVAL:
1654       radv_graphics_shaders_fill_linked_tes_gs_io_info(producer_stage, consumer_stage);
1655       break;
1656 
1657    default:
1658       break;
1659    }
1660 }
1661 
1662 /**
1663  * Varying optimizations performed on lowered shader I/O.
1664  *
1665  * We do this after lowering shader I/O because this is more effective
1666  * than running the same optimizations on I/O derefs.
1667  */
1668 static void
1669 radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages)
1670 {
1671    /* Optimize varyings from first to last stage. */
1672    gl_shader_stage prev = MESA_SHADER_NONE;
1673    for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) {
1674       gl_shader_stage s = graphics_shader_order[i];
1675       if (!stages[s].nir)
1676          continue;
1677 
1678       if (prev != MESA_SHADER_NONE) {
1679          if (!stages[prev].key.optimisations_disabled && !stages[s].key.optimisations_disabled)
1680             radv_graphics_shaders_link_varyings_first(&stages[prev], &stages[s]);
1681       }
1682 
1683       prev = s;
1684    }
1685 
1686    /* Optimize varyings from last to first stage. */
1687    gl_shader_stage next = MESA_SHADER_NONE;
1688    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; --i) {
1689       gl_shader_stage s = graphics_shader_order[i];
1690       if (!stages[s].nir)
1691          continue;
1692 
1693       if (next != MESA_SHADER_NONE) {
1694          if (!stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled)
1695             radv_graphics_shaders_link_varyings_second(&stages[s], &stages[next]);
1696 
1697          radv_graphics_shaders_fill_linked_io_info(&stages[s], &stages[next]);
1698       }
1699 
1700       next = s;
1701    }
1702 }
1703 
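/* Build the PS epilog key from the epilog state: the SPI color export format per MRT
 * (packed 4 bits per target into spi_shader_col_format), int8/int10 classification (only
 * used on pre-GFX8), the drirc NaN fixup for float32 outputs, dual-source blending and the
 * MRTZ (depth/stencil/sample mask/alpha-to-coverage) export format. For example, with MRT0
 * exporting FP16_ABGR and MRT1 exporting 32_R, the packed value would roughly be:
 *
 *    col_format = (V_028714_SPI_SHADER_32_R << 4) | V_028714_SPI_SHADER_FP16_ABGR;
 */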
1704 struct radv_ps_epilog_key
1705 radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
1706 {
1707    const struct radv_physical_device *pdev = radv_device_physical(device);
1708    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1709    unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
1710    struct radv_ps_epilog_key key;
1711 
1712    memset(&key, 0, sizeof(key));
1713    memset(key.color_map, MESA_VK_ATTACHMENT_UNUSED, sizeof(key.color_map));
1714 
1715    for (unsigned i = 0; i < state->color_attachment_count; ++i) {
1716       unsigned cf;
1717       unsigned cb_idx = state->color_attachment_mappings[i];
1718       VkFormat fmt = state->color_attachment_formats[i];
1719 
1720       if (fmt == VK_FORMAT_UNDEFINED || !(state->color_write_mask & (0xfu << (i * 4))) ||
1721           cb_idx == MESA_VK_ATTACHMENT_UNUSED) {
1722          cf = V_028714_SPI_SHADER_ZERO;
1723       } else {
1724          bool blend_enable = state->color_blend_enable & (0xfu << (i * 4));
1725 
1726          cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
1727 
1728          if (format_is_int8(fmt))
1729             is_int8 |= 1 << i;
1730          if (format_is_int10(fmt))
1731             is_int10 |= 1 << i;
1732          if (format_is_float32(fmt))
1733             is_float32 |= 1 << i;
1734       }
1735 
1736       col_format |= cf << (4 * i);
1737 
1738       key.color_map[i] = state->color_attachment_mappings[i];
1739    }
1740 
1741    if (!(col_format & 0xf) && state->need_src_alpha & (1 << 0)) {
1742       /* When a subpass doesn't have any color attachments, write the alpha channel of MRT0 when
1743        * alpha coverage is enabled because the depth attachment needs it.
1744        */
1745       col_format |= V_028714_SPI_SHADER_32_AR;
1746       key.color_map[0] = 0;
1747    }
1748 
1749    /* The output for dual source blending should have the same format as the first output. */
1750    if (state->mrt0_is_dual_src) {
1751       assert(!(col_format >> 4));
1752       col_format |= (col_format & 0xf) << 4;
1753       key.color_map[1] = 1;
1754    }
1755 
1756    if (state->alpha_to_coverage_via_mrtz)
1757       z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
1758                                             state->alpha_to_coverage_via_mrtz);
1759 
1760    key.spi_shader_col_format = col_format;
1761    key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
1762    key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
1763    key.enable_mrt_output_nan_fixup = instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
1764    key.colors_written = state->colors_written;
1765    key.mrt0_is_dual_src = state->mrt0_is_dual_src;
1766    key.export_depth = state->export_depth;
1767    key.export_stencil = state->export_stencil;
1768    key.export_sample_mask = state->export_sample_mask;
1769    key.alpha_to_coverage_via_mrtz = state->alpha_to_coverage_via_mrtz;
1770    key.spi_shader_z_format = z_format;
1771    key.alpha_to_one = state->alpha_to_one;
1772 
1773    return key;
1774 }
1775 
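/* Translate the Vulkan color-blend, multisample and render-pass state into a
 * radv_ps_epilog_state and derive the epilog key from it.
 */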
1776 static struct radv_ps_epilog_key
1777 radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state)
1778 {
1779    struct radv_ps_epilog_state ps_epilog = {0};
1780 
1781    if (state->ms && state->ms->alpha_to_coverage_enable)
1782       ps_epilog.need_src_alpha |= 0x1;
1783 
1784    if (state->cb) {
1785       for (uint32_t i = 0; i < state->cb->attachment_count; i++) {
1786          VkBlendOp eqRGB = state->cb->attachments[i].color_blend_op;
1787          VkBlendFactor srcRGB = state->cb->attachments[i].src_color_blend_factor;
1788          VkBlendFactor dstRGB = state->cb->attachments[i].dst_color_blend_factor;
1789 
1790          /* Ignore other blend targets if dual-source blending is enabled to prevent wrong
1791           * behaviour.
1792           */
1793          if (i > 0 && ps_epilog.mrt0_is_dual_src)
1794             continue;
1795 
1796          ps_epilog.color_write_mask |= (unsigned)state->cb->attachments[i].write_mask << (4 * i);
1797          if (!((ps_epilog.color_write_mask >> (i * 4)) & 0xf))
1798             continue;
1799 
1800          if (state->cb->attachments[i].blend_enable)
1801             ps_epilog.color_blend_enable |= 0xfu << (i * 4);
1802 
1803          if (!((ps_epilog.color_blend_enable >> (i * 4)) & 0xf))
1804             continue;
1805 
1806          if (i == 0 && radv_can_enable_dual_src(&state->cb->attachments[i])) {
1807             ps_epilog.mrt0_is_dual_src = true;
1808          }
1809 
1810          if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
1811             srcRGB = VK_BLEND_FACTOR_ONE;
1812             dstRGB = VK_BLEND_FACTOR_ONE;
1813          }
1814 
1815          if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
1816              srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
1817              srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
1818             ps_epilog.need_src_alpha |= 1 << i;
1819       }
1820    }
1821 
1822    if (state->rp) {
1823       ps_epilog.color_attachment_count = state->rp->color_attachment_count;
1824 
1825       for (uint32_t i = 0; i < ps_epilog.color_attachment_count; i++) {
1826          ps_epilog.color_attachment_formats[i] = state->rp->color_attachment_formats[i];
1827       }
1828    }
1829 
1830    if (state->ms)
1831       ps_epilog.alpha_to_one = state->ms->alpha_to_one_enable;
1832 
1833    for (uint32_t i = 0; i < MAX_RTS; i++) {
1834       ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i;
1835    }
1836 
1837    return radv_generate_ps_epilog_key(device, &ps_epilog);
1838 }
1839 
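/* Gather all pipeline state that influences shader compilation (vertex input, tessellation,
 * multisample, rasterization, PS epilog, NGG and dynamic-state decisions) into the graphics
 * state key.
 */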
1840 static struct radv_graphics_state_key
1841 radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
1842                                  VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
1843 {
1844    const struct radv_physical_device *pdev = radv_device_physical(device);
1845    struct radv_graphics_state_key key;
1846 
1847    memset(&key, 0, sizeof(key));
1848 
1849    key.lib_flags = lib_flags;
1850    key.has_multiview_view_index = state->rp ? !!state->rp->view_mask : 0;
1851 
1852    if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI)) {
1853       key.vs.has_prolog = true;
1854    }
1855 
1856    /* Compile the pre-rasterization stages only when the vertex input interface is missing. */
1857    if ((state->shader_stages & VK_SHADER_STAGE_VERTEX_BIT) && !state->vi) {
1858       key.vs.has_prolog = true;
1859    }
1860 
1861    /* Vertex input state */
1862    if (state->vi) {
1863       u_foreach_bit (i, state->vi->attributes_valid) {
1864          uint32_t binding = state->vi->attributes[i].binding;
1865          uint32_t offset = state->vi->attributes[i].offset;
1866          enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
1867 
1868          key.vi.vertex_attribute_formats[i] = format;
1869          key.vi.vertex_attribute_bindings[i] = binding;
1870          key.vi.vertex_attribute_offsets[i] = offset;
1871          key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
1872 
1873          /* vertex_attribute_strides is only needed to work around GFX6/7 offset>=stride checks. */
1874          if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) {
1875             /* From the Vulkan spec 1.2.157:
1876              *
1877              * "If the bound pipeline state object was created with the
1878              * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE dynamic state enabled then pStrides[i]
1879              * specifies the distance in bytes between two consecutive elements within the
1880              * corresponding buffer. In this case the VkVertexInputBindingDescription::stride state
1881              * from the pipeline state object is ignored."
1882              *
1883              * Make sure the vertex attribute stride is zero to avoid computing a wrong offset if
1884              * it's initialized to something other than zero.
1885              */
1886             key.vi.vertex_attribute_strides[i] = state->vi->bindings[binding].stride;
1887          }
1888 
1889          if (state->vi->bindings[binding].input_rate) {
1890             key.vi.instance_rate_inputs |= 1u << i;
1891          }
1892 
1893          const struct ac_vtx_format_info *vtx_info =
1894             ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.family, format);
1895          unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
1896 
1897          /* If offset is misaligned, then the buffer offset must be too. Just skip updating
1898           * vertex_binding_align in this case.
1899           */
1900          if (offset % attrib_align == 0) {
1901             key.vi.vertex_binding_align[binding] = MAX2(key.vi.vertex_binding_align[binding], attrib_align);
1902          }
1903       }
1904    }
1905 
1906    if (state->ts)
1907       key.ts.patch_control_points = state->ts->patch_control_points;
1908 
1909    if (state->ms) {
1910       key.ms.sample_shading_enable = state->ms->sample_shading_enable;
1911       if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) &&
1912           state->ms->rasterization_samples > 1) {
1913          key.ms.rasterization_samples = state->ms->rasterization_samples;
1914       }
1915    }
1916 
1917    if (pdev->info.gfx_level >= GFX11 && state->ms) {
1918       key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable;
1919    }
1920 
1921    if (state->ia) {
1922       key.ia.topology = radv_translate_prim(state->ia->primitive_topology);
1923    }
1924 
1925    if (!state->vi || !(state->shader_stages & (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
1926                                                VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT))) {
1927       key.unknown_rast_prim = true;
1928    }
1929 
1930    if (pdev->info.gfx_level >= GFX10 && state->rs) {
1931       key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
1932    }
1933 
1934    key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
1935 
1936    if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) &&
1937        (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
1938       key.adjust_frag_coord_z = true;
1939 
1940    if (radv_pipeline_needs_ps_epilog(state, lib_flags))
1941       key.ps.has_epilog = true;
1942 
1943    key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
1944 
1945    if (pdev->info.gfx_level >= GFX11) {
1946       /* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also
1947        * exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic
1948        * rendering), it's not possible to know the info at compile time and MRTZ needs to be
1949        * exported in the epilog.
1950        */
1951       key.ps.exports_mrtz_via_epilog =
1952          key.ps.has_epilog && (!state->ms || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE));
1953    }
1954 
1955    key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
1956                                        (!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
1957 
1958    if (pdev->use_ngg) {
1959       VkShaderStageFlags ngg_stage;
1960 
1961       if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
1962          ngg_stage = VK_SHADER_STAGE_GEOMETRY_BIT;
1963       } else if (state->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1964          ngg_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1965       } else {
1966          ngg_stage = VK_SHADER_STAGE_VERTEX_BIT;
1967       }
1968 
1969       key.dynamic_provoking_vtx_mode =
1970          BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) &&
1971          (ngg_stage == VK_SHADER_STAGE_VERTEX_BIT || ngg_stage == VK_SHADER_STAGE_GEOMETRY_BIT);
1972    }
1973 
1974    if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && state->ia &&
1975        state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST &&
1976        !BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_POLYGON_MODE) && state->rs &&
1977        state->rs->polygon_mode != VK_POLYGON_MODE_POINT) {
1978       key.enable_remove_point_size = true;
1979    }
1980 
1981    if (device->vk.enabled_features.smoothLines) {
1982       /* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time.
1983        * This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also
1984        * simplifies the implementation.
1985        */
1986       if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
1987           (state->rs && state->rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR))
1988          key.dynamic_line_rast_mode = true;
1989 
1990       /* For GPL, when the fragment shader is compiled without any pre-rasterization information,
1991        * ensure the line rasterization mode is considered dynamic because we can't know if it's
1992        * going to draw lines or not.
1993        */
1994       key.dynamic_line_rast_mode |= !!(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1995                                     !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
1996    }
1997 
1998    return key;
1999 }
2000 
2001 static struct radv_graphics_pipeline_key
2002 radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2003                                     const struct vk_graphics_pipeline_state *state,
2004                                     VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
2005 {
2006    VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2007    struct radv_graphics_pipeline_key key = {0};
2008 
2009    key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags);
2010 
2011    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2012       const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
2013       gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
2014 
2015       key.stage_info[s] = radv_pipeline_get_shader_key(device, stage, create_flags, pCreateInfo->pNext);
2016 
2017       if (s == MESA_SHADER_MESH && (state->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
2018          key.stage_info[s].has_task_shader = true;
2019    }
2020 
2021    return key;
2022 }
2023 
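/* Decide which pre-rasterization stages run on the NGG path. GFX11+ always uses NGG
 * (including for GS), while GFX10/GFX10.3 must fall back to the legacy path in several
 * cases: streamout, tessellation combined with very large GS output, RADV_DEBUG_NO_NGG_GS,
 * or VS/TES compiled separately with an unknown GS.
 */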
2024 static void
2025 radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
2026                           VkShaderStageFlagBits active_nir_stages)
2027 {
2028    const struct radv_physical_device *pdev = radv_device_physical(device);
2029    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2030 
2031    if (!pdev->cache_key.use_ngg)
2032       return;
2033 
2034    if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
2035       stages[MESA_SHADER_VERTEX].info.is_ngg = true;
2036    } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2037       stages[MESA_SHADER_TESS_EVAL].info.is_ngg = true;
2038    } else if (stages[MESA_SHADER_MESH].nir) {
2039       stages[MESA_SHADER_MESH].info.is_ngg = true;
2040    }
2041 
2042    if (pdev->info.gfx_level >= GFX11) {
2043       if (stages[MESA_SHADER_GEOMETRY].nir)
2044          stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
2045    } else {
2046       /* GFX10/GFX10.3 can't always enable NGG due to HW bugs/limitations. */
2047       if (stages[MESA_SHADER_TESS_EVAL].nir && stages[MESA_SHADER_GEOMETRY].nir &&
2048           stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations *
2049                 stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out >
2050              256) {
2051          /* Fall back to the legacy path if tessellation is
2052           * enabled with extreme geometry because
2053           * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
2054           * might hang.
2055           */
2056          stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2057       }
2058 
2059       struct radv_shader_stage *last_vgt_stage = NULL;
2060       radv_foreach_stage(i, active_nir_stages)
2061       {
2062          if (radv_is_last_vgt_stage(&stages[i])) {
2063             last_vgt_stage = &stages[i];
2064          }
2065       }
2066 
2067       if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) ||
2068           ((instance->debug_flags & RADV_DEBUG_NO_NGG_GS) && stages[MESA_SHADER_GEOMETRY].nir)) {
2069          /* NGG needs to be disabled on GFX10/GFX10.3 when:
2070           * - streamout is used because NGG streamout isn't supported
2071           * - NGG GS is explicitly disabled to work around performance issues
2072           */
2073          if (stages[MESA_SHADER_TESS_EVAL].nir)
2074             stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2075          else
2076             stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2077       }
2078 
2079       if (stages[MESA_SHADER_GEOMETRY].nir) {
2080          if (stages[MESA_SHADER_TESS_EVAL].nir)
2081             stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_TESS_EVAL].info.is_ngg;
2082          else
2083             stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_VERTEX].info.is_ngg;
2084       }
2085 
2086       /* When pre-rasterization stages are compiled separately with shader objects, NGG GS needs to
2087        * be disabled because if the next stage of VS/TES is GS and GS is unknown, it might use
2088        * streamout but it's not possible to know that when compiling VS or TES only.
2089        */
2090       if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage == MESA_SHADER_GEOMETRY &&
2091           !stages[MESA_SHADER_GEOMETRY].nir) {
2092          stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2093       } else if (stages[MESA_SHADER_TESS_EVAL].nir &&
2094                  stages[MESA_SHADER_TESS_EVAL].info.next_stage == MESA_SHADER_GEOMETRY &&
2095                  !stages[MESA_SHADER_GEOMETRY].nir) {
2096          stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2097       } else if (stages[MESA_SHADER_GEOMETRY].nir &&
2098                  (!stages[MESA_SHADER_VERTEX].nir && !stages[MESA_SHADER_TESS_EVAL].nir)) {
2099          stages[MESA_SHADER_GEOMETRY].info.is_ngg = false;
2100       }
2101    }
2102 }
2103 
2104 static bool
2105 radv_consider_force_vrs(const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage *last_vgt_stage,
2106                         const struct radv_shader_stage *fs_stage)
2107 {
2108    if (!gfx_state->ps.force_vrs_enabled)
2109       return false;
2110 
2111    /* Mesh shaders aren't considered. */
2112    if (last_vgt_stage->info.stage == MESA_SHADER_MESH)
2113       return false;
2114 
2115    if (last_vgt_stage->nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
2116       return false;
2117 
2118    /* VRS has no effect if there is no pixel shader. */
2119    if (last_vgt_stage->info.next_stage == MESA_SHADER_NONE)
2120       return false;
2121 
2122    /* Do not enable if the PS uses gl_FragCoord because it breaks postprocessing in some games, or with Primitive
2123     * Ordered Pixel Shading (regardless of whether per-pixel data is addressed with gl_FragCoord or a custom
2124     * interpolator) as that'd result in races between adjacent primitives with no common fine pixels.
2125     */
2126    nir_shader *fs_shader = fs_stage->nir;
2127    if (fs_shader && (BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
2128                      fs_shader->info.fs.sample_interlock_ordered || fs_shader->info.fs.sample_interlock_unordered ||
2129                      fs_shader->info.fs.pixel_interlock_ordered || fs_shader->info.fs.pixel_interlock_unordered)) {
2130       return false;
2131    }
2132 
2133    return true;
2134 }
2135 
2136 static gl_shader_stage
2137 radv_get_next_stage(gl_shader_stage stage, VkShaderStageFlagBits active_nir_stages)
2138 {
2139    switch (stage) {
2140    case MESA_SHADER_VERTEX:
2141       if (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2142          return MESA_SHADER_TESS_CTRL;
2143       } else if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2144          return MESA_SHADER_GEOMETRY;
2145       } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2146          return MESA_SHADER_FRAGMENT;
2147       } else {
2148          return MESA_SHADER_NONE;
2149       }
2150    case MESA_SHADER_TESS_CTRL:
2151       return MESA_SHADER_TESS_EVAL;
2152    case MESA_SHADER_TESS_EVAL:
2153       if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2154          return MESA_SHADER_GEOMETRY;
2155       } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2156          return MESA_SHADER_FRAGMENT;
2157       } else {
2158          return MESA_SHADER_NONE;
2159       }
2160    case MESA_SHADER_GEOMETRY:
2161    case MESA_SHADER_MESH:
2162       if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2163          return MESA_SHADER_FRAGMENT;
2164       } else {
2165          return MESA_SHADER_NONE;
2166       }
2167    case MESA_SHADER_TASK:
2168       return MESA_SHADER_MESH;
2169    case MESA_SHADER_FRAGMENT:
2170       return MESA_SHADER_NONE;
2171    default:
2172       unreachable("invalid graphics shader stage");
2173    }
2174 }
2175 
2176 static void
2177 radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2178                       const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages,
2179                       VkShaderStageFlagBits active_nir_stages)
2180 {
2181    radv_foreach_stage(i, active_nir_stages)
2182    {
2183       bool consider_force_vrs = false;
2184 
2185       if (radv_is_last_vgt_stage(&stages[i])) {
2186          consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
2187       }
2188 
2189       radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, pipeline_type,
2190                                 consider_force_vrs, &stages[i].info);
2191    }
2192 
2193    radv_nir_shader_info_link(device, gfx_state, stages);
2194 }
2195 
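/* Declare the shader arguments (user SGPRs etc.) for each stage. On GFX9+ VS is merged into
 * TCS or GS and TES is merged into GS, so the merged pair must share a single argument
 * layout: it is declared once for the later stage and copied to the earlier one.
 */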
2196 static void
2197 radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
2198                            const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
2199 {
2200    const struct radv_physical_device *pdev = radv_device_physical(device);
2201    enum amd_gfx_level gfx_level = pdev->info.gfx_level;
2202 
2203    if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
2204       radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
2205                                MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args);
2206       stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
2207       stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
2208          stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
2209 
2210       stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs;
2211       stages[MESA_SHADER_VERTEX].info.inline_push_constant_mask =
2212          stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask;
2213       stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
2214 
2215       active_nir_stages &= ~(1 << MESA_SHADER_VERTEX);
2216       active_nir_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
2217    }
2218 
2219    if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
2220       gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2221       radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage,
2222                                &stages[MESA_SHADER_GEOMETRY].args);
2223       stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
2224       stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
2225          stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
2226 
2227       stages[pre_stage].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs;
2228       stages[pre_stage].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask;
2229       stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
2230       active_nir_stages &= ~(1 << pre_stage);
2231       active_nir_stages &= ~(1 << MESA_SHADER_GEOMETRY);
2232    }
2233 
2234    u_foreach_bit (i, active_nir_stages) {
2235       radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args);
2236       stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
2237       stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
2238    }
2239 }
2240 
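/* With the legacy (non-NGG) GS path, the geometry shader writes its outputs to the GS->VS
 * ring buffer instead of exporting them. A separate "GS copy shader", which runs as a
 * hardware VS, reads that ring and performs the position/parameter exports.
 */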
2241 static struct radv_shader *
2242 radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache,
2243                            struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state,
2244                            bool keep_executable_info, bool keep_statistic_info,
2245                            struct radv_shader_binary **gs_copy_binary)
2246 {
2247    const struct radv_physical_device *pdev = radv_device_physical(device);
2248    struct radv_instance *instance = radv_physical_device_instance(pdev);
2249 
2250    const struct radv_shader_info *gs_info = &gs_stage->info;
2251    ac_nir_gs_output_info output_info = {
2252       .streams = gs_info->gs.output_streams,
2253       .usage_mask = gs_info->gs.output_usage_mask,
2254    };
2255    nir_shader *nir = ac_nir_create_gs_copy_shader(
2256       gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
2257       gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
2258       gs_info->force_vrs_per_vertex, &output_info);
2259 
2260    nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
2261    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2262 
2263    struct radv_shader_stage gs_copy_stage = {
2264       .stage = MESA_SHADER_VERTEX,
2265       .shader_sha1 = {0},
2266       .key =
2267          {
2268             .optimisations_disabled = gs_stage->key.optimisations_disabled,
2269          },
2270    };
2271    radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info);
2272    radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false,
2273                              &gs_copy_stage.info);
2274    gs_copy_stage.info.wave_size = 64;      /* Wave32 not supported. */
2275    gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
2276    gs_copy_stage.info.so = gs_info->so;
2277    gs_copy_stage.info.outinfo = gs_info->outinfo;
2278    gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
2279    gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY;
2280 
2281    radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
2282                             &gs_copy_stage.args);
2283    gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
2284    gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
2285 
2286    NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac);
2287    NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
2288 
2289    struct radv_graphics_pipeline_key key = {0};
2290    bool dump_shader = radv_can_dump_shader(device, nir, true);
2291 
2292    if (dump_shader)
2293       simple_mtx_lock(&instance->shader_dump_mtx);
2294 
2295    *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info,
2296                                             keep_statistic_info);
2297    struct radv_shader *copy_shader =
2298       radv_shader_create(device, cache, *gs_copy_binary, keep_executable_info || dump_shader);
2299    if (copy_shader)
2300       radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, *gs_copy_binary, copy_shader, &nir, 1,
2301                                       &gs_copy_stage.info);
2302 
2303    if (dump_shader)
2304       simple_mtx_unlock(&instance->shader_dump_mtx);
2305 
2306    return copy_shader;
2307 }
2308 
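/* Compile the remaining NIR stages to ISA. The loop walks from the last stage to the first
 * so that, on GFX9+, a merged pair (VS+TCS, or VS/TES+GS) is compiled together when its
 * later half is reached; the legacy (non-NGG) GS additionally gets a GS copy shader.
 */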
2309 static void
2310 radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
2311                                  struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2312                                  bool keep_executable_info, bool keep_statistic_info,
2313                                  VkShaderStageFlagBits active_nir_stages, struct radv_shader **shaders,
2314                                  struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
2315                                  struct radv_shader_binary **gs_copy_binary)
2316 {
2317    const struct radv_physical_device *pdev = radv_device_physical(device);
2318    struct radv_instance *instance = radv_physical_device_instance(pdev);
2319 
2320    for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
2321       if (!(active_nir_stages & (1 << s)))
2322          continue;
2323 
2324       nir_shader *nir_shaders[2] = {stages[s].nir, NULL};
2325       unsigned shader_count = 1;
2326 
2327       /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2328       if (pdev->info.gfx_level >= GFX9 &&
2329           ((s == MESA_SHADER_GEOMETRY &&
2330             (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
2331            (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
2332          gl_shader_stage pre_stage;
2333 
2334          if (s == MESA_SHADER_GEOMETRY && (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {
2335             pre_stage = MESA_SHADER_TESS_EVAL;
2336          } else {
2337             pre_stage = MESA_SHADER_VERTEX;
2338          }
2339 
2340          nir_shaders[0] = stages[pre_stage].nir;
2341          nir_shaders[1] = stages[s].nir;
2342          shader_count = 2;
2343       }
2344 
2345       int64_t stage_start = os_time_get_nano();
2346 
2347       bool dump_shader = radv_can_dump_shader(device, nir_shaders[0], false);
2348 
2349       if (dump_shader) {
2350          simple_mtx_lock(&instance->shader_dump_mtx);
2351          for (uint32_t i = 0; i < shader_count; i++)
2352             nir_print_shader(nir_shaders[i], stderr);
2353       }
2354 
2355       binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
2356                                            keep_executable_info, keep_statistic_info);
2357       shaders[s] = radv_shader_create(device, cache, binaries[s], keep_executable_info || dump_shader);
2358       radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, binaries[s], shaders[s], nir_shaders,
2359                                       shader_count, &stages[s].info);
2360 
2361       if (dump_shader)
2362          simple_mtx_unlock(&instance->shader_dump_mtx);
2363 
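      /* With the legacy (non-NGG) GS path, the GS writes its output to a ring buffer in memory,
       * so a separate copy shader (compiled as a VS) is needed to read it back and perform the
       * final vertex exports.
       */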
2364       if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
2365          *gs_copy_shader = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
2366                                                       keep_executable_info, keep_statistic_info, gs_copy_binary);
2367       }
2368 
2369       stages[s].feedback.duration += os_time_get_nano() - stage_start;
2370 
2371       active_nir_stages &= ~(1 << nir_shaders[0]->info.stage);
2372       if (nir_shaders[1])
2373          active_nir_stages &= ~(1 << nir_shaders[1]->info.stage);
2374    }
2375 }
2376 
2377 static void
radv_pipeline_retain_shaders(struct radv_retained_shaders * retained_shaders,struct radv_shader_stage * stages)2378 radv_pipeline_retain_shaders(struct radv_retained_shaders *retained_shaders, struct radv_shader_stage *stages)
2379 {
2380    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2381       if (!stages[s].entrypoint)
2382          continue;
2383 
2384       int64_t stage_start = os_time_get_nano();
2385 
2386       /* Serialize the NIR shader to reduce memory pressure. */
2387       struct blob blob;
2388 
2389       blob_init(&blob);
2390       nir_serialize(&blob, stages[s].nir, true);
2391       blob_finish_get_buffer(&blob, &retained_shaders->stages[s].serialized_nir,
2392                              &retained_shaders->stages[s].serialized_nir_size);
2393 
2394       memcpy(retained_shaders->stages[s].shader_sha1, stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2395       memcpy(&retained_shaders->stages[s].key, &stages[s].key, sizeof(stages[s].key));
2396 
2397       stages[s].feedback.duration += os_time_get_nano() - stage_start;
2398    }
2399 }
2400 
2401 static void
radv_pipeline_import_retained_shaders(const struct radv_device * device,struct radv_graphics_lib_pipeline * lib,struct radv_shader_stage * stages)2402 radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_lib_pipeline *lib,
2403                                       struct radv_shader_stage *stages)
2404 {
2405    const struct radv_physical_device *pdev = radv_device_physical(device);
2406    struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;
2407 
2408    /* Import the stages (SPIR-V only in case of cache hits). */
2409    for (uint32_t i = 0; i < lib->stage_count; i++) {
2410       const VkPipelineShaderStageCreateInfo *sinfo = &lib->stages[i];
2411       gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
2412 
2413       radv_pipeline_stage_init(lib->base.base.create_flags, sinfo,
2414                                &lib->layout, &lib->stage_keys[s], &stages[s]);
2415    }
2416 
2417    /* Import the NIR shaders (after SPIRV->NIR). */
2418    for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
2419       if (!retained_shaders->stages[s].serialized_nir_size)
2420          continue;
2421 
2422       int64_t stage_start = os_time_get_nano();
2423 
2424       /* Deserialize the NIR shader. */
2425       const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
2426       struct blob_reader blob_reader;
2427       blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
2428                        retained_shaders->stages[s].serialized_nir_size);
2429 
2430       stages[s].stage = s;
2431       stages[s].nir = nir_deserialize(NULL, options, &blob_reader);
2432       stages[s].entrypoint = nir_shader_get_entrypoint(stages[s].nir)->function->name;
2433       memcpy(stages[s].shader_sha1, retained_shaders->stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2434       memcpy(&stages[s].key, &retained_shaders->stages[s].key, sizeof(stages[s].key));
2435 
2436       radv_shader_layout_init(&lib->layout, s, &stages[s].layout);
2437 
2438       stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2439 
2440       stages[s].feedback.duration += os_time_get_nano() - stage_start;
2441    }
2442 }
2443 
2444 static void
radv_pipeline_load_retained_shaders(const struct radv_device * device,const VkGraphicsPipelineCreateInfo * pCreateInfo,struct radv_shader_stage * stages)2445 radv_pipeline_load_retained_shaders(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2446                                     struct radv_shader_stage *stages)
2447 {
2448    const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2449    const VkPipelineLibraryCreateInfoKHR *libs_info =
2450       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2451 
2452    /* Nothing to load if no libs are imported. */
2453    if (!libs_info)
2454       return;
2455 
2456    /* Nothing to load if fast-linking is enabled and there are no retained shaders. */
2457    if (radv_should_import_lib_binaries(create_flags))
2458       return;
2459 
2460    for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2461       VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2462       struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2463 
2464       radv_pipeline_import_retained_shaders(device, gfx_pipeline_lib, stages);
2465    }
2466 }
2467 
2468 static unsigned
radv_get_rasterization_prim(const struct radv_shader_stage * stages,const struct radv_graphics_state_key * gfx_state)2469 radv_get_rasterization_prim(const struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state)
2470 {
2471    unsigned rast_prim;
2472 
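   /* The rasterization primitive can't be determined at compile time (e.g. it depends on dynamic
    * state); return -1 so the caller treats it as unknown.
    */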
2473    if (gfx_state->unknown_rast_prim)
2474       return -1;
2475 
2476    if (stages[MESA_SHADER_GEOMETRY].nir) {
2477       rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive);
2478    } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2479       if (stages[MESA_SHADER_TESS_EVAL].nir->info.tess.point_mode) {
2480          rast_prim = V_028A6C_POINTLIST;
2481       } else {
2482          rast_prim = radv_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode);
2483       }
2484    } else if (stages[MESA_SHADER_MESH].nir) {
2485       rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_MESH].nir->info.mesh.primitive_type);
2486    } else {
2487       rast_prim = radv_conv_prim_to_gs_out(gfx_state->ia.topology, false);
2488    }
2489 
2490    return rast_prim;
2491 }
2492 
2493 static bool
radv_is_fast_linking_enabled(const VkGraphicsPipelineCreateInfo * pCreateInfo)2494 radv_is_fast_linking_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo)
2495 {
2496    const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2497    const VkPipelineLibraryCreateInfoKHR *libs_info =
2498       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2499 
2500    if (!libs_info)
2501       return false;
2502 
2503    return !(create_flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
2504 }
2505 
2506 static bool
radv_skip_graphics_pipeline_compile(const struct radv_device * device,const VkGraphicsPipelineCreateInfo * pCreateInfo)2507 radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo)
2508 {
2509    const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
2510    const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2511    const struct radv_physical_device *pdev = radv_device_physical(device);
2512    VkShaderStageFlagBits binary_stages = 0;
2513    VkShaderStageFlags active_stages = 0;
2514 
2515    /* No compilation when pipeline binaries are imported. */
2516    if (binary_info && binary_info->binaryCount > 0)
2517       return true;
2518 
2519    /* Do not skip for libraries. */
2520    if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
2521       return false;
2522 
2523    /* Do not skip when fast-linking isn't enabled. */
2524    if (!radv_is_fast_linking_enabled(pCreateInfo))
2525       return false;
2526 
2527    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2528       const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2529       active_stages |= sinfo->stage;
2530    }
2531 
2532    const VkPipelineLibraryCreateInfoKHR *libs_info =
2533       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2534    if (libs_info) {
2535       for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2536          VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2537          struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2538 
2539          assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
2540 
2541          active_stages |= gfx_pipeline_lib->base.active_stages;
2542 
2543          for (uint32_t s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2544             if (!gfx_pipeline_lib->base.base.shaders[s])
2545                continue;
2546 
2547             binary_stages |= mesa_to_vk_shader_stage(s);
2548          }
2549       }
2550    }
2551 
2552    if (pdev->info.gfx_level >= GFX9) {
2553       /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2554       if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2555          binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2556       }
2557 
2558       if (binary_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2559          if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2560             binary_stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2561          } else {
2562             binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2563          }
2564       }
2565    }
2566 
2567    /* Only skip compilation when all binaries have been imported. */
2568    return binary_stages == active_stages;
2569 }
2570 
2571 void
radv_graphics_shaders_compile(struct radv_device * device,struct vk_pipeline_cache * cache,struct radv_shader_stage * stages,const struct radv_graphics_state_key * gfx_state,bool keep_executable_info,bool keep_statistic_info,bool is_internal,struct radv_retained_shaders * retained_shaders,bool noop_fs,struct radv_shader ** shaders,struct radv_shader_binary ** binaries,struct radv_shader ** gs_copy_shader,struct radv_shader_binary ** gs_copy_binary)2572 radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
2573                               struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2574                               bool keep_executable_info, bool keep_statistic_info, bool is_internal,
2575                               struct radv_retained_shaders *retained_shaders, bool noop_fs,
2576                               struct radv_shader **shaders, struct radv_shader_binary **binaries,
2577                               struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
2578 {
2579    const struct radv_physical_device *pdev = radv_device_physical(device);
2580    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2581    const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
2582    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2583       if (!stages[s].entrypoint)
2584          continue;
2585 
2586       int64_t stage_start = os_time_get_nano();
2587 
2588       /* NIR might already have been imported from a library. */
2589       if (!stages[s].nir) {
2590          struct radv_spirv_to_nir_options options = {
2591             .lower_view_index_to_zero = !gfx_state->has_multiview_view_index,
2592             .fix_dual_src_mrt1_export =
2593                gfx_state->ps.epilog.mrt0_is_dual_src && instance->drirc.dual_color_blend_by_location,
2594             .lower_view_index_to_device_index = stages[s].key.view_index_from_device_index,
2595          };
2596          blake3_hash key;
2597 
2598          if (nir_cache) {
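         /* With the RADV_PERFTEST_NIR_CACHE flag, the SPIR-V->NIR result is cached in the
          * pipeline cache, keyed by a BLAKE3 hash of the stage and its SPIR-V->NIR options.
          */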
2599             radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
2600             stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
2601          }
2602          if (!stages[s].nir) {
2603             stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
2604             if (nir_cache)
2605                radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
2606          }
2607       }
2608 
2609       stages[s].feedback.duration += os_time_get_nano() - stage_start;
2610    }
2611 
2612    if (retained_shaders) {
2613       radv_pipeline_retain_shaders(retained_shaders, stages);
2614    }
2615 
2616    VkShaderStageFlagBits active_nir_stages = 0;
2617    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2618       if (stages[i].nir)
2619          active_nir_stages |= mesa_to_vk_shader_stage(i);
2620    }
2621 
2622    if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
2623        BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
2624       nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
2625       nir_shader *task = stages[MESA_SHADER_TASK].nir;
2626 
2627       /* Mesh shaders only have a 1D "vertex index" which we use
2628        * as "workgroup index" to emulate the 3D workgroup ID.
2629        */
2630       nir_lower_compute_system_values_options o = {
2631          .lower_workgroup_id_to_index = true,
2632          .shortcut_1d_workgroup_id = true,
2633          .num_workgroups[0] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[0] : 0,
2634          .num_workgroups[1] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[1] : 0,
2635          .num_workgroups[2] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[2] : 0,
2636       };
2637 
2638       NIR_PASS(_, mesh, nir_lower_compute_system_values, &o);
2639    }
2640 
2641    radv_foreach_stage(i, active_nir_stages)
2642    {
2643       gl_shader_stage next_stage;
2644 
2645       if (stages[i].next_stage != MESA_SHADER_NONE) {
2646          next_stage = stages[i].next_stage;
2647       } else {
2648          next_stage = radv_get_next_stage(i, active_nir_stages);
2649       }
2650 
2651       radv_nir_shader_info_init(i, next_stage, &stages[i].info);
2652    }
2653 
2654    /* Determine if shaders use NGG before linking because it's needed by some NIR passes. */
2655    radv_fill_shader_info_ngg(device, stages, active_nir_stages);
2656 
2657    if (stages[MESA_SHADER_GEOMETRY].nir) {
2658       unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
2659 
2660       if (stages[MESA_SHADER_GEOMETRY].info.is_ngg) {
2661          nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
2662                          nir_lower_gs_intrinsics_count_vertices_per_primitive |
2663                          nir_lower_gs_intrinsics_overwrite_incomplete;
2664       }
2665 
2666       NIR_PASS(_, stages[MESA_SHADER_GEOMETRY].nir, nir_lower_gs_intrinsics, nir_gs_flags);
2667    }
2668 
2669    /* Remove all varyings when the fragment shader is a noop. */
2670    if (noop_fs) {
2671       radv_foreach_stage(i, active_nir_stages)
2672       {
2673          if (radv_is_last_vgt_stage(&stages[i])) {
2674             radv_remove_varyings(stages[i].nir);
2675             break;
2676          }
2677       }
2678    }
2679 
2680    radv_graphics_shaders_link(device, gfx_state, stages);
2681 
2682    if (stages[MESA_SHADER_FRAGMENT].nir) {
2683       unsigned rast_prim = radv_get_rasterization_prim(stages, gfx_state);
2684 
2685       NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, rast_prim);
2686    }
2687 
2688    radv_foreach_stage(i, active_nir_stages)
2689    {
2690       int64_t stage_start = os_time_get_nano();
2691 
2692       radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
2693 
2694       /* Gather info again, information such as outputs_read can be out-of-date. */
2695       /* Gather info again; information such as outputs_read can be out-of-date. */
2696       radv_nir_lower_io(device, stages[i].nir);
2697 
2698       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2699    }
2700 
2701    if (stages[MESA_SHADER_FRAGMENT].nir) {
2702       radv_nir_lower_poly_line_smooth(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2703 
2704       if (!gfx_state->ps.has_epilog)
2705          radv_nir_remap_color_attachment(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2706    }
2707 
2708    /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
2709    radv_graphics_shaders_link_varyings(stages);
2710 
2711    radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
2712 
2713    radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);
2714 
2715    radv_foreach_stage(i, active_nir_stages)
2716    {
2717       int64_t stage_start = os_time_get_nano();
2718 
2719       radv_postprocess_nir(device, gfx_state, &stages[i]);
2720 
2721       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2722    }
2723 
2724    /* Compile NIR shaders to AMD assembly. */
2725    radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
2726                                     active_nir_stages, shaders, binaries, gs_copy_shader, gs_copy_binary);
2727 
2728    if (keep_executable_info) {
2729       for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
2730          struct radv_shader *shader = shaders[i];
2731          if (!shader)
2732             continue;
2733 
2734          if (!stages[i].spirv.size)
2735             continue;
2736 
2737          shader->spirv = malloc(stages[i].spirv.size);
2738          memcpy(shader->spirv, stages[i].spirv.data, stages[i].spirv.size);
2739          shader->spirv_size = stages[i].spirv.size;
2740       }
2741    }
2742 }
2743 
2744 static bool
radv_should_compute_pipeline_hash(const struct radv_device * device,const enum radv_pipeline_type pipeline_type,bool fast_linking_enabled)2745 radv_should_compute_pipeline_hash(const struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2746                                   bool fast_linking_enabled)
2747 {
2748    const struct radv_physical_device *pdev = radv_device_physical(device);
2749    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2750 
2751    /* Skip computing the pipeline hash when GPL fast-linking is enabled because these shaders aren't
2752     * supposed to be cached and computing the hash is costly. However, always compute it when RGP is
2753     * enabled, otherwise the ISA isn't reported.
2754     */
2755    return !fast_linking_enabled ||
2756           ((instance->vk.trace_mode & RADV_TRACE_MODE_RGP) && pipeline_type == RADV_PIPELINE_GRAPHICS);
2757 }
2758 
2759 void
radv_graphics_pipeline_state_finish(struct radv_device * device,struct radv_graphics_pipeline_state * gfx_state)2760 radv_graphics_pipeline_state_finish(struct radv_device *device, struct radv_graphics_pipeline_state *gfx_state)
2761 {
2762    radv_pipeline_layout_finish(device, &gfx_state->layout);
2763    vk_free(&device->vk.alloc, gfx_state->vk_data);
2764 
2765    if (gfx_state->stages) {
2766       for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++)
2767          ralloc_free(gfx_state->stages[i].nir);
2768       free(gfx_state->stages);
2769    }
2770 }
2771 
2772 VkResult
radv_generate_graphics_pipeline_state(struct radv_device * device,const VkGraphicsPipelineCreateInfo * pCreateInfo,struct radv_graphics_pipeline_state * gfx_state)2773 radv_generate_graphics_pipeline_state(struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2774                                       struct radv_graphics_pipeline_state *gfx_state)
2775 {
2776    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
2777    const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2778    const bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
2779    enum radv_pipeline_type pipeline_type = RADV_PIPELINE_GRAPHICS;
2780    VkResult result;
2781 
2782    memset(gfx_state, 0, sizeof(*gfx_state));
2783 
2784    VkGraphicsPipelineLibraryFlagBitsEXT needed_lib_flags = ALL_GRAPHICS_LIB_FLAGS;
2785    if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
2786       const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
2787          vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
2788       needed_lib_flags = lib_info ? lib_info->flags : 0;
2789       pipeline_type = RADV_PIPELINE_GRAPHICS_LIB;
2790    }
2791 
2792    radv_pipeline_layout_init(device, &gfx_state->layout, false);
2793 
2794    /* If we have libraries, import them first. */
2795    const VkPipelineLibraryCreateInfoKHR *libs_info =
2796       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2797    if (libs_info) {
2798       for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2799          VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2800          const struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2801 
2802          vk_graphics_pipeline_state_merge(&gfx_state->vk, &gfx_pipeline_lib->graphics_state);
2803 
2804          radv_graphics_pipeline_import_layout(&gfx_state->layout, &gfx_pipeline_lib->layout);
2805 
2806          needed_lib_flags &= ~gfx_pipeline_lib->lib_flags;
2807       }
2808    }
2809 
2810    result = vk_graphics_pipeline_state_fill(&device->vk, &gfx_state->vk, pCreateInfo, NULL, 0, NULL, NULL,
2811                                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &gfx_state->vk_data);
2812    if (result != VK_SUCCESS)
2813       goto fail;
2814 
2815    if (pipeline_layout)
2816       radv_graphics_pipeline_import_layout(&gfx_state->layout, pipeline_layout);
2817 
2818    if (radv_should_compute_pipeline_hash(device, pipeline_type, fast_linking_enabled))
2819       radv_pipeline_layout_hash(&gfx_state->layout);
2820 
2821    gfx_state->compilation_required = !radv_skip_graphics_pipeline_compile(device, pCreateInfo);
2822    if (gfx_state->compilation_required) {
2823       gfx_state->key = radv_generate_graphics_pipeline_key(device, pCreateInfo, &gfx_state->vk, needed_lib_flags);
2824 
2825       gfx_state->stages = malloc(sizeof(struct radv_shader_stage) * MESA_VULKAN_SHADER_STAGES);
2826       if (!gfx_state->stages) {
2827          result = VK_ERROR_OUT_OF_HOST_MEMORY;
2828          goto fail;
2829       }
2830 
2831       for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2832          gfx_state->stages[i].entrypoint = NULL;
2833          gfx_state->stages[i].nir = NULL;
2834          gfx_state->stages[i].spirv.size = 0;
2835          gfx_state->stages[i].next_stage = MESA_SHADER_NONE;
2836       }
2837 
2838       for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2839          const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2840          gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2841 
2842          radv_pipeline_stage_init(create_flags, sinfo, &gfx_state->layout, &gfx_state->key.stage_info[stage],
2843                                   &gfx_state->stages[stage]);
2844       }
2845 
2846       radv_pipeline_load_retained_shaders(device, pCreateInfo, gfx_state->stages);
2847    }
2848 
2849    return VK_SUCCESS;
2850 
2851 fail:
2852    radv_graphics_pipeline_state_finish(device, gfx_state);
2853    return result;
2854 }
2855 
2856 void
radv_graphics_pipeline_hash(const struct radv_device * device,const struct radv_graphics_pipeline_state * gfx_state,unsigned char * hash)2857 radv_graphics_pipeline_hash(const struct radv_device *device, const struct radv_graphics_pipeline_state *gfx_state,
2858                             unsigned char *hash)
2859 {
2860    struct mesa_sha1 ctx;
2861 
2862    _mesa_sha1_init(&ctx);
2863    radv_pipeline_hash(device, &gfx_state->layout, &ctx);
2864 
2865    _mesa_sha1_update(&ctx, &gfx_state->key.gfx_state, sizeof(gfx_state->key.gfx_state));
2866 
2867    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2868       const struct radv_shader_stage *stage = &gfx_state->stages[s];
2869 
2870       if (!stage->entrypoint)
2871          continue;
2872 
2873       _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
2874       _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key));
2875    }
2876 
2877    _mesa_sha1_final(&ctx, hash);
2878 }
2879 
2880 static VkResult
radv_graphics_pipeline_compile(struct radv_graphics_pipeline * pipeline,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct radv_graphics_pipeline_state * gfx_state,struct radv_device * device,struct vk_pipeline_cache * cache,bool fast_linking_enabled)2881 radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2882                                const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device,
2883                                struct vk_pipeline_cache *cache, bool fast_linking_enabled)
2884 {
2885    struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
2886    struct radv_shader_binary *gs_copy_binary = NULL;
2887    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
2888    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
2889    struct radv_shader_stage *stages = gfx_state->stages;
2890    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
2891       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2892    VkPipelineCreationFeedback pipeline_feedback = {
2893       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2894    };
2895    bool skip_shaders_cache = false;
2896    VkResult result = VK_SUCCESS;
2897    const bool retain_shaders =
2898       !!(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
2899    struct radv_retained_shaders *retained_shaders = NULL;
2900 
2901    int64_t pipeline_start = os_time_get_nano();
2902 
2903    if (radv_should_compute_pipeline_hash(device, pipeline->base.type, fast_linking_enabled)) {
2904       radv_graphics_pipeline_hash(device, gfx_state, pipeline->base.sha1);
2905 
2906       pipeline->base.pipeline_hash = *(uint64_t *)pipeline->base.sha1;
2907    }
2908 
2909    /* Skip the shaders cache when any of the below are true:
2910     * - fast-linking is enabled because it's useless to cache unoptimized pipelines
2911     * - shaders are captured because it's for debugging purposes
2912     * - binaries are captured for later use
2913     * - graphics pipeline libraries are created with the RETAIN_LINK_TIME_OPTIMIZATION flag and
2914     *   module identifiers are used (i.e. no SPIR-V is provided).
2915     */
2916    if (fast_linking_enabled || keep_executable_info ||
2917        (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR)) {
2918       skip_shaders_cache = true;
2919    } else if (retain_shaders) {
2920       assert(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
2921       for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2922          if (stages[i].entrypoint && !stages[i].spirv.size) {
2923             skip_shaders_cache = true;
2924             break;
2925          }
2926       }
2927    }
2928 
2929    bool found_in_application_cache = true;
2930    if (!skip_shaders_cache &&
2931        radv_graphics_pipeline_cache_search(device, cache, pipeline, &found_in_application_cache)) {
2932       if (found_in_application_cache)
2933          pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2934 
2935       if (retain_shaders) {
2936          /* For graphics pipeline libraries created with the RETAIN_LINK_TIME_OPTIMIZATION flag, we
2937           * need to retain the stage info because we can't know if the LTO pipelines will
2938           * be found in the shaders cache.
2939           */
2940          struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
2941 
2942          gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(device, pCreateInfo->stageCount,
2943                                                                        pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx);
2944          if (!gfx_pipeline_lib->stages)
2945             return VK_ERROR_OUT_OF_HOST_MEMORY;
2946 
2947          gfx_pipeline_lib->stage_count = pCreateInfo->stageCount;
2948 
2949          for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
2950             gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
2951             gfx_pipeline_lib->stage_keys[s] = gfx_state->key.stage_info[s];
2952          }
2953       }
2954 
2955       result = VK_SUCCESS;
2956       goto done;
2957    }
2958 
2959    if (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
2960       return VK_PIPELINE_COMPILE_REQUIRED;
2961 
2962    if (retain_shaders) {
2963       struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
2964       retained_shaders = &gfx_pipeline_lib->retained_shaders;
2965    }
2966 
2967    const bool noop_fs = radv_pipeline_needs_noop_fs(pipeline, &gfx_state->key.gfx_state);
2968 
2969    radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
2970                                  keep_statistic_info, pipeline->base.is_internal, retained_shaders, noop_fs,
2971                                  pipeline->base.shaders, binaries, &pipeline->base.gs_copy_shader, &gs_copy_binary);
2972 
2973    if (!skip_shaders_cache) {
2974       radv_pipeline_cache_insert(device, cache, &pipeline->base);
2975    }
2976 
2977    free(gs_copy_binary);
2978    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
2979       free(binaries[i]);
2980       if (stages[i].nir) {
2981          if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) {
2982             radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], i, stderr);
2983          }
2984       }
2985    }
2986 
2987 done:
2988    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2989 
2990    if (creation_feedback) {
2991       *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
2992 
2993       if (creation_feedback->pipelineStageCreationFeedbackCount > 0) {
2994          uint32_t num_feedbacks = 0;
2995 
2996          for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2997             gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
2998             creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
2999          }
3000 
3001          /* Stages imported from graphics pipeline libraries are defined as additional entries in the
3002           * order they were imported.
3003           */
3004          const VkPipelineLibraryCreateInfoKHR *libs_info =
3005             vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3006          if (libs_info) {
3007             for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3008                VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3009                struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3010 
3011                if (!gfx_pipeline_lib->base.active_stages)
3012                   continue;
3013 
3014                radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages)
3015                {
3016                   creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
3017                }
3018             }
3019          }
3020 
3021          assert(num_feedbacks == creation_feedback->pipelineStageCreationFeedbackCount);
3022       }
3023    }
3024 
3025    return result;
3026 }
3027 
3028 struct radv_vgt_shader_key
radv_get_vgt_shader_key(const struct radv_device * device,struct radv_shader ** shaders,const struct radv_shader * gs_copy_shader)3029 radv_get_vgt_shader_key(const struct radv_device *device, struct radv_shader **shaders,
3030                         const struct radv_shader *gs_copy_shader)
3031 {
3032    uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
3033    struct radv_shader *last_vgt_shader = NULL;
3034    struct radv_vgt_shader_key key;
3035 
3036    memset(&key, 0, sizeof(key));
3037 
3038    if (shaders[MESA_SHADER_GEOMETRY]) {
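   /* Pick the last pre-rasterization (VGT) shader: GS, then TES, then VS, with mesh shading as
    * the remaining possibility.
    */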
3039       last_vgt_shader = shaders[MESA_SHADER_GEOMETRY];
3040    } else if (shaders[MESA_SHADER_TESS_EVAL]) {
3041       last_vgt_shader = shaders[MESA_SHADER_TESS_EVAL];
3042    } else if (shaders[MESA_SHADER_VERTEX]) {
3043       last_vgt_shader = shaders[MESA_SHADER_VERTEX];
3044    } else {
3045       assert(shaders[MESA_SHADER_MESH]);
3046       last_vgt_shader = shaders[MESA_SHADER_MESH];
3047    }
3048 
3049    vs_size = gs_size = last_vgt_shader->info.wave_size;
3050    if (gs_copy_shader)
3051       vs_size = gs_copy_shader->info.wave_size;
3052 
3053    if (shaders[MESA_SHADER_TESS_CTRL])
3054       hs_size = shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
3055 
3056    key.tess = !!shaders[MESA_SHADER_TESS_CTRL];
3057    key.gs = !!shaders[MESA_SHADER_GEOMETRY];
3058    if (last_vgt_shader->info.is_ngg) {
3059       key.ngg = 1;
3060       key.ngg_passthrough = last_vgt_shader->info.is_ngg_passthrough;
3061       key.ngg_streamout = last_vgt_shader->info.so.num_outputs > 0;
3062    }
3063    if (shaders[MESA_SHADER_MESH]) {
3064       key.mesh = 1;
3065       key.mesh_scratch_ring = shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
3066    }
3067 
3068    key.hs_wave32 = hs_size == 32;
3069    key.vs_wave32 = vs_size == 32;
3070    key.gs_wave32 = gs_size == 32;
3071 
3072    return key;
3073 }
3074 
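/* GFX10.3 only: decide whether coarse shading may be forced through VRS for this pipeline, which
 * requires the fragment shader to allow flat shading and can be disabled with the
 * RADV_DEBUG_NO_VRS_FLAT_SHADING debug flag.
 */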
3075 static bool
gfx103_pipeline_vrs_coarse_shading(const struct radv_device * device,const struct radv_graphics_pipeline * pipeline)3076 gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
3077 {
3078    const struct radv_physical_device *pdev = radv_device_physical(device);
3079    const struct radv_instance *instance = radv_physical_device_instance(pdev);
3080    struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
3081 
3082    if (pdev->info.gfx_level != GFX10_3)
3083       return false;
3084 
3085    if (instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
3086       return false;
3087 
3088    if (ps && !ps->info.ps.allow_flat_shading)
3089       return false;
3090 
3091    return true;
3092 }
3093 
3094 static void
radv_pipeline_init_vertex_input_state(const struct radv_device * device,struct radv_graphics_pipeline * pipeline,const struct vk_graphics_pipeline_state * state)3095 radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3096                                       const struct vk_graphics_pipeline_state *state)
3097 {
3098    const struct radv_physical_device *pdev = radv_device_physical(device);
3099    const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
3100 
3101    if (!state->vi)
3102       return;
3103 
3104    u_foreach_bit (i, state->vi->bindings_valid) {
3105       pipeline->binding_stride[i] = state->vi->bindings[i].stride;
3106    }
3107 
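   /* When the VS uses one vertex buffer descriptor per attribute, precompute the per-attribute
    * vertex-fetch information: binding, offset, divisor, format size/alignment, alpha-adjust and
    * post-shuffle requirements.
    */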
3108    if (vs->info.vs.use_per_attribute_vb_descs) {
3109       const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3110       const enum radeon_family family = pdev->info.family;
3111       const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
3112 
3113       pipeline->vertex_input.bindings_match_attrib = true;
3114 
3115       u_foreach_bit (i, state->vi->attributes_valid) {
3116          uint32_t binding = state->vi->attributes[i].binding;
3117          uint32_t offset = state->vi->attributes[i].offset;
3118 
3119          pipeline->vertex_input.attribute_mask |= BITFIELD_BIT(i);
3120          pipeline->vertex_input.bindings[i] = binding;
3121          pipeline->vertex_input.bindings_match_attrib &= binding == i;
3122 
3123          if (state->vi->bindings[binding].stride) {
3124             pipeline->vertex_input.attrib_index_offset[i] = offset / state->vi->bindings[binding].stride;
3125          }
3126 
3127          if (state->vi->bindings[binding].input_rate) {
3128             pipeline->vertex_input.instance_rate_inputs |= BITFIELD_BIT(i);
3129             pipeline->vertex_input.divisors[i] = state->vi->bindings[binding].divisor;
3130 
3131             if (state->vi->bindings[binding].divisor == 0) {
3132                pipeline->vertex_input.zero_divisors |= BITFIELD_BIT(i);
3133             } else if (state->vi->bindings[binding].divisor > 1) {
3134                pipeline->vertex_input.nontrivial_divisors |= BITFIELD_BIT(i);
3135             }
3136          }
3137 
3138          pipeline->vertex_input.offsets[i] = offset;
3139 
3140          enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
3141          const struct ac_vtx_format_info *vtx_info = &vtx_info_table[format];
3142 
3143          pipeline->vertex_input.formats[i] = format;
3144          uint8_t format_align_req_minus_1 = vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1);
3145          pipeline->vertex_input.format_align_req_minus_1[i] = format_align_req_minus_1;
3146          uint8_t component_align_req_minus_1 =
3147             MIN2(vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size, 4) - 1;
3148          pipeline->vertex_input.component_align_req_minus_1[i] = component_align_req_minus_1;
3149          pipeline->vertex_input.format_sizes[i] = vtx_info->element_size;
3150          pipeline->vertex_input.alpha_adjust_lo |= (vtx_info->alpha_adjust & 0x1) << i;
3151          pipeline->vertex_input.alpha_adjust_hi |= (vtx_info->alpha_adjust >> 1) << i;
3152          if (G_008F0C_DST_SEL_X(vtx_info->dst_sel) == V_008F0C_SQ_SEL_Z) {
3153             pipeline->vertex_input.post_shuffle |= BITFIELD_BIT(i);
3154          }
3155 
3156          if (!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
3157             pipeline->vertex_input.nontrivial_formats |= BITFIELD_BIT(i);
3158          }
3159       }
3160    } else {
3161       u_foreach_bit (i, vs->info.vs.vb_desc_usage_mask) {
3162          pipeline->vertex_input.bindings[i] = i;
3163       }
3164    }
3165 }
3166 
3167 static void
radv_pipeline_init_shader_stages_state(const struct radv_device * device,struct radv_graphics_pipeline * pipeline)3168 radv_pipeline_init_shader_stages_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3169 {
3170    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3171       bool shader_exists = !!pipeline->base.shaders[i];
3172       if (shader_exists || i < MESA_SHADER_COMPUTE) {
3173          if (shader_exists)
3174             pipeline->base.need_indirect_descriptor_sets |=
3175                radv_shader_need_indirect_descriptor_sets(pipeline->base.shaders[i]);
3176       }
3177    }
3178 
3179    gl_shader_stage first_stage =
3180       radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
3181 
3182    const struct radv_shader *shader = radv_get_shader(pipeline->base.shaders, first_stage);
3183    const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
3184 
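   /* Record where the base vertex/start instance user SGPRs live so draw parameters can be
    * emitted directly at draw time.
    */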
3185    if (loc->sgpr_idx != -1) {
3186       pipeline->vtx_base_sgpr = shader->info.user_data_0;
3187       pipeline->vtx_base_sgpr += loc->sgpr_idx * 4;
3188       pipeline->vtx_emit_num = loc->num_sgprs;
3189       pipeline->uses_drawid = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id;
3190       pipeline->uses_baseinstance = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance;
3191 
3192       assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance);
3193    }
3194 }
3195 
3196 uint32_t
radv_get_vgt_gs_out(struct radv_shader ** shaders,uint32_t primitive_topology)3197 radv_get_vgt_gs_out(struct radv_shader **shaders, uint32_t primitive_topology)
3198 {
3199    uint32_t gs_out;
3200 
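   /* Derive the GS output primitive type (VGT_GS_OUT_PRIM_TYPE) from the last geometry stage
    * present; with only a VS, fall back to the API primitive topology.
    */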
3201    if (shaders[MESA_SHADER_GEOMETRY]) {
3202       gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
3203    } else if (shaders[MESA_SHADER_TESS_CTRL]) {
3204       if (shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
3205          gs_out = V_028A6C_POINTLIST;
3206       } else {
3207          gs_out = radv_conv_tess_prim_to_gs_out(shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
3208       }
3209    } else if (shaders[MESA_SHADER_MESH]) {
3210       gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_MESH]->info.ms.output_prim);
3211    } else {
3212       gs_out = radv_conv_prim_to_gs_out(primitive_topology, false);
3213    }
3214 
3215    return gs_out;
3216 }
3217 
3218 static uint32_t
radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline * pipeline,const struct vk_graphics_pipeline_state * state)3219 radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
3220 {
3221    uint32_t primitive_topology = 0;
3222 
3223    if (pipeline->last_vgt_api_stage == MESA_SHADER_VERTEX)
3224       primitive_topology = radv_translate_prim(state->ia->primitive_topology);
3225 
3226    return radv_get_vgt_gs_out(pipeline->base.shaders, primitive_topology);
3227 }
3228 
3229 static void
radv_pipeline_init_extra(struct radv_graphics_pipeline * pipeline,const struct radv_graphics_pipeline_create_info * extra,const struct vk_graphics_pipeline_state * state)3230 radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline,
3231                          const struct radv_graphics_pipeline_create_info *extra,
3232                          const struct vk_graphics_pipeline_state *state)
3233 {
3234    if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
3235        extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
3236        extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 ||
3237        extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 ||
3238        extra->custom_blend_mode == V_028808_CB_RESOLVE) {
3239       /* According to the CB spec, CB_SHADER_MASK should be set to enable writes to all four
3240        * channels of MRT0.
3241        */
3242       pipeline->cb_shader_mask = 0xf;
3243 
3244       pipeline->custom_blend_mode = extra->custom_blend_mode;
3245    }
3246 
3247    if (extra->use_rectlist) {
3248       struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
3249       dynamic->vk.ia.primitive_topology = V_008958_DI_PT_RECTLIST;
3250 
3251       pipeline->rast_prim = radv_conv_prim_to_gs_out(dynamic->vk.ia.primitive_topology, pipeline->is_ngg);
3252    }
3253 
3254    if (radv_pipeline_has_ds_attachments(state->rp)) {
3255       pipeline->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
3256       pipeline->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
3257       pipeline->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
3258       pipeline->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
3259    }
3260 }
3261 
3262 bool
radv_needs_null_export_workaround(const struct radv_device * device,const struct radv_shader * ps,unsigned custom_blend_mode)3263 radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
3264                                   unsigned custom_blend_mode)
3265 {
3266    const struct radv_physical_device *pdev = radv_device_physical(device);
3267    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3268 
3269    if (!ps)
3270       return false;
3271 
3272    /* Ensure that some export memory is always allocated, for two reasons:
3273     *
3274     * 1) Correctness: The hardware ignores the EXEC mask if no export
3275     *    memory is allocated, so KILL and alpha test do not work correctly
3276     *    without this.
3277     * 2) Performance: Every shader needs at least a NULL export, even when
3278     *    it writes no color/depth output. The NULL export instruction
3279     *    stalls without this setting.
3280     *
3281     * Don't add this to CB_SHADER_MASK.
3282     *
3283     * GFX10 supports pixel shaders without exports by setting both the
3284     * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
3285     * instructions if any are present.
3286     *
3287     * GFX11 requires one color output, otherwise the DCC decompression does nothing.
3288     *
3289     * Primitive Ordered Pixel Shading also requires an export, otherwise interlocking doesn't work
3290     * correctly before GFX11, and a hang happens on GFX11.
3291     */
3292    return (gfx_level <= GFX9 || ps->info.ps.can_discard || ps->info.ps.pops ||
3293            (custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 && gfx_level >= GFX11)) &&
3294           !ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask;
3295 }
3296 
3297 static VkResult
radv_graphics_pipeline_import_binaries(struct radv_device * device,struct radv_graphics_pipeline * pipeline,const VkPipelineBinaryInfoKHR * binary_info)3298 radv_graphics_pipeline_import_binaries(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3299                                        const VkPipelineBinaryInfoKHR *binary_info)
3300 {
3301    blake3_hash pipeline_hash;
3302    struct mesa_blake3 ctx;
3303 
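   /* No shaders are compiled when binaries are imported, so derive the pipeline hash from the
    * keys of the imported binaries instead.
    */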
3304    _mesa_blake3_init(&ctx);
3305 
3306    for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
3307       VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
3308       struct radv_shader *shader;
3309       struct blob_reader blob;
3310 
3311       blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
3312 
3313       shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
3314       if (!shader)
3315          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3316 
3317       if (shader->info.stage == MESA_SHADER_VERTEX && i > 0) {
3318          /* The GS copy-shader is a VS placed after all other stages. */
3319          pipeline->base.gs_copy_shader = shader;
3320       } else {
3321          pipeline->base.shaders[shader->info.stage] = shader;
3322       }
3323 
3324       _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
3325    }
3326 
3327    _mesa_blake3_final(&ctx, pipeline_hash);
3328 
3329    pipeline->base.pipeline_hash = *(uint64_t *)pipeline_hash;
3330 
3331    pipeline->has_pipeline_binaries = true;
3332 
3333    return VK_SUCCESS;
3334 }
3335 
3336 static VkResult
radv_graphics_pipeline_init(struct radv_graphics_pipeline * pipeline,struct radv_device * device,struct vk_pipeline_cache * cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct radv_graphics_pipeline_create_info * extra)3337 radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
3338                             struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
3339                             const struct radv_graphics_pipeline_create_info *extra)
3340 {
3341    bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
3342    struct radv_graphics_pipeline_state gfx_state;
3343    VkResult result = VK_SUCCESS;
3344 
3345    pipeline->last_vgt_api_stage = MESA_SHADER_NONE;
3346 
3347    const VkPipelineLibraryCreateInfoKHR *libs_info =
3348       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3349 
3350    /* If we have libraries, import them first. */
3351    if (libs_info) {
3352       for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3353          VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3354          struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3355 
3356          assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
3357 
3358          radv_graphics_pipeline_import_lib(device, pipeline, gfx_pipeline_lib);
3359       }
3360    }
3361 
3362    radv_pipeline_import_graphics_info(device, pipeline, pCreateInfo);
3363 
3364    result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
3365    if (result != VK_SUCCESS)
3366       return result;
3367 
3368    const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
3369 
3370    if (binary_info && binary_info->binaryCount > 0) {
3371       result = radv_graphics_pipeline_import_binaries(device, pipeline, binary_info);
3372    } else {
3373       if (gfx_state.compilation_required) {
3374          result =
3375             radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
3376       }
3377    }
3378 
3379    if (result != VK_SUCCESS) {
3380       radv_graphics_pipeline_state_finish(device, &gfx_state);
3381       return result;
3382    }
3383 
3384    uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk);
3385 
3386    radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk);
3387 
3388    if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3389       radv_pipeline_init_input_assembly_state(device, pipeline);
3390    radv_pipeline_init_dynamic_state(device, pipeline, &gfx_state.vk, pCreateInfo);
3391 
3392    const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
3393    if (ps && !ps->info.ps.has_epilog) {
3394       pipeline->spi_shader_col_format = ps->info.ps.spi_shader_col_format;
3395       pipeline->cb_shader_mask = ps->info.ps.cb_shader_mask;
3396    }
3397 
3398    unsigned custom_blend_mode = extra ? extra->custom_blend_mode : 0;
3399    if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) && !pipeline->spi_shader_col_format) {
3400       pipeline->spi_shader_col_format = V_028714_SPI_SHADER_32_R;
3401    }
3402 
3403    if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3404       radv_pipeline_init_vertex_input_state(device, pipeline, &gfx_state.vk);
3405 
3406    radv_pipeline_init_shader_stages_state(device, pipeline);
3407 
3408    pipeline->is_ngg = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.is_ngg;
3409    pipeline->has_ngg_culling =
3410       pipeline->is_ngg && pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
3411    pipeline->force_vrs_per_vertex = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
3412    pipeline->rast_prim = vgt_gs_out_prim_type;
3413    pipeline->uses_out_of_order_rast = gfx_state.vk.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD;
3414    pipeline->uses_vrs = radv_is_vrs_enabled(&gfx_state.vk);
3415    pipeline->uses_vrs_attachment = radv_pipeline_uses_vrs_attachment(pipeline, &gfx_state.vk);
3416    pipeline->uses_vrs_coarse_shading = !pipeline->uses_vrs && gfx103_pipeline_vrs_coarse_shading(device, pipeline);
3417 
3418    pipeline->base.push_constant_size = gfx_state.layout.push_constant_size;
3419    pipeline->base.dynamic_offset_count = gfx_state.layout.dynamic_offset_count;
3420 
3421    if (extra) {
3422       radv_pipeline_init_extra(pipeline, extra, &gfx_state.vk);
3423    }
3424 
3425    radv_graphics_pipeline_state_finish(device, &gfx_state);
3426    return result;
3427 }
3428 
3429 VkResult
radv_graphics_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkGraphicsPipelineCreateInfo * pCreateInfo,const struct radv_graphics_pipeline_create_info * extra,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline)3430 radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
3431                               const struct radv_graphics_pipeline_create_info *extra,
3432                               const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
3433 {
3434    VK_FROM_HANDLE(radv_device, device, _device);
3435    VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
3436    struct radv_graphics_pipeline *pipeline;
3437    VkResult result;
3438 
3439    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3440    if (pipeline == NULL)
3441       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3442 
3443    radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_GRAPHICS);
3444    pipeline->base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
3445    pipeline->base.is_internal = _cache == device->meta_state.cache;
3446 
3447    result = radv_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, extra);
3448    if (result != VK_SUCCESS) {
3449       radv_pipeline_destroy(device, &pipeline->base, pAllocator);
3450       return result;
3451    }
3452 
3453    *pPipeline = radv_pipeline_to_handle(&pipeline->base);
3454    radv_rmv_log_graphics_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
3455    return VK_SUCCESS;
3456 }
3457 
3458 void
radv_destroy_graphics_pipeline(struct radv_device * device,struct radv_graphics_pipeline * pipeline)3459 radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3460 {
3461    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3462       if (pipeline->base.shaders[i])
3463          radv_shader_unref(device, pipeline->base.shaders[i]);
3464    }
3465 
3466    if (pipeline->base.gs_copy_shader)
3467       radv_shader_unref(device, pipeline->base.gs_copy_shader);
3468 }
3469 
static VkResult
radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, struct radv_device *device,
                                struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
   VkResult result;

   const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
      vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
   const VkPipelineLibraryCreateInfoKHR *libs_info =
      vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
   bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);

   struct vk_graphics_pipeline_state *state = &pipeline->graphics_state;

   pipeline->base.last_vgt_api_stage = MESA_SHADER_NONE;
   pipeline->lib_flags = lib_info ? lib_info->flags : 0;

   radv_pipeline_layout_init(device, &pipeline->layout, false);

   /* If we have libraries, import them first. */
   if (libs_info) {
      for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
         VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
         struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);

         vk_graphics_pipeline_state_merge(state, &gfx_pipeline_lib->graphics_state);

         radv_graphics_pipeline_import_layout(&pipeline->layout, &gfx_pipeline_lib->layout);

         radv_graphics_pipeline_import_lib(device, &pipeline->base, gfx_pipeline_lib);

         pipeline->lib_flags |= gfx_pipeline_lib->lib_flags;
      }
   }

   result = vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, 0, NULL, NULL,
                                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data);
   if (result != VK_SUCCESS)
      return result;

   radv_pipeline_import_graphics_info(device, &pipeline->base, pCreateInfo);

   if (pipeline_layout)
      radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout);

   const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);

   if (binary_info && binary_info->binaryCount > 0) {
      result = radv_graphics_pipeline_import_binaries(device, &pipeline->base, binary_info);
   } else {
      struct radv_graphics_pipeline_state gfx_state;

      result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
      if (result != VK_SUCCESS)
         return result;

      result =
         radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);

      radv_graphics_pipeline_state_finish(device, &gfx_state);
   }

   return result;
}

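/* Allocate and initialize a pipeline-library object (VK_EXT_graphics_pipeline_library),
 * destroying it again if initialization fails.
 */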
static VkResult
radv_graphics_lib_pipeline_create(VkDevice _device, VkPipelineCache _cache,
                                  const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                  const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
   VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
   VK_FROM_HANDLE(radv_device, device, _device);
   struct radv_graphics_lib_pipeline *pipeline;
   VkResult result;

   pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_GRAPHICS_LIB);
   pipeline->base.base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);

   pipeline->mem_ctx = ralloc_context(NULL);

   result = radv_graphics_lib_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
      return result;
   }

   *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);

   return VK_SUCCESS;
}

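/* Destroy a graphics pipeline library: release the owned layout, the cached
 * graphics state data, the per-stage serialized NIR of the retained shaders and
 * the ralloc context, then tear down the embedded graphics pipeline.
 */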
void
radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline)
{
   struct radv_retained_shaders *retained_shaders = &pipeline->retained_shaders;

   radv_pipeline_layout_finish(device, &pipeline->layout);

   vk_free(&device->vk.alloc, pipeline->state_data);

   for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      free(retained_shaders->stages[i].serialized_nir);
   }

   ralloc_free(pipeline->mem_ctx);

   radv_destroy_graphics_pipeline(device, &pipeline->base);
}

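/* vkCreateGraphicsPipelines entry point: create each pipeline in turn, routing
 * VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR requests to the pipeline-library path. On
 * failure the failing entry is set to VK_NULL_HANDLE and the error code is kept;
 * creation stops early when VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR
 * is set, and any remaining entries are set to VK_NULL_HANDLE.
 */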
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
      VkResult r;
      if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
         r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
      } else {
         r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, &pPipelines[i]);
      }
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;

         if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
            break;
      }
   }

   for (; i < count; ++i)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}