1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11 #include "meta/radv_meta.h"
12 #include "nir/nir.h"
13 #include "nir/nir_builder.h"
14 #include "nir/nir_serialize.h"
15 #include "nir/nir_xfb_info.h"
16 #include "nir/radv_nir.h"
17 #include "spirv/nir_spirv.h"
18 #include "util/disk_cache.h"
19 #include "util/mesa-sha1.h"
20 #include "util/os_time.h"
21 #include "util/u_atomic.h"
22 #include "radv_cs.h"
23 #include "radv_debug.h"
24 #include "radv_entrypoints.h"
25 #include "radv_formats.h"
26 #include "radv_physical_device.h"
27 #include "radv_pipeline_binary.h"
28 #include "radv_pipeline_cache.h"
29 #include "radv_rmv.h"
30 #include "radv_shader.h"
31 #include "radv_shader_args.h"
32 #include "vk_nir_convert_ycbcr.h"
33 #include "vk_pipeline.h"
34 #include "vk_render_pass.h"
35 #include "vk_util.h"
36
37 #include "util/u_debug.h"
38 #include "ac_binary.h"
39 #include "ac_formats.h"
40 #include "ac_nir.h"
41 #include "ac_shader_util.h"
42 #include "aco_interface.h"
43 #include "sid.h"
44
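/* Helper: true when the static fragment shading rate state requests anything other than a 1x1 rate with KEEP combiners. */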
45 static bool
46 radv_is_static_vrs_enabled(const struct vk_graphics_pipeline_state *state)
47 {
48 if (!state->fsr)
49 return false;
50
51 return state->fsr->fragment_size.width != 1 || state->fsr->fragment_size.height != 1 ||
52 state->fsr->combiner_ops[0] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR ||
53 state->fsr->combiner_ops[1] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
54 }
55
56 static bool
57 radv_is_vrs_enabled(const struct vk_graphics_pipeline_state *state)
58 {
59 return radv_is_static_vrs_enabled(state) || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_FSR);
60 }
61
62 static bool
63 radv_pipeline_has_ds_attachments(const struct vk_render_pass_state *rp)
64 {
65 return rp->depth_attachment_format != VK_FORMAT_UNDEFINED || rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
66 }
67
68 static bool
69 radv_pipeline_has_color_attachments(const struct vk_render_pass_state *rp)
70 {
71 for (uint32_t i = 0; i < rp->color_attachment_count; ++i) {
72 if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
73 return true;
74 }
75
76 return false;
77 }
78
79 /**
80 * Get rid of DST in the blend factors by commuting the operands:
81 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
82 */
83 void
84 radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
85 VkBlendFactor replacement_src)
86 {
87 if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
88 *src_factor = VK_BLEND_FACTOR_ZERO;
89 *dst_factor = replacement_src;
90
91 /* Commuting the operands requires reversing subtractions. */
92 if (*func == VK_BLEND_OP_SUBTRACT)
93 *func = VK_BLEND_OP_REVERSE_SUBTRACT;
94 else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
95 *func = VK_BLEND_OP_SUBTRACT;
96 }
97 }
98
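/* Select the SPI color export format for the given attachment format; the blend flags pick between the normal/alpha/blend variants returned by ac_choose_spi_color_formats(). */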
99 static unsigned
100 radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
101 bool blend_need_alpha)
102 {
103 const struct radv_physical_device *pdev = radv_device_physical(device);
104 const struct util_format_description *desc = vk_format_description(vk_format);
105 bool use_rbplus = pdev->info.rbplus_allowed;
106 struct ac_spi_color_formats formats = {0};
107 unsigned format, ntype, swap;
108
109 format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
110 ntype = ac_get_cb_number_type(desc->format);
111 swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false);
112
113 ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
114
115 if (blend_enable && blend_need_alpha)
116 return formats.blend_alpha;
117 else if (blend_need_alpha)
118 return formats.alpha;
119 else if (blend_enable)
120 return formats.blend;
121 else
122 return formats.normal;
123 }
124
125 static bool
126 format_is_int8(VkFormat format)
127 {
128 const struct util_format_description *desc = vk_format_description(format);
129 int channel = vk_format_get_first_non_void_channel(format);
130
131 return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
132 }
133
134 static bool
135 format_is_int10(VkFormat format)
136 {
137 const struct util_format_description *desc = vk_format_description(format);
138
139 if (desc->nr_channels != 4)
140 return false;
141 for (unsigned i = 0; i < 4; i++) {
142 if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
143 return true;
144 }
145 return false;
146 }
147
148 static bool
149 format_is_float32(VkFormat format)
150 {
151 const struct util_format_description *desc = vk_format_description(format);
152 int channel = vk_format_get_first_non_void_channel(format);
153
154 return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
155 }
156
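/* Pack SPI_SHADER_COL_FORMAT by squeezing out the 4-bit entries of unwritten MRTs. */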
157 unsigned
158 radv_compact_spi_shader_col_format(uint32_t spi_shader_col_format)
159 {
160 unsigned value = 0, num_mrts = 0;
161 unsigned i, num_targets;
162
163 /* Compute the number of MRTs. */
164 num_targets = DIV_ROUND_UP(util_last_bit(spi_shader_col_format), 4);
165
166 /* Remove holes in spi_shader_col_format. */
167 for (i = 0; i < num_targets; i++) {
168 unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf;
169
170 if (spi_format) {
171 value |= spi_format << (num_mrts * 4);
172 num_mrts++;
173 }
174 }
175
176 return value;
177 }
178
179 /*
180 * Ordered so that for each i,
181 * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
182 */
183 const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
184 VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT, VK_FORMAT_R8G8B8A8_UNORM,
185 VK_FORMAT_R16G16B16A16_UNORM, VK_FORMAT_R16G16B16A16_SNORM, VK_FORMAT_R16G16B16A16_UINT,
186 VK_FORMAT_R16G16B16A16_SINT, VK_FORMAT_R32G32B32A32_SFLOAT, VK_FORMAT_R8G8B8A8_UINT,
187 VK_FORMAT_R8G8B8A8_SINT, VK_FORMAT_A2R10G10B10_UINT_PACK32, VK_FORMAT_A2R10G10B10_SINT_PACK32,
188 };
189
190 unsigned
191 radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
192 {
193 unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
194 assert(col_format != V_028714_SPI_SHADER_32_AR);
195
196 bool is_int8 = format_is_int8(format);
197 bool is_int10 = format_is_int10(format);
198
199 if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
200 return 8;
201 else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
202 return 9;
203 else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
204 return 10;
205 else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
206 return 11;
207 else {
208 if (col_format >= V_028714_SPI_SHADER_32_AR)
209 --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
210
211 --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
212 return col_format;
213 }
214 }
215
216 static bool
217 radv_pipeline_needs_ps_epilog(const struct vk_graphics_pipeline_state *state,
218 VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
219 {
220 /* Use a PS epilog when the fragment shader is compiled without the fragment output interface. */
221 if ((state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) &&
222 (lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
223 !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT))
224 return true;
225
226 /* These dynamic states require PS epilogs to be compiled on-demand. */
227 if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
228 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
229 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS) ||
230 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
231 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE))
232 return true;
233
234 return false;
235 }
236
237 static bool
238 radv_pipeline_uses_vrs_attachment(const struct radv_graphics_pipeline *pipeline,
239 const struct vk_graphics_pipeline_state *state)
240 {
241 VkPipelineCreateFlags2KHR create_flags = pipeline->base.create_flags;
242 if (state->rp)
243 create_flags |= state->pipeline_flags;
244
245 return (create_flags & VK_PIPELINE_CREATE_2_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0;
246 }
247
248 static void
249 radv_pipeline_init_multisample_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
250 const VkGraphicsPipelineCreateInfo *pCreateInfo,
251 const struct vk_graphics_pipeline_state *state)
252 {
253 struct radv_multisample_state *ms = &pipeline->ms;
254
255 /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
256 *
257 * "Sample shading is enabled for a graphics pipeline:
258 *
259 * - If the interface of the fragment shader entry point of the
260 * graphics pipeline includes an input variable decorated
261 * with SampleId or SamplePosition. In this case
262 * minSampleShadingFactor takes the value 1.0.
263 * - Else if the sampleShadingEnable member of the
264 * VkPipelineMultisampleStateCreateInfo structure specified
265 * when creating the graphics pipeline is set to VK_TRUE. In
266 * this case minSampleShadingFactor takes the value of
267 * VkPipelineMultisampleStateCreateInfo::minSampleShading.
268 *
269 * Otherwise, sample shading is considered disabled."
270 */
271 if (state->ms && state->ms->sample_shading_enable) {
272 ms->sample_shading_enable = true;
273 ms->min_sample_shading = state->ms->min_sample_shading;
274 }
275 }
276
277 static uint32_t
278 radv_conv_tess_prim_to_gs_out(enum tess_primitive_mode prim)
279 {
280 switch (prim) {
281 case TESS_PRIMITIVE_TRIANGLES:
282 case TESS_PRIMITIVE_QUADS:
283 return V_028A6C_TRISTRIP;
284 case TESS_PRIMITIVE_ISOLINES:
285 return V_028A6C_LINESTRIP;
286 default:
287 assert(0);
288 return 0;
289 }
290 }
291
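/* Translate a Vulkan dynamic state enum into the corresponding RADV_DYNAMIC_* bit. */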
292 static uint64_t
293 radv_dynamic_state_mask(VkDynamicState state)
294 {
295 switch (state) {
296 case VK_DYNAMIC_STATE_VIEWPORT:
297 case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT:
298 return RADV_DYNAMIC_VIEWPORT;
299 case VK_DYNAMIC_STATE_SCISSOR:
300 case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT:
301 return RADV_DYNAMIC_SCISSOR;
302 case VK_DYNAMIC_STATE_LINE_WIDTH:
303 return RADV_DYNAMIC_LINE_WIDTH;
304 case VK_DYNAMIC_STATE_DEPTH_BIAS:
305 return RADV_DYNAMIC_DEPTH_BIAS;
306 case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
307 return RADV_DYNAMIC_BLEND_CONSTANTS;
308 case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
309 return RADV_DYNAMIC_DEPTH_BOUNDS;
310 case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
311 return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
312 case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
313 return RADV_DYNAMIC_STENCIL_WRITE_MASK;
314 case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
315 return RADV_DYNAMIC_STENCIL_REFERENCE;
316 case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
317 return RADV_DYNAMIC_DISCARD_RECTANGLE;
318 case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
319 return RADV_DYNAMIC_SAMPLE_LOCATIONS;
320 case VK_DYNAMIC_STATE_LINE_STIPPLE_KHR:
321 return RADV_DYNAMIC_LINE_STIPPLE;
322 case VK_DYNAMIC_STATE_CULL_MODE:
323 return RADV_DYNAMIC_CULL_MODE;
324 case VK_DYNAMIC_STATE_FRONT_FACE:
325 return RADV_DYNAMIC_FRONT_FACE;
326 case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY:
327 return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
328 case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE:
329 return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
330 case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE:
331 return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
332 case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP:
333 return RADV_DYNAMIC_DEPTH_COMPARE_OP;
334 case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE:
335 return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
336 case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE:
337 return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
338 case VK_DYNAMIC_STATE_STENCIL_OP:
339 return RADV_DYNAMIC_STENCIL_OP;
340 case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE:
341 return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
342 case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
343 return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
344 case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT:
345 return RADV_DYNAMIC_PATCH_CONTROL_POINTS;
346 case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE:
347 return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
348 case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE:
349 return RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
350 case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
351 return RADV_DYNAMIC_LOGIC_OP;
352 case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE:
353 return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
354 case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
355 return RADV_DYNAMIC_COLOR_WRITE_ENABLE;
356 case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT:
357 return RADV_DYNAMIC_VERTEX_INPUT;
358 case VK_DYNAMIC_STATE_POLYGON_MODE_EXT:
359 return RADV_DYNAMIC_POLYGON_MODE;
360 case VK_DYNAMIC_STATE_TESSELLATION_DOMAIN_ORIGIN_EXT:
361 return RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;
362 case VK_DYNAMIC_STATE_LOGIC_OP_ENABLE_EXT:
363 return RADV_DYNAMIC_LOGIC_OP_ENABLE;
364 case VK_DYNAMIC_STATE_LINE_STIPPLE_ENABLE_EXT:
365 return RADV_DYNAMIC_LINE_STIPPLE_ENABLE;
366 case VK_DYNAMIC_STATE_ALPHA_TO_COVERAGE_ENABLE_EXT:
367 return RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE;
368 case VK_DYNAMIC_STATE_SAMPLE_MASK_EXT:
369 return RADV_DYNAMIC_SAMPLE_MASK;
370 case VK_DYNAMIC_STATE_DEPTH_CLIP_ENABLE_EXT:
371 return RADV_DYNAMIC_DEPTH_CLIP_ENABLE;
372 case VK_DYNAMIC_STATE_CONSERVATIVE_RASTERIZATION_MODE_EXT:
373 return RADV_DYNAMIC_CONSERVATIVE_RAST_MODE;
374 case VK_DYNAMIC_STATE_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE_EXT:
375 return RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE;
376 case VK_DYNAMIC_STATE_PROVOKING_VERTEX_MODE_EXT:
377 return RADV_DYNAMIC_PROVOKING_VERTEX_MODE;
378 case VK_DYNAMIC_STATE_DEPTH_CLAMP_ENABLE_EXT:
379 return RADV_DYNAMIC_DEPTH_CLAMP_ENABLE;
380 case VK_DYNAMIC_STATE_COLOR_WRITE_MASK_EXT:
381 return RADV_DYNAMIC_COLOR_WRITE_MASK;
382 case VK_DYNAMIC_STATE_COLOR_BLEND_ENABLE_EXT:
383 return RADV_DYNAMIC_COLOR_BLEND_ENABLE;
384 case VK_DYNAMIC_STATE_RASTERIZATION_SAMPLES_EXT:
385 return RADV_DYNAMIC_RASTERIZATION_SAMPLES;
386 case VK_DYNAMIC_STATE_LINE_RASTERIZATION_MODE_EXT:
387 return RADV_DYNAMIC_LINE_RASTERIZATION_MODE;
388 case VK_DYNAMIC_STATE_COLOR_BLEND_EQUATION_EXT:
389 return RADV_DYNAMIC_COLOR_BLEND_EQUATION;
390 case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_ENABLE_EXT:
391 return RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE;
392 case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_MODE_EXT:
393 return RADV_DYNAMIC_DISCARD_RECTANGLE_MODE;
394 case VK_DYNAMIC_STATE_ATTACHMENT_FEEDBACK_LOOP_ENABLE_EXT:
395 return RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE;
396 case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_ENABLE_EXT:
397 return RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE;
398 case VK_DYNAMIC_STATE_ALPHA_TO_ONE_ENABLE_EXT:
399 return RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE;
400 default:
401 unreachable("Unhandled dynamic state");
402 }
403 }
404
405 #define RADV_DYNAMIC_CB_STATES \
406 (RADV_DYNAMIC_LOGIC_OP_ENABLE | RADV_DYNAMIC_LOGIC_OP | RADV_DYNAMIC_COLOR_WRITE_ENABLE | \
407 RADV_DYNAMIC_COLOR_WRITE_MASK | RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_BLEND_EQUATION | \
408 RADV_DYNAMIC_BLEND_CONSTANTS)
409
410 static bool
411 radv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, const struct vk_color_blend_state *cb)
412 {
413 /* If we don't know then we have to assume that blend may be enabled. cb may also be NULL in this
414 * case.
415 */
416 if (pipeline->dynamic_states & (RADV_DYNAMIC_COLOR_BLEND_ENABLE | RADV_DYNAMIC_COLOR_WRITE_MASK))
417 return true;
418
419 /* If we have the blend enable state, then cb being NULL indicates no attachments are written. */
420 if (cb) {
421 for (uint32_t i = 0; i < cb->attachment_count; i++) {
422 if (cb->attachments[i].write_mask && cb->attachments[i].blend_enable)
423 return true;
424 }
425 }
426
427 return false;
428 }
429
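/* Compute which dynamic states are actually meaningful for this pipeline: start from RADV_DYNAMIC_ALL and drop states that cannot apply (mesh shading, rasterization discard, missing attachments, etc.). */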
430 static uint64_t
431 radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
432 const struct vk_graphics_pipeline_state *state)
433 {
434 const struct radv_physical_device *pdev = radv_device_physical(device);
435 bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
436 bool raster_enabled =
437 !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
438 uint64_t states = RADV_DYNAMIC_ALL;
439
440 if (pdev->info.gfx_level < GFX10_3)
441 states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
442
443 /* Disable dynamic states that are useless to mesh shading. */
444 if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) {
445 if (!raster_enabled)
446 return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
447
448 states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
449 RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY);
450 }
451
452 /* Disable dynamic states that are useless when rasterization is disabled. */
453 if (!raster_enabled) {
454 states = RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE |
455 RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
456 RADV_DYNAMIC_VERTEX_INPUT;
457
458 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
459 states |= RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN;
460
461 return states;
462 }
463
464 if (!state->rs->depth_bias.enable && !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE))
465 states &= ~RADV_DYNAMIC_DEPTH_BIAS;
466
467 if (!(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) &&
468 (!state->ds || !state->ds->depth.bounds_test.enable))
469 states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
470
471 if (!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) &&
472 (!state->ds || !state->ds->stencil.test_enable))
473 states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | RADV_DYNAMIC_STENCIL_REFERENCE |
474 RADV_DYNAMIC_STENCIL_OP);
475
476 if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) && !state->dr->rectangle_count)
477 states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
478
479 if (!(pipeline->dynamic_states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) &&
480 (!state->ms || !state->ms->sample_locations_enable))
481 states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
482
483 if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, state->cb))
484 states &= ~RADV_DYNAMIC_BLEND_CONSTANTS;
485
486 if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT))
487 states &= ~(RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN);
488
489 return states;
490 }
491
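/* Derive the IA_MULTI_VGT_PARAM settings (switch-on-EOI, partial VS/ES waves), including the per-chip workarounds required by the enabled geometry stages. */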
492 struct radv_ia_multi_vgt_param_helpers
493 radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
494 {
495 const struct radv_physical_device *pdev = radv_device_physical(device);
496 struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
497
498 ia_multi_vgt_param.ia_switch_on_eoi = false;
499 if (shaders[MESA_SHADER_FRAGMENT] && shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
500 ia_multi_vgt_param.ia_switch_on_eoi = true;
501 if (shaders[MESA_SHADER_GEOMETRY] && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
502 ia_multi_vgt_param.ia_switch_on_eoi = true;
503 if (shaders[MESA_SHADER_TESS_CTRL]) {
504 const struct radv_shader *tes = radv_get_shader(shaders, MESA_SHADER_TESS_EVAL);
505
506 /* SWITCH_ON_EOI must be set if PrimID is used. */
507 if (shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || tes->info.uses_prim_id ||
508 (tes->info.merged_shader_compiled_separately && shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id))
509 ia_multi_vgt_param.ia_switch_on_eoi = true;
510 }
511
512 ia_multi_vgt_param.partial_vs_wave = false;
513 if (shaders[MESA_SHADER_TESS_CTRL]) {
514 /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
515 if ((pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_PITCAIRN ||
516 pdev->info.family == CHIP_BONAIRE) &&
517 shaders[MESA_SHADER_GEOMETRY])
518 ia_multi_vgt_param.partial_vs_wave = true;
519 /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
520 if (pdev->info.has_distributed_tess) {
521 if (shaders[MESA_SHADER_GEOMETRY]) {
522 if (pdev->info.gfx_level <= GFX8)
523 ia_multi_vgt_param.partial_es_wave = true;
524 } else {
525 ia_multi_vgt_param.partial_vs_wave = true;
526 }
527 }
528 }
529
530 if (shaders[MESA_SHADER_GEOMETRY]) {
531 /* On these chips there is the possibility of a hang if the
532 * pipeline uses a GS and partial_vs_wave is not set.
533 *
534 * This mostly does not hit 4-SE chips, as those typically set
535 * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
536 * with GS due to another workaround.
537 *
538 * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
539 */
540 if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_FIJI || pdev->info.family == CHIP_POLARIS10 ||
541 pdev->info.family == CHIP_POLARIS11 || pdev->info.family == CHIP_POLARIS12 ||
542 pdev->info.family == CHIP_VEGAM) {
543 ia_multi_vgt_param.partial_vs_wave = true;
544 }
545 }
546
547 ia_multi_vgt_param.base =
548 /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
549 S_028AA8_MAX_PRIMGRP_IN_WAVE(pdev->info.gfx_level == GFX8 ? 2 : 0) |
550 S_030960_EN_INST_OPT_BASIC(pdev->info.gfx_level >= GFX9) | S_030960_EN_INST_OPT_ADV(pdev->info.gfx_level >= GFX9);
551
552 return ia_multi_vgt_param;
553 }
554
555 static uint32_t
556 radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t attrib_binding)
557 {
558 for (uint32_t i = 0; i < vi->vertexBindingDescriptionCount; i++) {
559 const VkVertexInputBindingDescription *input_binding = &vi->pVertexBindingDescriptions[i];
560
561 if (input_binding->binding == attrib_binding)
562 return input_binding->stride;
563 }
564
565 return 0;
566 }
567
568 #define ALL_GRAPHICS_LIB_FLAGS \
569 (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT | \
570 VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT | \
571 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | \
572 VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT)
573
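/* Map a single shader stage to the graphics pipeline library subset it belongs to. */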
574 static VkGraphicsPipelineLibraryFlagBitsEXT
575 shader_stage_to_pipeline_library_flags(VkShaderStageFlagBits stage)
576 {
577 assert(util_bitcount(stage) == 1);
578 switch (stage) {
579 case VK_SHADER_STAGE_VERTEX_BIT:
580 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
581 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
582 case VK_SHADER_STAGE_GEOMETRY_BIT:
583 case VK_SHADER_STAGE_TASK_BIT_EXT:
584 case VK_SHADER_STAGE_MESH_BIT_EXT:
585 return VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT;
586 case VK_SHADER_STAGE_FRAGMENT_BIT:
587 return VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
588 default:
589 unreachable("Invalid shader stage");
590 }
591 }
592
593 static void
594 radv_graphics_pipeline_import_layout(struct radv_pipeline_layout *dst, const struct radv_pipeline_layout *src)
595 {
596 for (uint32_t s = 0; s < src->num_sets; s++) {
597 if (!src->set[s].layout)
598 continue;
599
600 radv_pipeline_layout_add_set(dst, s, src->set[s].layout);
601 }
602
603 dst->independent_sets |= src->independent_sets;
604 dst->push_constant_size = MAX2(dst->push_constant_size, src->push_constant_size);
605 }
606
607 static void
608 radv_pipeline_import_graphics_info(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
609 const VkGraphicsPipelineCreateInfo *pCreateInfo)
610 {
611 /* Mark all states declared dynamic at pipeline creation. */
612 if (pCreateInfo->pDynamicState) {
613 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
614 for (uint32_t s = 0; s < count; s++) {
615 pipeline->dynamic_states |= radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
616 }
617 }
618
619 /* Mark all active stages at pipeline creation. */
620 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
621 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
622
623 pipeline->active_stages |= sinfo->stage;
624 }
625
626 if (pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_EXT) {
627 pipeline->last_vgt_api_stage = MESA_SHADER_MESH;
628 } else {
629 pipeline->last_vgt_api_stage = util_last_bit(pipeline->active_stages & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
630 }
631 }
632
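/* Library binaries can be imported as-is unless link-time optimization (or retaining LTO info) was requested. */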
633 static bool
634 radv_should_import_lib_binaries(const VkPipelineCreateFlags2KHR create_flags)
635 {
636 return !(create_flags & (VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT |
637 VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT));
638 }
639
640 static void
641 radv_graphics_pipeline_import_lib(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
642 struct radv_graphics_lib_pipeline *lib)
643 {
644 bool import_binaries = false;
645
646 /* There should be no common blocks between a lib we import and the current
647 * pipeline we're building.
648 */
649 assert((pipeline->active_stages & lib->base.active_stages) == 0);
650
651 pipeline->dynamic_states |= lib->base.dynamic_states;
652 pipeline->active_stages |= lib->base.active_stages;
653
654 /* Import binaries when LTO is disabled and when the library doesn't retain any shaders. */
655 if (lib->base.has_pipeline_binaries || radv_should_import_lib_binaries(pipeline->base.create_flags)) {
656 import_binaries = true;
657 }
658
659 if (import_binaries) {
660 /* Import the compiled shaders. */
661 for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
662 if (!lib->base.base.shaders[s])
663 continue;
664
665 pipeline->base.shaders[s] = radv_shader_ref(lib->base.base.shaders[s]);
666 }
667
668 /* Import the GS copy shader if present. */
669 if (lib->base.base.gs_copy_shader) {
670 assert(!pipeline->base.gs_copy_shader);
671 pipeline->base.gs_copy_shader = radv_shader_ref(lib->base.base.gs_copy_shader);
672 }
673 }
674 }
675
676 static void
677 radv_pipeline_init_input_assembly_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
678 {
679 pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param(device, pipeline->base.shaders);
680 }
681
682 static bool
683 radv_pipeline_uses_ds_feedback_loop(const struct radv_graphics_pipeline *pipeline,
684 const struct vk_graphics_pipeline_state *state)
685 {
686 VkPipelineCreateFlags2KHR create_flags = pipeline->base.create_flags;
687 if (state->rp)
688 create_flags |= state->pipeline_flags;
689
690 return (create_flags & VK_PIPELINE_CREATE_2_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
691 }
692
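/* Decompose a VkViewport into the scale/translate pairs used for the hardware viewport transform. */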
693 void
694 radv_get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3])
695 {
696 float x = viewport->x;
697 float y = viewport->y;
698 float half_width = 0.5f * viewport->width;
699 float half_height = 0.5f * viewport->height;
700 double n = viewport->minDepth;
701 double f = viewport->maxDepth;
702
703 scale[0] = half_width;
704 translate[0] = half_width + x;
705 scale[1] = half_height;
706 translate[1] = half_height + y;
707
708 scale[2] = (f - n);
709 translate[2] = n;
710 }
711
712 static void
713 radv_pipeline_init_dynamic_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
714 const struct vk_graphics_pipeline_state *state,
715 const VkGraphicsPipelineCreateInfo *pCreateInfo)
716 {
717 uint64_t needed_states = radv_pipeline_needed_dynamic_state(device, pipeline, state);
718 struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
719 uint64_t states = needed_states;
720
721 /* Initialize non-zero values for default dynamic state. */
722 dynamic->vk.rs.line.width = 1.0f;
723 dynamic->vk.fsr.fragment_size.width = 1u;
724 dynamic->vk.fsr.fragment_size.height = 1u;
725 dynamic->vk.ds.depth.bounds_test.max = 1.0f;
726 dynamic->vk.ds.stencil.front.compare_mask = ~0;
727 dynamic->vk.ds.stencil.front.write_mask = ~0;
728 dynamic->vk.ds.stencil.back.compare_mask = ~0;
729 dynamic->vk.ds.stencil.back.write_mask = ~0;
730 dynamic->vk.ms.rasterization_samples = VK_SAMPLE_COUNT_1_BIT;
731
732 pipeline->needed_dynamic_state = needed_states;
733
734 states &= ~pipeline->dynamic_states;
735
736 /* Input assembly. */
737 if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
738 dynamic->vk.ia.primitive_topology = radv_translate_prim(state->ia->primitive_topology);
739 }
740
741 if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
742 dynamic->vk.ia.primitive_restart_enable = state->ia->primitive_restart_enable;
743 }
744
745 /* Tessellation. */
746 if (states & RADV_DYNAMIC_PATCH_CONTROL_POINTS) {
747 dynamic->vk.ts.patch_control_points = state->ts->patch_control_points;
748 }
749
750 if (states & RADV_DYNAMIC_TESS_DOMAIN_ORIGIN) {
751 dynamic->vk.ts.domain_origin = state->ts->domain_origin;
752 }
753
754 /* Viewport. */
755 if (needed_states & RADV_DYNAMIC_VIEWPORT) {
756 dynamic->vk.vp.viewport_count = state->vp->viewport_count;
757 if (states & RADV_DYNAMIC_VIEWPORT) {
758 typed_memcpy(dynamic->vk.vp.viewports, state->vp->viewports, state->vp->viewport_count);
759 for (unsigned i = 0; i < dynamic->vk.vp.viewport_count; i++)
760 radv_get_viewport_xform(&dynamic->vk.vp.viewports[i], dynamic->hw_vp.xform[i].scale,
761 dynamic->hw_vp.xform[i].translate);
762 }
763 }
764
765 if (needed_states & RADV_DYNAMIC_SCISSOR) {
766 dynamic->vk.vp.scissor_count = state->vp->scissor_count;
767 if (states & RADV_DYNAMIC_SCISSOR) {
768 typed_memcpy(dynamic->vk.vp.scissors, state->vp->scissors, state->vp->scissor_count);
769 }
770 }
771
772 if (states & RADV_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) {
773 dynamic->vk.vp.depth_clip_negative_one_to_one = state->vp->depth_clip_negative_one_to_one;
774 }
775
776 /* Discard rectangles. */
777 if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
778 dynamic->vk.dr.rectangle_count = state->dr->rectangle_count;
779 if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
780 typed_memcpy(dynamic->vk.dr.rectangles, state->dr->rectangles, state->dr->rectangle_count);
781 }
782 }
783
784 /* Rasterization. */
785 if (states & RADV_DYNAMIC_LINE_WIDTH) {
786 dynamic->vk.rs.line.width = state->rs->line.width;
787 }
788
789 if (states & RADV_DYNAMIC_DEPTH_BIAS) {
790 dynamic->vk.rs.depth_bias.constant = state->rs->depth_bias.constant;
791 dynamic->vk.rs.depth_bias.clamp = state->rs->depth_bias.clamp;
792 dynamic->vk.rs.depth_bias.slope = state->rs->depth_bias.slope;
793 dynamic->vk.rs.depth_bias.representation = state->rs->depth_bias.representation;
794 }
795
796 if (states & RADV_DYNAMIC_CULL_MODE) {
797 dynamic->vk.rs.cull_mode = state->rs->cull_mode;
798 }
799
800 if (states & RADV_DYNAMIC_FRONT_FACE) {
801 dynamic->vk.rs.front_face = state->rs->front_face;
802 }
803
804 if (states & RADV_DYNAMIC_LINE_STIPPLE) {
805 dynamic->vk.rs.line.stipple.factor = state->rs->line.stipple.factor;
806 dynamic->vk.rs.line.stipple.pattern = state->rs->line.stipple.pattern;
807 }
808
809 if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
810 dynamic->vk.rs.depth_bias.enable = state->rs->depth_bias.enable;
811 }
812
813 if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
814 dynamic->vk.rs.rasterizer_discard_enable = state->rs->rasterizer_discard_enable;
815 }
816
817 if (states & RADV_DYNAMIC_POLYGON_MODE) {
818 dynamic->vk.rs.polygon_mode = radv_translate_fill(state->rs->polygon_mode);
819 }
820
821 if (states & RADV_DYNAMIC_LINE_STIPPLE_ENABLE) {
822 dynamic->vk.rs.line.stipple.enable = state->rs->line.stipple.enable;
823 }
824
825 if (states & RADV_DYNAMIC_DEPTH_CLIP_ENABLE) {
826 dynamic->vk.rs.depth_clip_enable = state->rs->depth_clip_enable;
827 }
828
829 if (states & RADV_DYNAMIC_CONSERVATIVE_RAST_MODE) {
830 dynamic->vk.rs.conservative_mode = state->rs->conservative_mode;
831 }
832
833 if (states & RADV_DYNAMIC_PROVOKING_VERTEX_MODE) {
834 dynamic->vk.rs.provoking_vertex = state->rs->provoking_vertex;
835 }
836
837 if (states & RADV_DYNAMIC_DEPTH_CLAMP_ENABLE) {
838 dynamic->vk.rs.depth_clamp_enable = state->rs->depth_clamp_enable;
839 }
840
841 if (states & RADV_DYNAMIC_LINE_RASTERIZATION_MODE) {
842 dynamic->vk.rs.line.mode = state->rs->line.mode;
843 }
844
845 /* Fragment shading rate. */
846 if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
847 dynamic->vk.fsr = *state->fsr;
848 }
849
850 /* Multisample. */
851 if (states & RADV_DYNAMIC_ALPHA_TO_COVERAGE_ENABLE) {
852 dynamic->vk.ms.alpha_to_coverage_enable = state->ms->alpha_to_coverage_enable;
853 }
854
855 if (states & RADV_DYNAMIC_ALPHA_TO_ONE_ENABLE) {
856 dynamic->vk.ms.alpha_to_one_enable = state->ms->alpha_to_one_enable;
857 }
858
859 if (states & RADV_DYNAMIC_SAMPLE_MASK) {
860 dynamic->vk.ms.sample_mask = state->ms->sample_mask & 0xffff;
861 }
862
863 if (states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) {
864 dynamic->vk.ms.rasterization_samples = state->ms->rasterization_samples;
865 }
866
867 if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS_ENABLE) {
868 dynamic->vk.ms.sample_locations_enable = state->ms->sample_locations_enable;
869 }
870
871 if (states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
872 unsigned count = state->ms->sample_locations->per_pixel * state->ms->sample_locations->grid_size.width *
873 state->ms->sample_locations->grid_size.height;
874
875 dynamic->sample_location.per_pixel = state->ms->sample_locations->per_pixel;
876 dynamic->sample_location.grid_size = state->ms->sample_locations->grid_size;
877 dynamic->sample_location.count = count;
878 typed_memcpy(&dynamic->sample_location.locations[0], state->ms->sample_locations->locations, count);
879 }
880
881 /* Depth stencil. */
882 /* If there is no depthstencil attachment, then don't read
883 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
884 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
885 * no need to override the depthstencil defaults in
886 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
887 *
888 * Section 9.2 of the Vulkan 1.0.15 spec says:
889 *
890 * pDepthStencilState is [...] NULL if the pipeline has rasterization
891 * disabled or if the subpass of the render pass the pipeline is created
892 * against does not use a depth/stencil attachment.
893 */
894 if (needed_states && radv_pipeline_has_ds_attachments(state->rp)) {
895 if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
896 dynamic->vk.ds.depth.bounds_test.min = state->ds->depth.bounds_test.min;
897 dynamic->vk.ds.depth.bounds_test.max = state->ds->depth.bounds_test.max;
898 }
899
900 if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
901 dynamic->vk.ds.stencil.front.compare_mask = state->ds->stencil.front.compare_mask;
902 dynamic->vk.ds.stencil.back.compare_mask = state->ds->stencil.back.compare_mask;
903 }
904
905 if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
906 dynamic->vk.ds.stencil.front.write_mask = state->ds->stencil.front.write_mask;
907 dynamic->vk.ds.stencil.back.write_mask = state->ds->stencil.back.write_mask;
908 }
909
910 if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
911 dynamic->vk.ds.stencil.front.reference = state->ds->stencil.front.reference;
912 dynamic->vk.ds.stencil.back.reference = state->ds->stencil.back.reference;
913 }
914
915 if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
916 dynamic->vk.ds.depth.test_enable = state->ds->depth.test_enable;
917 }
918
919 if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
920 dynamic->vk.ds.depth.write_enable = state->ds->depth.write_enable;
921 }
922
923 if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
924 dynamic->vk.ds.depth.compare_op = state->ds->depth.compare_op;
925 }
926
927 if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
928 dynamic->vk.ds.depth.bounds_test.enable = state->ds->depth.bounds_test.enable;
929 }
930
931 if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
932 dynamic->vk.ds.stencil.test_enable = state->ds->stencil.test_enable;
933 }
934
935 if (states & RADV_DYNAMIC_STENCIL_OP) {
936 dynamic->vk.ds.stencil.front.op.compare = state->ds->stencil.front.op.compare;
937 dynamic->vk.ds.stencil.front.op.fail = state->ds->stencil.front.op.fail;
938 dynamic->vk.ds.stencil.front.op.pass = state->ds->stencil.front.op.pass;
939 dynamic->vk.ds.stencil.front.op.depth_fail = state->ds->stencil.front.op.depth_fail;
940
941 dynamic->vk.ds.stencil.back.op.compare = state->ds->stencil.back.op.compare;
942 dynamic->vk.ds.stencil.back.op.fail = state->ds->stencil.back.op.fail;
943 dynamic->vk.ds.stencil.back.op.pass = state->ds->stencil.back.op.pass;
944 dynamic->vk.ds.stencil.back.op.depth_fail = state->ds->stencil.back.op.depth_fail;
945 }
946 }
947
948 /* Color blend. */
949 /* Section 9.2 of the Vulkan 1.0.15 spec says:
950 *
951 * pColorBlendState is [...] NULL if the pipeline has rasterization
952 * disabled or if the subpass of the render pass the pipeline is
953 * created against does not use any color attachments.
954 */
955 if (states & RADV_DYNAMIC_BLEND_CONSTANTS) {
956 typed_memcpy(dynamic->vk.cb.blend_constants, state->cb->blend_constants, 4);
957 }
958
959 if (radv_pipeline_has_color_attachments(state->rp)) {
960 if (states & RADV_DYNAMIC_LOGIC_OP) {
961 if ((pipeline->dynamic_states & RADV_DYNAMIC_LOGIC_OP_ENABLE) || state->cb->logic_op_enable) {
962 dynamic->vk.cb.logic_op = radv_translate_blend_logic_op(state->cb->logic_op);
963 }
964 }
965
966 if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
967 dynamic->vk.cb.color_write_enables = state->cb->color_write_enables;
968 }
969
970 if (states & RADV_DYNAMIC_LOGIC_OP_ENABLE) {
971 dynamic->vk.cb.logic_op_enable = state->cb->logic_op_enable;
972 }
973
974 if (states & RADV_DYNAMIC_COLOR_WRITE_MASK) {
975 for (unsigned i = 0; i < state->cb->attachment_count; i++) {
976 dynamic->vk.cb.attachments[i].write_mask = state->cb->attachments[i].write_mask;
977 }
978 }
979
980 if (states & RADV_DYNAMIC_COLOR_BLEND_ENABLE) {
981 for (unsigned i = 0; i < state->cb->attachment_count; i++) {
982 dynamic->vk.cb.attachments[i].blend_enable = state->cb->attachments[i].blend_enable;
983 }
984 }
985
986 if (states & RADV_DYNAMIC_COLOR_BLEND_EQUATION) {
987 for (unsigned i = 0; i < state->cb->attachment_count; i++) {
988 const struct vk_color_blend_attachment_state *att = &state->cb->attachments[i];
989
990 dynamic->vk.cb.attachments[i].src_color_blend_factor = att->src_color_blend_factor;
991 dynamic->vk.cb.attachments[i].dst_color_blend_factor = att->dst_color_blend_factor;
992 dynamic->vk.cb.attachments[i].color_blend_op = att->color_blend_op;
993 dynamic->vk.cb.attachments[i].src_alpha_blend_factor = att->src_alpha_blend_factor;
994 dynamic->vk.cb.attachments[i].dst_alpha_blend_factor = att->dst_alpha_blend_factor;
995 dynamic->vk.cb.attachments[i].alpha_blend_op = att->alpha_blend_op;
996 }
997 }
998 }
999
1000 if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_ENABLE) {
1001 dynamic->vk.dr.enable = state->dr->rectangle_count > 0;
1002 }
1003
1004 if (states & RADV_DYNAMIC_DISCARD_RECTANGLE_MODE) {
1005 dynamic->vk.dr.mode = state->dr->mode;
1006 }
1007
1008 if (states & RADV_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE) {
1009 bool uses_ds_feedback_loop = radv_pipeline_uses_ds_feedback_loop(pipeline, state);
1010
1011 dynamic->feedback_loop_aspects =
1012 uses_ds_feedback_loop ? (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) : VK_IMAGE_ASPECT_NONE;
1013 }
1014
1015 for (uint32_t i = 0; i < MAX_RTS; i++) {
1016 dynamic->vk.cal.color_map[i] = state->cal ? state->cal->color_map[i] : i;
1017 dynamic->vk.ial.color_map[i] = state->ial ? state->ial->color_map[i] : i;
1018 }
1019
1020 dynamic->vk.ial.depth_att = state->ial ? state->ial->depth_att : MESA_VK_ATTACHMENT_UNUSED;
1021 dynamic->vk.ial.stencil_att = state->ial ? state->ial->stencil_att : MESA_VK_ATTACHMENT_UNUSED;
1022
1023 pipeline->dynamic_state.mask = states;
1024 }
1025
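/* Return the shader for a stage, or the shader it was merged into when stages are combined (e.g. VS merged into TCS/GS, TES merged into GS). */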
1026 struct radv_shader *
1027 radv_get_shader(struct radv_shader *const *shaders, gl_shader_stage stage)
1028 {
1029 if (stage == MESA_SHADER_VERTEX) {
1030 if (shaders[MESA_SHADER_VERTEX])
1031 return shaders[MESA_SHADER_VERTEX];
1032 if (shaders[MESA_SHADER_TESS_CTRL])
1033 return shaders[MESA_SHADER_TESS_CTRL];
1034 if (shaders[MESA_SHADER_GEOMETRY])
1035 return shaders[MESA_SHADER_GEOMETRY];
1036 } else if (stage == MESA_SHADER_TESS_EVAL) {
1037 if (!shaders[MESA_SHADER_TESS_CTRL])
1038 return NULL;
1039 if (shaders[MESA_SHADER_TESS_EVAL])
1040 return shaders[MESA_SHADER_TESS_EVAL];
1041 if (shaders[MESA_SHADER_GEOMETRY])
1042 return shaders[MESA_SHADER_GEOMETRY];
1043 }
1044 return shaders[stage];
1045 }
1046
1047 static bool
1048 radv_should_export_multiview(const struct radv_shader_stage *stage, const struct radv_graphics_state_key *gfx_state)
1049 {
1050 /* Export the layer in the last VGT stage if multiview is used.
1051 * Also checks for NONE stage, which happens when we have depth-only rendering.
1052 * When the next stage is unknown (with GPL or ESO), the layer is exported unconditionally.
1053 */
1054 return gfx_state->has_multiview_view_index && radv_is_last_vgt_stage(stage) &&
1055 !(stage->nir->info.outputs_written & VARYING_BIT_LAYER);
1056 }
1057
1058 static void
1059 radv_remove_point_size(const struct radv_graphics_state_key *gfx_state, nir_shader *producer, nir_shader *consumer)
1060 {
1061 if ((consumer->info.inputs_read & VARYING_BIT_PSIZ) || !(producer->info.outputs_written & VARYING_BIT_PSIZ))
1062 return;
1063
1064 /* Do not remove PSIZ if the shader uses XFB because it might be stored. */
1065 if (producer->xfb_info)
1066 return;
1067
1068 /* Do not remove PSIZ if the rasterization primitive uses points. */
1069 if (consumer->info.stage == MESA_SHADER_FRAGMENT &&
1070 ((producer->info.stage == MESA_SHADER_TESS_EVAL && producer->info.tess.point_mode) ||
1071 (producer->info.stage == MESA_SHADER_GEOMETRY && producer->info.gs.output_primitive == MESA_PRIM_POINTS) ||
1072 (producer->info.stage == MESA_SHADER_MESH && producer->info.mesh.primitive_type == MESA_PRIM_POINTS)))
1073 return;
1074
1075 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
1076 assert(var);
1077
1078 /* Change PSIZ to a global variable which allows it to be DCE'd. */
1079 var->data.location = 0;
1080 var->data.mode = nir_var_shader_temp;
1081
1082 producer->info.outputs_written &= ~VARYING_BIT_PSIZ;
1083 NIR_PASS_V(producer, nir_fixup_deref_modes);
1084 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1085 NIR_PASS(_, producer, nir_opt_dce);
1086 }
1087
1088 static void
1089 radv_remove_color_exports(const struct radv_graphics_state_key *gfx_state, nir_shader *nir)
1090 {
1091 uint8_t color_remap[MAX_RTS];
1092 bool fixup_derefs = false;
1093
1094 /* Do not remove color exports when a PS epilog is used because the format isn't known and the color write mask can
1095 * be dynamic. */
1096 if (gfx_state->ps.has_epilog)
1097 return;
1098
1099 /* Build the mapping from shader output locations to color attachments. */
1100 memset(color_remap, MESA_VK_ATTACHMENT_UNUSED, sizeof(color_remap));
1101 for (uint32_t i = 0; i < MAX_RTS; i++) {
1102 if (gfx_state->ps.epilog.color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
1103 color_remap[gfx_state->ps.epilog.color_map[i]] = i;
1104 }
1105
1106 nir_foreach_shader_out_variable (var, nir) {
1107 int idx = var->data.location;
1108 idx -= FRAG_RESULT_DATA0;
1109
1110 if (idx < 0)
1111 continue;
1112
1113 const uint8_t cb_idx = color_remap[idx];
1114 unsigned col_format = (gfx_state->ps.epilog.spi_shader_col_format >> (4 * cb_idx)) & 0xf;
1115
1116 if (col_format == V_028714_SPI_SHADER_ZERO) {
1117 /* Remove the color export if it's unused or in the presence of holes. */
1118 nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
1119 var->data.location = 0;
1120 var->data.mode = nir_var_shader_temp;
1121 fixup_derefs = true;
1122 }
1123 }
1124
1125 if (fixup_derefs) {
1126 NIR_PASS_V(nir, nir_fixup_deref_modes);
1127 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1128 NIR_PASS(_, nir, nir_opt_dce);
1129 }
1130 }
1131
1132 static void
1133 merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
1134 {
1135 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
1136 *
1137 * "PointMode. Controls generation of points rather than triangles
1138 * or lines. This functionality defaults to disabled, and is
1139 * enabled if either shader stage includes the execution mode.
1140 *
1141 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
1142 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
1143 * and OutputVertices, it says:
1144 *
1145 * "One mode must be set in at least one of the tessellation
1146 * shader stages."
1147 *
1148 * So, the fields can be set in either the TCS or TES, but they must
1149 * agree if set in both. Our backend looks at TES, so bitwise-or in
1150 * the values from the TCS.
1151 */
1152 assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
1153 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
1154 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
1155
1156 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
1157 tcs_info->tess.spacing == tes_info->tess.spacing);
1158 tes_info->tess.spacing |= tcs_info->tess.spacing;
1159
1160 assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
1161 tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED ||
1162 tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
1163 tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
1164 tes_info->tess.ccw |= tcs_info->tess.ccw;
1165 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
1166
1167 /* Copy the merged info back to the TCS */
1168 tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
1169 tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode;
1170 }
1171
1172 static void
1173 radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
1174 struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
1175 {
1176 const struct radv_physical_device *pdev = radv_device_physical(device);
1177 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
1178 nir_shader *producer = producer_stage->nir;
1179 nir_shader *consumer = consumer_stage->nir;
1180
1181 if (consumer->info.stage == MESA_SHADER_FRAGMENT) {
1182 /* Lower the viewport index to zero when the last vertex stage doesn't export it. */
1183 if ((consumer->info.inputs_read & VARYING_BIT_VIEWPORT) &&
1184 !(producer->info.outputs_written & VARYING_BIT_VIEWPORT)) {
1185 NIR_PASS(_, consumer, radv_nir_lower_viewport_to_zero);
1186 }
1187 }
1188
1189 if (producer_stage->key.optimisations_disabled || consumer_stage->key.optimisations_disabled)
1190 return;
1191
1192 if (consumer->info.stage == MESA_SHADER_FRAGMENT && producer->info.has_transform_feedback_varyings) {
1193 nir_link_xfb_varyings(producer, consumer);
1194 }
1195
1196 unsigned array_deref_of_vec_options =
1197 nir_lower_direct_array_deref_of_vec_load | nir_lower_indirect_array_deref_of_vec_load |
1198 nir_lower_direct_array_deref_of_vec_store | nir_lower_indirect_array_deref_of_vec_store;
1199
1200 NIR_PASS(_, producer, nir_lower_array_deref_of_vec, nir_var_shader_out, NULL, array_deref_of_vec_options);
1201 NIR_PASS(_, consumer, nir_lower_array_deref_of_vec, nir_var_shader_in, NULL, array_deref_of_vec_options);
1202
1203 nir_lower_io_arrays_to_elements(producer, consumer);
1204 nir_validate_shader(producer, "after nir_lower_io_arrays_to_elements");
1205 nir_validate_shader(consumer, "after nir_lower_io_arrays_to_elements");
1206
1207 radv_nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
1208 radv_nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);
1209
1210 /* Remove PSIZ from shaders when it's not needed.
1211 * This is typically produced by translation layers like Zink or D9VK.
1212 */
1213 if (gfx_state->enable_remove_point_size)
1214 radv_remove_point_size(gfx_state, producer, consumer);
1215
1216 if (nir_link_opt_varyings(producer, consumer)) {
1217 nir_validate_shader(producer, "after nir_link_opt_varyings");
1218 nir_validate_shader(consumer, "after nir_link_opt_varyings");
1219
1220 NIR_PASS(_, consumer, nir_opt_constant_folding);
1221 NIR_PASS(_, consumer, nir_opt_algebraic);
1222 NIR_PASS(_, consumer, nir_opt_dce);
1223 }
1224
1225 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
1226 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
1227
1228 nir_remove_unused_varyings(producer, consumer);
1229
1230 nir_compact_varyings(producer, consumer, true);
1231
1232 nir_validate_shader(producer, "after nir_compact_varyings");
1233 nir_validate_shader(consumer, "after nir_compact_varyings");
1234
1235 if (producer->info.stage == MESA_SHADER_MESH) {
1236 /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */
1237 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1238 }
1239
1240 const bool has_geom_or_tess =
1241 consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL;
1242 const bool merged_gs = consumer->info.stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX9;
1243
1244 if (producer->info.stage == MESA_SHADER_TESS_CTRL || producer->info.stage == MESA_SHADER_MESH ||
1245 (producer->info.stage == MESA_SHADER_VERTEX && has_geom_or_tess) ||
1246 (producer->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
1247 NIR_PASS(_, producer, nir_lower_io_to_vector, nir_var_shader_out);
1248
1249 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
1250 NIR_PASS(_, producer, nir_vectorize_tess_levels);
1251
1252 NIR_PASS(_, producer, nir_opt_combine_stores, nir_var_shader_out);
1253 }
1254
1255 if (consumer->info.stage == MESA_SHADER_GEOMETRY || consumer->info.stage == MESA_SHADER_TESS_CTRL ||
1256 consumer->info.stage == MESA_SHADER_TESS_EVAL) {
1257 NIR_PASS(_, consumer, nir_lower_io_to_vector, nir_var_shader_in);
1258 }
1259 }
1260
1261 static const gl_shader_stage graphics_shader_order[] = {
1262 MESA_SHADER_VERTEX, MESA_SHADER_TESS_CTRL, MESA_SHADER_TESS_EVAL, MESA_SHADER_GEOMETRY,
1263
1264 MESA_SHADER_TASK, MESA_SHADER_MESH,
1265
1266 MESA_SHADER_FRAGMENT,
1267 };
1268
1269 static void
1270 radv_link_vs(const struct radv_device *device, struct radv_shader_stage *vs_stage, struct radv_shader_stage *next_stage,
1271 const struct radv_graphics_state_key *gfx_state)
1272 {
1273 assert(vs_stage->nir->info.stage == MESA_SHADER_VERTEX);
1274
1275 if (radv_should_export_multiview(vs_stage, gfx_state)) {
1276 NIR_PASS(_, vs_stage->nir, radv_nir_export_multiview);
1277 }
1278
1279 if (next_stage) {
1280 assert(next_stage->nir->info.stage == MESA_SHADER_TESS_CTRL ||
1281 next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1282 next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1283
1284 radv_link_shaders(device, vs_stage, next_stage, gfx_state);
1285 }
1286 }
1287
1288 static void
1289 radv_link_tcs(const struct radv_device *device, struct radv_shader_stage *tcs_stage,
1290 struct radv_shader_stage *tes_stage, const struct radv_graphics_state_key *gfx_state)
1291 {
1292 if (!tes_stage)
1293 return;
1294
1295 assert(tcs_stage->nir->info.stage == MESA_SHADER_TESS_CTRL);
1296 assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1297
1298 radv_link_shaders(device, tcs_stage, tes_stage, gfx_state);
1299
1300 /* Copy TCS info into the TES info */
1301 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
1302 }
1303
1304 static void
1305 radv_link_tes(const struct radv_device *device, struct radv_shader_stage *tes_stage,
1306 struct radv_shader_stage *next_stage, const struct radv_graphics_state_key *gfx_state)
1307 {
1308 assert(tes_stage->nir->info.stage == MESA_SHADER_TESS_EVAL);
1309
1310 if (radv_should_export_multiview(tes_stage, gfx_state)) {
1311 NIR_PASS(_, tes_stage->nir, radv_nir_export_multiview);
1312 }
1313
1314 if (next_stage) {
1315 assert(next_stage->nir->info.stage == MESA_SHADER_GEOMETRY ||
1316 next_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1317
1318 radv_link_shaders(device, tes_stage, next_stage, gfx_state);
1319 }
1320 }
1321
1322 static void
1323 radv_link_gs(const struct radv_device *device, struct radv_shader_stage *gs_stage, struct radv_shader_stage *fs_stage,
1324 const struct radv_graphics_state_key *gfx_state)
1325 {
1326 assert(gs_stage->nir->info.stage == MESA_SHADER_GEOMETRY);
1327
1328 if (radv_should_export_multiview(gs_stage, gfx_state)) {
1329 NIR_PASS(_, gs_stage->nir, radv_nir_export_multiview);
1330 }
1331
1332 if (fs_stage) {
1333 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1334
1335 radv_link_shaders(device, gs_stage, fs_stage, gfx_state);
1336 }
1337 }
1338
1339 static void
1340 radv_link_task(const struct radv_device *device, struct radv_shader_stage *task_stage,
1341 struct radv_shader_stage *mesh_stage, const struct radv_graphics_state_key *gfx_state)
1342 {
1343 assert(task_stage->nir->info.stage == MESA_SHADER_TASK);
1344
1345 if (mesh_stage) {
1346 assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1347
1348 /* Linking task and mesh shaders currently doesn't do anything, but keep it for consistency. */
1349 radv_link_shaders(device, task_stage, mesh_stage, gfx_state);
1350 }
1351 }
1352
1353 static void
1354 radv_link_mesh(const struct radv_device *device, struct radv_shader_stage *mesh_stage,
1355 struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1356 {
1357 assert(mesh_stage->nir->info.stage == MESA_SHADER_MESH);
1358
1359 if (fs_stage) {
1360 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1361
1362 nir_foreach_shader_in_variable (var, fs_stage->nir) {
1363 /* These variables are per-primitive when used with a mesh shader. */
1364 if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || var->data.location == VARYING_SLOT_VIEWPORT ||
1365 var->data.location == VARYING_SLOT_LAYER) {
1366 var->data.per_primitive = true;
1367 }
1368 }
1369
1370 radv_link_shaders(device, mesh_stage, fs_stage, gfx_state);
1371 }
1372
1373 /* Lower the mesh shader draw ID to zero to prevent app bugs from triggering undefined behaviour. */
1374 if (mesh_stage->info.ms.has_task && BITSET_TEST(mesh_stage->nir->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
1375 radv_nir_lower_draw_id_to_zero(mesh_stage->nir);
1376 }
1377
1378 static void
1379 radv_link_fs(struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
1380 {
1381 assert(fs_stage->nir->info.stage == MESA_SHADER_FRAGMENT);
1382
1383 /* Lower the view index to map it onto the layer. */
1384 NIR_PASS(_, fs_stage->nir, radv_nir_lower_view_index);
1385
1386 radv_remove_color_exports(gfx_state, fs_stage->nir);
1387 }
1388
1389 static bool
1390 radv_pipeline_needs_noop_fs(struct radv_graphics_pipeline *pipeline, const struct radv_graphics_state_key *gfx_state)
1391 {
1392 if (pipeline->base.type == RADV_PIPELINE_GRAPHICS &&
1393 !(radv_pipeline_to_graphics(&pipeline->base)->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1394 return true;
1395
1396 if (pipeline->base.type == RADV_PIPELINE_GRAPHICS_LIB &&
1397 (gfx_state->lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1398 !(radv_pipeline_to_graphics_lib(&pipeline->base)->base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT))
1399 return true;
1400
1401 return false;
1402 }
1403
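/* Demote user varyings (VARYING_SLOT_VAR0 and up) to temporaries so dead-code
 * elimination can remove them. This is used when the fragment shader is a noop
 * and none of the varyings can be consumed.
 */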
1404 static void
1405 radv_remove_varyings(nir_shader *nir)
1406 {
1407 /* We can't demote mesh outputs to nir_var_shader_temp yet, because
1408 * they don't support array derefs of vectors.
1409 */
1410 if (nir->info.stage == MESA_SHADER_MESH)
1411 return;
1412
1413 bool fixup_derefs = false;
1414
1415 nir_foreach_shader_out_variable (var, nir) {
1416 if (var->data.always_active_io)
1417 continue;
1418
1419 if (var->data.location < VARYING_SLOT_VAR0)
1420 continue;
1421
1422 nir->info.outputs_written &= ~BITFIELD64_BIT(var->data.location);
1423 var->data.location = 0;
1424 var->data.mode = nir_var_shader_temp;
1425 fixup_derefs = true;
1426 }
1427
1428 if (fixup_derefs) {
1429 NIR_PASS_V(nir, nir_fixup_deref_modes);
1430 NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1431 NIR_PASS(_, nir, nir_opt_dce);
1432 }
1433 }
1434
1435 static void
1436 radv_graphics_shaders_link(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
1437 struct radv_shader_stage *stages)
1438 {
1439 /* Walk backwards to link */
1440 struct radv_shader_stage *next_stage = NULL;
1441 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1442 gl_shader_stage s = graphics_shader_order[i];
1443 if (!stages[s].nir)
1444 continue;
1445
1446 switch (s) {
1447 case MESA_SHADER_VERTEX:
1448 radv_link_vs(device, &stages[s], next_stage, gfx_state);
1449 break;
1450 case MESA_SHADER_TESS_CTRL:
1451 radv_link_tcs(device, &stages[s], next_stage, gfx_state);
1452 break;
1453 case MESA_SHADER_TESS_EVAL:
1454 radv_link_tes(device, &stages[s], next_stage, gfx_state);
1455 break;
1456 case MESA_SHADER_GEOMETRY:
1457 radv_link_gs(device, &stages[s], next_stage, gfx_state);
1458 break;
1459 case MESA_SHADER_TASK:
1460 radv_link_task(device, &stages[s], next_stage, gfx_state);
1461 break;
1462 case MESA_SHADER_MESH:
1463 radv_link_mesh(device, &stages[s], next_stage, gfx_state);
1464 break;
1465 case MESA_SHADER_FRAGMENT:
1466 radv_link_fs(&stages[s], gfx_state);
1467 break;
1468 default:
1469 unreachable("Invalid graphics shader stage");
1470 }
1471
1472 next_stage = &stages[s];
1473 }
1474 }
1475
1476 /**
1477 * First pass of varying optimization.
1478 * This function is called for each shader pair from first to last.
1479 *
1480 * 1. Run some NIR passes in preparation.
1481 * 2. Optimize varyings.
1482 * 3. If either shader changed, run algebraic optimizations.
1483 */
1484 static void
1485 radv_graphics_shaders_link_varyings_first(struct radv_shader_stage *producer_stage,
1486 struct radv_shader_stage *consumer_stage)
1487 {
1488 nir_shader *producer = producer_stage->nir;
1489 nir_shader *consumer = consumer_stage->nir;
1490
1491 /* It is expected by nir_opt_varyings that no undefined stores are present in the shader. */
1492 NIR_PASS(_, producer, nir_opt_undef);
1493
1494 /* Update load/store alignments because inter-stage code motion may move instructions used to deduce this info. */
1495 NIR_PASS(_, consumer, nir_opt_load_store_update_alignments);
1496
1497 /* Scalarize all I/O, because nir_opt_varyings and nir_opt_vectorize_io expect all I/O to be scalarized. */
1498 NIR_PASS(_, producer, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
1499 NIR_PASS(_, consumer, nir_lower_io_to_scalar, nir_var_shader_in, NULL, NULL);
1500
1501 /* Eliminate useless vec->mov copies resulting from scalarization. */
1502 NIR_PASS(_, producer, nir_copy_prop);
1503
1504 const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1505
1506 /* Run algebraic optimizations on shaders that changed. */
1507 if (p & nir_progress_producer) {
1508 radv_optimize_nir_algebraic(producer, false, false);
1509 }
1510 if (p & nir_progress_consumer) {
1511 radv_optimize_nir_algebraic(consumer, false, false);
1512 }
1513 }
1514
1515 /**
1516 * Second pass of varying optimization.
1517 * This function is called for each shader pair from last to first,
1518 * after the first pass has already been called for each pair.
1519 * This is done because the previous pass might have exposed additional
1520 * opportunities for optimization.
1521 *
1522 * 1. Optimize varyings again.
1523 * 2. If either shader changed, run algebraic optimizations.
1524 * 3. Run some NIR passes to clean up the shaders.
1525 */
1526 static void
1527 radv_graphics_shaders_link_varyings_second(struct radv_shader_stage *producer_stage,
1528 struct radv_shader_stage *consumer_stage)
1529 {
1530 nir_shader *producer = producer_stage->nir;
1531 nir_shader *consumer = consumer_stage->nir;
1532
1533 const nir_opt_varyings_progress p = nir_opt_varyings(producer, consumer, true, 0, 0);
1534
1535 /* Run algebraic optimizations on shaders that changed. */
1536 if (p & nir_progress_producer) {
1537 radv_optimize_nir_algebraic(producer, true, false);
1538 }
1539 if (p & nir_progress_consumer) {
1540 radv_optimize_nir_algebraic(consumer, true, false);
1541 }
1542
1543 /* Re-vectorize I/O for stages that output to memory (LDS or VRAM).
1544 * Don't vectorize FS inputs, doing so just regresses shader stats without any benefit.
1545 * There is also no benefit from re-vectorizing the outputs of the last pre-rasterization
1546 * stage here, because ac_nir_lower_ngg/legacy already takes care of that.
1547 */
1548 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
1549 NIR_PASS(_, producer, nir_opt_vectorize_io, nir_var_shader_out);
1550 NIR_PASS(_, consumer, nir_opt_vectorize_io, nir_var_shader_in);
1551 }
1552
1553 /* Recompute driver locations of PS inputs
1554 * because the backend compiler relies on their driver locations.
1555 */
1556 if (consumer->info.stage == MESA_SHADER_FRAGMENT)
1557 nir_recompute_io_bases(consumer, nir_var_shader_in);
1558
1559 /* Gather shader info; at least the I/O info likely changed
1560 * and changes to only the I/O info are not reflected in nir_opt_varyings_progress.
1561 */
1562 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
1563 nir_shader_gather_info(consumer, nir_shader_get_entrypoint(consumer));
1564
1565 /* Recreate XFB info from intrinsics (nir_opt_varyings may have changed it). */
1566 if (producer->xfb_info) {
1567 nir_gather_xfb_info_from_intrinsics(producer);
1568 }
1569 }
1570
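/* Record how many VS output slots stay linked to the next stage (TCS or GS): this is
 * simply the number of input slots the consumer still reads after varying optimization
 * and is later used to size the I/O layout of the merged shader parts.
 */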
1571 static void
1572 radv_graphics_shaders_fill_linked_vs_io_info(struct radv_shader_stage *vs_stage,
1573 struct radv_shader_stage *consumer_stage)
1574 {
1575 const unsigned num_reserved_slots = util_bitcount64(consumer_stage->nir->info.inputs_read);
1576 vs_stage->info.vs.num_linked_outputs = num_reserved_slots;
1577 vs_stage->info.outputs_linked = true;
1578
1579 switch (consumer_stage->stage) {
1580 case MESA_SHADER_TESS_CTRL: {
1581 consumer_stage->info.tcs.num_linked_inputs = num_reserved_slots;
1582 consumer_stage->info.inputs_linked = true;
1583 break;
1584 }
1585 case MESA_SHADER_GEOMETRY: {
1586 consumer_stage->info.gs.num_linked_inputs = num_reserved_slots;
1587 consumer_stage->info.inputs_linked = true;
1588 break;
1589 }
1590 default:
1591 unreachable("invalid next stage for VS");
1592 }
1593 }
1594
1595 static void
1596 radv_graphics_shaders_fill_linked_tcs_tes_io_info(struct radv_shader_stage *tcs_stage,
1597 struct radv_shader_stage *tes_stage)
1598 {
1599 assume(tes_stage->stage == MESA_SHADER_TESS_EVAL);
1600
1601 /* Count the number of per-vertex output slots we need to reserve for the TCS and TES. */
1602 const uint64_t per_vertex_mask =
1603 tes_stage->nir->info.inputs_read & ~(VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1604 const unsigned num_reserved_slots = util_bitcount64(per_vertex_mask);
1605
1606 /* Count the number of per-patch output slots we need to reserve for the TCS and TES.
1607 * This is necessary because we need it to determine the patch size in VRAM.
1608 */
1609 const uint64_t tess_lvl_mask =
1610 tes_stage->nir->info.inputs_read & (VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER);
1611 const unsigned num_reserved_patch_slots =
1612 util_bitcount64(tess_lvl_mask) + util_bitcount64(tes_stage->nir->info.patch_inputs_read);
1613
1614 tcs_stage->info.tcs.num_linked_outputs = num_reserved_slots;
1615 tcs_stage->info.tcs.num_linked_patch_outputs = num_reserved_patch_slots;
1616 tcs_stage->info.outputs_linked = true;
1617
1618 tes_stage->info.tes.num_linked_inputs = num_reserved_slots;
1619 tes_stage->info.tes.num_linked_patch_inputs = num_reserved_patch_slots;
1620 tes_stage->info.inputs_linked = true;
1621 }
1622
1623 static void
1624 radv_graphics_shaders_fill_linked_tes_gs_io_info(struct radv_shader_stage *tes_stage,
1625 struct radv_shader_stage *gs_stage)
1626 {
1627 assume(gs_stage->stage == MESA_SHADER_GEOMETRY);
1628
1629 const unsigned num_reserved_slots = util_bitcount64(gs_stage->nir->info.inputs_read);
1630 tes_stage->info.tes.num_linked_outputs = num_reserved_slots;
1631 tes_stage->info.outputs_linked = true;
1632 gs_stage->info.gs.num_linked_inputs = num_reserved_slots;
1633 gs_stage->info.inputs_linked = true;
1634 }
1635
1636 static void
1637 radv_graphics_shaders_fill_linked_io_info(struct radv_shader_stage *producer_stage,
1638 struct radv_shader_stage *consumer_stage)
1639 {
1640 /* We don't need to fill this info for the last pre-rasterization stage. */
1641 if (consumer_stage->stage == MESA_SHADER_FRAGMENT)
1642 return;
1643
1644 switch (producer_stage->stage) {
1645 case MESA_SHADER_VERTEX:
1646 radv_graphics_shaders_fill_linked_vs_io_info(producer_stage, consumer_stage);
1647 break;
1648
1649 case MESA_SHADER_TESS_CTRL:
1650 radv_graphics_shaders_fill_linked_tcs_tes_io_info(producer_stage, consumer_stage);
1651 break;
1652
1653 case MESA_SHADER_TESS_EVAL:
1654 radv_graphics_shaders_fill_linked_tes_gs_io_info(producer_stage, consumer_stage);
1655 break;
1656
1657 default:
1658 break;
1659 }
1660 }
1661
1662 /**
1663 * Varying optimizations performed on lowered shader I/O.
1664 *
1665 * We do this after lowering shader I/O because this is more effective
1666 * than running the same optimizations on I/O derefs.
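 *
 * For example, with a VS -> GS -> FS pipeline, the first pass below visits the
 * pairs (VS, GS) then (GS, FS), and the second pass visits (GS, FS) then (VS, GS).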
1667 */
1668 static void
1669 radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages)
1670 {
1671 /* Optimize varyings from first to last stage. */
1672 gl_shader_stage prev = MESA_SHADER_NONE;
1673 for (int i = 0; i < ARRAY_SIZE(graphics_shader_order); ++i) {
1674 gl_shader_stage s = graphics_shader_order[i];
1675 if (!stages[s].nir)
1676 continue;
1677
1678 if (prev != MESA_SHADER_NONE) {
1679 if (!stages[prev].key.optimisations_disabled && !stages[s].key.optimisations_disabled)
1680 radv_graphics_shaders_link_varyings_first(&stages[prev], &stages[s]);
1681 }
1682
1683 prev = s;
1684 }
1685
1686 /* Optimize varyings from last to first stage. */
1687 gl_shader_stage next = MESA_SHADER_NONE;
1688 for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; --i) {
1689 gl_shader_stage s = graphics_shader_order[i];
1690 if (!stages[s].nir)
1691 continue;
1692
1693 if (next != MESA_SHADER_NONE) {
1694 if (!stages[s].key.optimisations_disabled && !stages[next].key.optimisations_disabled)
1695 radv_graphics_shaders_link_varyings_second(&stages[s], &stages[next]);
1696
1697 radv_graphics_shaders_fill_linked_io_info(&stages[s], &stages[next]);
1698 }
1699
1700 next = s;
1701 }
1702 }
1703
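/* Build the PS epilog key: a 4-bit SPI color export format per MRT, the
 * int8/int10/float32 masks used when lowering color exports, and the MRTZ
 * (depth/stencil/sample mask) export state.
 */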
1704 struct radv_ps_epilog_key
1705 radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
1706 {
1707 const struct radv_physical_device *pdev = radv_device_physical(device);
1708 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1709 unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
1710 struct radv_ps_epilog_key key;
1711
1712 memset(&key, 0, sizeof(key));
1713 memset(key.color_map, MESA_VK_ATTACHMENT_UNUSED, sizeof(key.color_map));
1714
1715 for (unsigned i = 0; i < state->color_attachment_count; ++i) {
1716 unsigned cf;
1717 unsigned cb_idx = state->color_attachment_mappings[i];
1718 VkFormat fmt = state->color_attachment_formats[i];
1719
1720 if (fmt == VK_FORMAT_UNDEFINED || !(state->color_write_mask & (0xfu << (i * 4))) ||
1721 cb_idx == MESA_VK_ATTACHMENT_UNUSED) {
1722 cf = V_028714_SPI_SHADER_ZERO;
1723 } else {
1724 bool blend_enable = state->color_blend_enable & (0xfu << (i * 4));
1725
1726 cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
1727
1728 if (format_is_int8(fmt))
1729 is_int8 |= 1 << i;
1730 if (format_is_int10(fmt))
1731 is_int10 |= 1 << i;
1732 if (format_is_float32(fmt))
1733 is_float32 |= 1 << i;
1734 }
1735
1736 col_format |= cf << (4 * i);
1737
1738 key.color_map[i] = state->color_attachment_mappings[i];
1739 }
1740
1741 if (!(col_format & 0xf) && state->need_src_alpha & (1 << 0)) {
1742 /* When a subpass doesn't have any color attachments, write the alpha channel of MRT0 when
1743 * alpha coverage is enabled because the depth attachment needs it.
1744 */
1745 col_format |= V_028714_SPI_SHADER_32_AR;
1746 key.color_map[0] = 0;
1747 }
1748
1749 /* The output for dual source blending should have the same format as the first output. */
1750 if (state->mrt0_is_dual_src) {
1751 assert(!(col_format >> 4));
1752 col_format |= (col_format & 0xf) << 4;
1753 key.color_map[1] = 1;
1754 }
1755
1756 if (state->alpha_to_coverage_via_mrtz)
1757 z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
1758 state->alpha_to_coverage_via_mrtz);
1759
1760 key.spi_shader_col_format = col_format;
1761 key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
1762 key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
1763 key.enable_mrt_output_nan_fixup = instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
1764 key.colors_written = state->colors_written;
1765 key.mrt0_is_dual_src = state->mrt0_is_dual_src;
1766 key.export_depth = state->export_depth;
1767 key.export_stencil = state->export_stencil;
1768 key.export_sample_mask = state->export_sample_mask;
1769 key.alpha_to_coverage_via_mrtz = state->alpha_to_coverage_via_mrtz;
1770 key.spi_shader_z_format = z_format;
1771 key.alpha_to_one = state->alpha_to_one;
1772
1773 return key;
1774 }
1775
1776 static struct radv_ps_epilog_key
1777 radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state)
1778 {
1779 struct radv_ps_epilog_state ps_epilog = {0};
1780
1781 if (state->ms && state->ms->alpha_to_coverage_enable)
1782 ps_epilog.need_src_alpha |= 0x1;
1783
1784 if (state->cb) {
1785 for (uint32_t i = 0; i < state->cb->attachment_count; i++) {
1786 VkBlendOp eqRGB = state->cb->attachments[i].color_blend_op;
1787 VkBlendFactor srcRGB = state->cb->attachments[i].src_color_blend_factor;
1788 VkBlendFactor dstRGB = state->cb->attachments[i].dst_color_blend_factor;
1789
1790 /* Ignore other blend targets if dual-source blending is enabled to prevent wrong
1791 * behaviour.
1792 */
1793 if (i > 0 && ps_epilog.mrt0_is_dual_src)
1794 continue;
1795
1796 ps_epilog.color_write_mask |= (unsigned)state->cb->attachments[i].write_mask << (4 * i);
1797 if (!((ps_epilog.color_write_mask >> (i * 4)) & 0xf))
1798 continue;
1799
1800 if (state->cb->attachments[i].blend_enable)
1801 ps_epilog.color_blend_enable |= 0xfu << (i * 4);
1802
1803 if (!((ps_epilog.color_blend_enable >> (i * 4)) & 0xf))
1804 continue;
1805
1806 if (i == 0 && radv_can_enable_dual_src(&state->cb->attachments[i])) {
1807 ps_epilog.mrt0_is_dual_src = true;
1808 }
1809
1810 if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
1811 srcRGB = VK_BLEND_FACTOR_ONE;
1812 dstRGB = VK_BLEND_FACTOR_ONE;
1813 }
1814
1815 if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
1816 srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
1817 srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
1818 ps_epilog.need_src_alpha |= 1 << i;
1819 }
1820 }
1821
1822 if (state->rp) {
1823 ps_epilog.color_attachment_count = state->rp->color_attachment_count;
1824
1825 for (uint32_t i = 0; i < ps_epilog.color_attachment_count; i++) {
1826 ps_epilog.color_attachment_formats[i] = state->rp->color_attachment_formats[i];
1827 }
1828 }
1829
1830 if (state->ms)
1831 ps_epilog.alpha_to_one = state->ms->alpha_to_one_enable;
1832
1833 for (uint32_t i = 0; i < MAX_RTS; i++) {
1834 ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i;
1835 }
1836
1837 return radv_generate_ps_epilog_key(device, &ps_epilog);
1838 }
1839
1840 static struct radv_graphics_state_key
1841 radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
1842 VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
1843 {
1844 const struct radv_physical_device *pdev = radv_device_physical(device);
1845 struct radv_graphics_state_key key;
1846
1847 memset(&key, 0, sizeof(key));
1848
1849 key.lib_flags = lib_flags;
1850 key.has_multiview_view_index = state->rp ? !!state->rp->view_mask : 0;
1851
1852 if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI)) {
1853 key.vs.has_prolog = true;
1854 }
1855
1856 /* A VS prolog is also needed when the pre-rasterization stages are compiled without the vertex input interface state. */
1857 if ((state->shader_stages & VK_SHADER_STAGE_VERTEX_BIT) && !state->vi) {
1858 key.vs.has_prolog = true;
1859 }
1860
1861 /* Vertex input state */
1862 if (state->vi) {
1863 u_foreach_bit (i, state->vi->attributes_valid) {
1864 uint32_t binding = state->vi->attributes[i].binding;
1865 uint32_t offset = state->vi->attributes[i].offset;
1866 enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
1867
1868 key.vi.vertex_attribute_formats[i] = format;
1869 key.vi.vertex_attribute_bindings[i] = binding;
1870 key.vi.vertex_attribute_offsets[i] = offset;
1871 key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
1872
1873 /* vertex_attribute_strides is only needed to work around GFX6/7 offset>=stride checks. */
1874 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) {
1875 /* From the Vulkan spec 1.2.157:
1876 *
1877 * "If the bound pipeline state object was created with the
1878 * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE dynamic state enabled then pStrides[i]
1879 * specifies the distance in bytes between two consecutive elements within the
1880 * corresponding buffer. In this case the VkVertexInputBindingDescription::stride state
1881 * from the pipeline state object is ignored."
1882 *
1883 * Make sure the vertex attribute stride is zero to avoid computing a wrong offset if
1884 * it's initialized to something other than zero.
1885 */
1886 key.vi.vertex_attribute_strides[i] = state->vi->bindings[binding].stride;
1887 }
1888
1889 if (state->vi->bindings[binding].input_rate) {
1890 key.vi.instance_rate_inputs |= 1u << i;
1891 }
1892
1893 const struct ac_vtx_format_info *vtx_info =
1894 ac_get_vtx_format_info(pdev->info.gfx_level, pdev->info.family, format);
1895 unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
1896
1897 /* If offset is misaligned, then the buffer offset must be too. Just skip updating
1898 * vertex_binding_align in this case.
1899 */
1900 if (offset % attrib_align == 0) {
1901 key.vi.vertex_binding_align[binding] = MAX2(key.vi.vertex_binding_align[binding], attrib_align);
1902 }
1903 }
1904 }
1905
1906 if (state->ts)
1907 key.ts.patch_control_points = state->ts->patch_control_points;
1908
1909 if (state->ms) {
1910 key.ms.sample_shading_enable = state->ms->sample_shading_enable;
1911 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) &&
1912 state->ms->rasterization_samples > 1) {
1913 key.ms.rasterization_samples = state->ms->rasterization_samples;
1914 }
1915 }
1916
1917 if (pdev->info.gfx_level >= GFX11 && state->ms) {
1918 key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable;
1919 }
1920
1921 if (state->ia) {
1922 key.ia.topology = radv_translate_prim(state->ia->primitive_topology);
1923 }
1924
1925 if (!state->vi || !(state->shader_stages & (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
1926 VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT))) {
1927 key.unknown_rast_prim = true;
1928 }
1929
1930 if (pdev->info.gfx_level >= GFX10 && state->rs) {
1931 key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
1932 }
1933
1934 key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
1935
1936 if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) &&
1937 (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
1938 key.adjust_frag_coord_z = true;
1939
1940 if (radv_pipeline_needs_ps_epilog(state, lib_flags))
1941 key.ps.has_epilog = true;
1942
1943 key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
1944
1945 if (pdev->info.gfx_level >= GFX11) {
1946 /* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also
1947 * exported. However, when a PS epilog is needed and the MS state is NULL (with dynamic
1948 * rendering), it's not possible to know the info at compile time and MRTZ needs to be
1949 * exported in the epilog.
1950 */
1951 key.ps.exports_mrtz_via_epilog =
1952 key.ps.has_epilog && (!state->ms || BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE));
1953 }
1954
1955 key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
1956 (!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
1957
1958 if (pdev->use_ngg) {
1959 VkShaderStageFlags ngg_stage;
1960
1961 if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
1962 ngg_stage = VK_SHADER_STAGE_GEOMETRY_BIT;
1963 } else if (state->shader_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
1964 ngg_stage = VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1965 } else {
1966 ngg_stage = VK_SHADER_STAGE_VERTEX_BIT;
1967 }
1968
1969 key.dynamic_provoking_vtx_mode =
1970 BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX) &&
1971 (ngg_stage == VK_SHADER_STAGE_VERTEX_BIT || ngg_stage == VK_SHADER_STAGE_GEOMETRY_BIT);
1972 }
1973
1974 if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) && state->ia &&
1975 state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST &&
1976 !BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_POLYGON_MODE) && state->rs &&
1977 state->rs->polygon_mode != VK_POLYGON_MODE_POINT) {
1978 key.enable_remove_point_size = true;
1979 }
1980
1981 if (device->vk.enabled_features.smoothLines) {
1982 /* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time.
1983 * This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also
1984 * simplifies the implementation.
1985 */
1986 if (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
1987 (state->rs && state->rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR))
1988 key.dynamic_line_rast_mode = true;
1989
1990 /* For GPL, when the fragment shader is compiled without any pre-rasterization information,
1991 * ensure the line rasterization mode is considered dynamic because we can't know if it's
1992 * going to draw lines or not.
1993 */
1994 key.dynamic_line_rast_mode |= !!(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) &&
1995 !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
1996 }
1997
1998 return key;
1999 }
2000
2001 static struct radv_graphics_pipeline_key
2002 radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2003 const struct vk_graphics_pipeline_state *state,
2004 VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
2005 {
2006 VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2007 struct radv_graphics_pipeline_key key = {0};
2008
2009 key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags);
2010
2011 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2012 const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
2013 gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
2014
2015 key.stage_info[s] = radv_pipeline_get_shader_key(device, stage, create_flags, pCreateInfo->pNext);
2016
2017 if (s == MESA_SHADER_MESH && (state->shader_stages & VK_SHADER_STAGE_TASK_BIT_EXT))
2018 key.stage_info[s].has_task_shader = true;
2019 }
2020
2021 return key;
2022 }
2023
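/* Decide which pre-rasterization stages run as NGG. GFX11+ always uses NGG; on
 * GFX10/GFX10.3 it has to be disabled in some cases (streamout, very large GS
 * amplification, the RADV_DEBUG_NO_NGG_GS flag, or separately compiled stages
 * where the GS is unknown).
 */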
2024 static void
2025 radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
2026 VkShaderStageFlagBits active_nir_stages)
2027 {
2028 const struct radv_physical_device *pdev = radv_device_physical(device);
2029 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2030
2031 if (!pdev->cache_key.use_ngg)
2032 return;
2033
2034 if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
2035 stages[MESA_SHADER_VERTEX].info.is_ngg = true;
2036 } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2037 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = true;
2038 } else if (stages[MESA_SHADER_MESH].nir) {
2039 stages[MESA_SHADER_MESH].info.is_ngg = true;
2040 }
2041
2042 if (pdev->info.gfx_level >= GFX11) {
2043 if (stages[MESA_SHADER_GEOMETRY].nir)
2044 stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
2045 } else {
2046 /* GFX10/GFX10.3 can't always enable NGG due to HW bugs/limitations. */
2047 if (stages[MESA_SHADER_TESS_EVAL].nir && stages[MESA_SHADER_GEOMETRY].nir &&
2048 stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations *
2049 stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out >
2050 256) {
2051 /* Fall back to the legacy path if tessellation is
2052 * enabled with extreme geometry because
2053 * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
2054 * might hang.
2055 */
2056 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2057 }
2058
2059 struct radv_shader_stage *last_vgt_stage = NULL;
2060 radv_foreach_stage(i, active_nir_stages)
2061 {
2062 if (radv_is_last_vgt_stage(&stages[i])) {
2063 last_vgt_stage = &stages[i];
2064 }
2065 }
2066
2067 if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) ||
2068 ((instance->debug_flags & RADV_DEBUG_NO_NGG_GS) && stages[MESA_SHADER_GEOMETRY].nir)) {
2069 /* NGG needs to be disabled on GFX10/GFX10.3 when:
2070 * - streamout is used because NGG streamout isn't supported
2071 * - NGG GS is explicitly disabled to work around performance issues
2072 */
2073 if (stages[MESA_SHADER_TESS_EVAL].nir)
2074 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2075 else
2076 stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2077 }
2078
2079 if (stages[MESA_SHADER_GEOMETRY].nir) {
2080 if (stages[MESA_SHADER_TESS_EVAL].nir)
2081 stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_TESS_EVAL].info.is_ngg;
2082 else
2083 stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[MESA_SHADER_VERTEX].info.is_ngg;
2084 }
2085
2086 /* When pre-rasterization stages are compiled separately with shader objects, NGG GS needs to
2087 * be disabled because if the next stage of VS/TES is GS and GS is unknown, it might use
2088 * streamout but it's not possible to know that when compiling VS or TES only.
2089 */
2090 if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage == MESA_SHADER_GEOMETRY &&
2091 !stages[MESA_SHADER_GEOMETRY].nir) {
2092 stages[MESA_SHADER_VERTEX].info.is_ngg = false;
2093 } else if (stages[MESA_SHADER_TESS_EVAL].nir &&
2094 stages[MESA_SHADER_TESS_EVAL].info.next_stage == MESA_SHADER_GEOMETRY &&
2095 !stages[MESA_SHADER_GEOMETRY].nir) {
2096 stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false;
2097 } else if (stages[MESA_SHADER_GEOMETRY].nir &&
2098 (!stages[MESA_SHADER_VERTEX].nir && !stages[MESA_SHADER_TESS_EVAL].nir)) {
2099 stages[MESA_SHADER_GEOMETRY].info.is_ngg = false;
2100 }
2101 }
2102 }
2103
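/* Whether the last pre-rasterization stage can be compiled with a forced VRS rate
 * (force_vrs_enabled). Avoid it when the app exports its own shading rate or when
 * the PS depends on exact per-pixel behaviour (gl_FragCoord, fragment interlock).
 */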
2104 static bool
2105 radv_consider_force_vrs(const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage *last_vgt_stage,
2106 const struct radv_shader_stage *fs_stage)
2107 {
2108 if (!gfx_state->ps.force_vrs_enabled)
2109 return false;
2110
2111 /* Mesh shaders aren't considered. */
2112 if (last_vgt_stage->info.stage == MESA_SHADER_MESH)
2113 return false;
2114
2115 if (last_vgt_stage->nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE))
2116 return false;
2117
2118 /* VRS has no effect if there is no pixel shader. */
2119 if (last_vgt_stage->info.next_stage == MESA_SHADER_NONE)
2120 return false;
2121
2122 /* Do not enable if the PS uses gl_FragCoord because it breaks postprocessing in some games, or with Primitive
2123 * Ordered Pixel Shading (regardless of whether per-pixel data is addressed with gl_FragCoord or a custom
2124 * interpolator) as that'd result in races between adjacent primitives with no common fine pixels.
2125 */
2126 nir_shader *fs_shader = fs_stage->nir;
2127 if (fs_shader && (BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
2128 fs_shader->info.fs.sample_interlock_ordered || fs_shader->info.fs.sample_interlock_unordered ||
2129 fs_shader->info.fs.pixel_interlock_ordered || fs_shader->info.fs.pixel_interlock_unordered)) {
2130 return false;
2131 }
2132
2133 return true;
2134 }
2135
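/* Return the stage that follows "stage" given the set of active stages,
 * e.g. VS -> TCS when tessellation is enabled, otherwise VS -> GS or VS -> FS,
 * and MESA_SHADER_NONE when nothing follows.
 */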
2136 static gl_shader_stage
2137 radv_get_next_stage(gl_shader_stage stage, VkShaderStageFlagBits active_nir_stages)
2138 {
2139 switch (stage) {
2140 case MESA_SHADER_VERTEX:
2141 if (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2142 return MESA_SHADER_TESS_CTRL;
2143 } else if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2144 return MESA_SHADER_GEOMETRY;
2145 } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2146 return MESA_SHADER_FRAGMENT;
2147 } else {
2148 return MESA_SHADER_NONE;
2149 }
2150 case MESA_SHADER_TESS_CTRL:
2151 return MESA_SHADER_TESS_EVAL;
2152 case MESA_SHADER_TESS_EVAL:
2153 if (active_nir_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2154 return MESA_SHADER_GEOMETRY;
2155 } else if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2156 return MESA_SHADER_FRAGMENT;
2157 } else {
2158 return MESA_SHADER_NONE;
2159 }
2160 case MESA_SHADER_GEOMETRY:
2161 case MESA_SHADER_MESH:
2162 if (active_nir_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
2163 return MESA_SHADER_FRAGMENT;
2164 } else {
2165 return MESA_SHADER_NONE;
2166 }
2167 case MESA_SHADER_TASK:
2168 return MESA_SHADER_MESH;
2169 case MESA_SHADER_FRAGMENT:
2170 return MESA_SHADER_NONE;
2171 default:
2172 unreachable("invalid graphics shader stage");
2173 }
2174 }
2175
2176 static void
2177 radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2178 const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages,
2179 VkShaderStageFlagBits active_nir_stages)
2180 {
2181 radv_foreach_stage(i, active_nir_stages)
2182 {
2183 bool consider_force_vrs = false;
2184
2185 if (radv_is_last_vgt_stage(&stages[i])) {
2186 consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
2187 }
2188
2189 radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, pipeline_type,
2190 consider_force_vrs, &stages[i].info);
2191 }
2192
2193 radv_nir_shader_info_link(device, gfx_state, stages);
2194 }
2195
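/* Declare the shader ABI arguments. On GFX9+ merged stages (VS+TCS, VS/TES+GS) must
 * share one argument layout, so the pre-stage copies the args declared for the stage
 * it is merged into.
 */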
2196 static void
2197 radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
2198 const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
2199 {
2200 const struct radv_physical_device *pdev = radv_device_physical(device);
2201 enum amd_gfx_level gfx_level = pdev->info.gfx_level;
2202
2203 if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
2204 radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
2205 MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args);
2206 stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
2207 stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
2208 stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask;
2209
2210 stages[MESA_SHADER_VERTEX].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs;
2211 stages[MESA_SHADER_VERTEX].info.inline_push_constant_mask =
2212 stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask;
2213 stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args;
2214
2215 active_nir_stages &= ~(1 << MESA_SHADER_VERTEX);
2216 active_nir_stages &= ~(1 << MESA_SHADER_TESS_CTRL);
2217 }
2218
2219 if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
2220 gl_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
2221 radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage,
2222 &stages[MESA_SHADER_GEOMETRY].args);
2223 stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
2224 stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
2225 stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
2226
2227 stages[pre_stage].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs;
2228 stages[pre_stage].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask;
2229 stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args;
2230 active_nir_stages &= ~(1 << pre_stage);
2231 active_nir_stages &= ~(1 << MESA_SHADER_GEOMETRY);
2232 }
2233
2234 u_foreach_bit (i, active_nir_stages) {
2235 radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args);
2236 stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
2237 stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
2238 }
2239 }
2240
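/* With the legacy (non-NGG) GS path, the GS writes its outputs to a ring buffer and a
 * separate "GS copy shader", running as a HW VS, reads them back and performs the
 * actual position/parameter exports.
 */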
2241 static struct radv_shader *
2242 radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache,
2243 struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state,
2244 bool keep_executable_info, bool keep_statistic_info,
2245 struct radv_shader_binary **gs_copy_binary)
2246 {
2247 const struct radv_physical_device *pdev = radv_device_physical(device);
2248 struct radv_instance *instance = radv_physical_device_instance(pdev);
2249
2250 const struct radv_shader_info *gs_info = &gs_stage->info;
2251 ac_nir_gs_output_info output_info = {
2252 .streams = gs_info->gs.output_streams,
2253 .usage_mask = gs_info->gs.output_usage_mask,
2254 };
2255 nir_shader *nir = ac_nir_create_gs_copy_shader(
2256 gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
2257 gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
2258 gs_info->force_vrs_per_vertex, &output_info);
2259
2260 nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
2261 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2262
2263 struct radv_shader_stage gs_copy_stage = {
2264 .stage = MESA_SHADER_VERTEX,
2265 .shader_sha1 = {0},
2266 .key =
2267 {
2268 .optimisations_disabled = gs_stage->key.optimisations_disabled,
2269 },
2270 };
2271 radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info);
2272 radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false,
2273 &gs_copy_stage.info);
2274 gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */
2275 gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
2276 gs_copy_stage.info.so = gs_info->so;
2277 gs_copy_stage.info.outinfo = gs_info->outinfo;
2278 gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
2279 gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY;
2280
2281 radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
2282 &gs_copy_stage.args);
2283 gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
2284 gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
2285
2286 NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac);
2287 NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
2288
2289 struct radv_graphics_pipeline_key key = {0};
2290 bool dump_shader = radv_can_dump_shader(device, nir, true);
2291
2292 if (dump_shader)
2293 simple_mtx_lock(&instance->shader_dump_mtx);
2294
2295 *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info,
2296 keep_statistic_info);
2297 struct radv_shader *copy_shader =
2298 radv_shader_create(device, cache, *gs_copy_binary, keep_executable_info || dump_shader);
2299 if (copy_shader)
2300 radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, *gs_copy_binary, copy_shader, &nir, 1,
2301 &gs_copy_stage.info);
2302
2303 if (dump_shader)
2304 simple_mtx_unlock(&instance->shader_dump_mtx);
2305
2306 return copy_shader;
2307 }
2308
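/* Compile each remaining NIR stage to ISA, walking from FS backwards. On GFX9+ merged
 * stages are compiled together as one hardware shader, and a GS copy shader is created
 * for the legacy GS path.
 */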
2309 static void
2310 radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
2311 struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2312 bool keep_executable_info, bool keep_statistic_info,
2313 VkShaderStageFlagBits active_nir_stages, struct radv_shader **shaders,
2314 struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
2315 struct radv_shader_binary **gs_copy_binary)
2316 {
2317 const struct radv_physical_device *pdev = radv_device_physical(device);
2318 struct radv_instance *instance = radv_physical_device_instance(pdev);
2319
2320 for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
2321 if (!(active_nir_stages & (1 << s)))
2322 continue;
2323
2324 nir_shader *nir_shaders[2] = {stages[s].nir, NULL};
2325 unsigned shader_count = 1;
2326
2327 /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2328 if (pdev->info.gfx_level >= GFX9 &&
2329 ((s == MESA_SHADER_GEOMETRY &&
2330 (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
2331 (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
2332 gl_shader_stage pre_stage;
2333
2334 if (s == MESA_SHADER_GEOMETRY && (active_nir_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {
2335 pre_stage = MESA_SHADER_TESS_EVAL;
2336 } else {
2337 pre_stage = MESA_SHADER_VERTEX;
2338 }
2339
2340 nir_shaders[0] = stages[pre_stage].nir;
2341 nir_shaders[1] = stages[s].nir;
2342 shader_count = 2;
2343 }
2344
2345 int64_t stage_start = os_time_get_nano();
2346
2347 bool dump_shader = radv_can_dump_shader(device, nir_shaders[0], false);
2348
2349 if (dump_shader) {
2350 simple_mtx_lock(&instance->shader_dump_mtx);
2351 for (uint32_t i = 0; i < shader_count; i++)
2352 nir_print_shader(nir_shaders[i], stderr);
2353 }
2354
2355 binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
2356 keep_executable_info, keep_statistic_info);
2357 shaders[s] = radv_shader_create(device, cache, binaries[s], keep_executable_info || dump_shader);
2358 radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, binaries[s], shaders[s], nir_shaders,
2359 shader_count, &stages[s].info);
2360
2361 if (dump_shader)
2362 simple_mtx_unlock(&instance->shader_dump_mtx);
2363
2364 if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
2365 *gs_copy_shader = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
2366 keep_executable_info, keep_statistic_info, gs_copy_binary);
2367 }
2368
2369 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2370
2371 active_nir_stages &= ~(1 << nir_shaders[0]->info.stage);
2372 if (nir_shaders[1])
2373 active_nir_stages &= ~(1 << nir_shaders[1]->info.stage);
2374 }
2375 }
2376
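/* For graphics pipeline libraries, keep a serialized copy of each NIR shader so it can
 * be deserialized and re-optimized when the library is linked into a pipeline with
 * link-time optimization.
 */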
2377 static void
2378 radv_pipeline_retain_shaders(struct radv_retained_shaders *retained_shaders, struct radv_shader_stage *stages)
2379 {
2380 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2381 if (!stages[s].entrypoint)
2382 continue;
2383
2384 int64_t stage_start = os_time_get_nano();
2385
2386 /* Serialize the NIR shader to reduce memory pressure. */
2387 struct blob blob;
2388
2389 blob_init(&blob);
2390 nir_serialize(&blob, stages[s].nir, true);
2391 blob_finish_get_buffer(&blob, &retained_shaders->stages[s].serialized_nir,
2392 &retained_shaders->stages[s].serialized_nir_size);
2393
2394 memcpy(retained_shaders->stages[s].shader_sha1, stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2395 memcpy(&retained_shaders->stages[s].key, &stages[s].key, sizeof(stages[s].key));
2396
2397 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2398 }
2399 }
2400
2401 static void
2402 radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_lib_pipeline *lib,
2403 struct radv_shader_stage *stages)
2404 {
2405 const struct radv_physical_device *pdev = radv_device_physical(device);
2406 struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;
2407
2408 /* Import the stages (SPIR-V only in case of cache hits). */
2409 for (uint32_t i = 0; i < lib->stage_count; i++) {
2410 const VkPipelineShaderStageCreateInfo *sinfo = &lib->stages[i];
2411 gl_shader_stage s = vk_to_mesa_shader_stage(sinfo->stage);
2412
2413 radv_pipeline_stage_init(lib->base.base.create_flags, sinfo,
2414 &lib->layout, &lib->stage_keys[s], &stages[s]);
2415 }
2416
2417 /* Import the NIR shaders (after SPIRV->NIR). */
2418 for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.base.shaders); s++) {
2419 if (!retained_shaders->stages[s].serialized_nir_size)
2420 continue;
2421
2422 int64_t stage_start = os_time_get_nano();
2423
2424 /* Deserialize the NIR shader. */
2425 const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
2426 struct blob_reader blob_reader;
2427 blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
2428 retained_shaders->stages[s].serialized_nir_size);
2429
2430 stages[s].stage = s;
2431 stages[s].nir = nir_deserialize(NULL, options, &blob_reader);
2432 stages[s].entrypoint = nir_shader_get_entrypoint(stages[s].nir)->function->name;
2433 memcpy(stages[s].shader_sha1, retained_shaders->stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
2434 memcpy(&stages[s].key, &retained_shaders->stages[s].key, sizeof(stages[s].key));
2435
2436 radv_shader_layout_init(&lib->layout, s, &stages[s].layout);
2437
2438 stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2439
2440 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2441 }
2442 }
2443
2444 static void
2445 radv_pipeline_load_retained_shaders(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2446 struct radv_shader_stage *stages)
2447 {
2448 const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2449 const VkPipelineLibraryCreateInfoKHR *libs_info =
2450 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2451
2452 /* Nothing to load if no libs are imported. */
2453 if (!libs_info)
2454 return;
2455
2456 /* Nothing to load if fast-linking is enabled and there are no retained shaders. */
2457 if (radv_should_import_lib_binaries(create_flags))
2458 return;
2459
2460 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2461 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2462 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2463
2464 radv_pipeline_import_retained_shaders(device, gfx_pipeline_lib, stages);
2465 }
2466 }
2467
2468 static unsigned
2469 radv_get_rasterization_prim(const struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state)
2470 {
2471 unsigned rast_prim;
2472
2473 if (gfx_state->unknown_rast_prim)
2474 return -1;
2475
2476 if (stages[MESA_SHADER_GEOMETRY].nir) {
2477 rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_GEOMETRY].nir->info.gs.output_primitive);
2478 } else if (stages[MESA_SHADER_TESS_EVAL].nir) {
2479 if (stages[MESA_SHADER_TESS_EVAL].nir->info.tess.point_mode) {
2480 rast_prim = V_028A6C_POINTLIST;
2481 } else {
2482 rast_prim = radv_conv_tess_prim_to_gs_out(stages[MESA_SHADER_TESS_EVAL].nir->info.tess._primitive_mode);
2483 }
2484 } else if (stages[MESA_SHADER_MESH].nir) {
2485 rast_prim = radv_conv_gl_prim_to_gs_out(stages[MESA_SHADER_MESH].nir->info.mesh.primitive_type);
2486 } else {
2487 rast_prim = radv_conv_prim_to_gs_out(gfx_state->ia.topology, false);
2488 }
2489
2490 return rast_prim;
2491 }
2492
2493 static bool
2494 radv_is_fast_linking_enabled(const VkGraphicsPipelineCreateInfo *pCreateInfo)
2495 {
2496 const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2497 const VkPipelineLibraryCreateInfoKHR *libs_info =
2498 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2499
2500 if (!libs_info)
2501 return false;
2502
2503 return !(create_flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT);
2504 }
2505
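/* Compilation can be skipped entirely when pipeline binaries are imported, or when
 * fast-linking and every active stage already gets its shader from the imported
 * libraries.
 */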
2506 static bool
2507 radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo)
2508 {
2509 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
2510 const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2511 const struct radv_physical_device *pdev = radv_device_physical(device);
2512 VkShaderStageFlagBits binary_stages = 0;
2513 VkShaderStageFlags active_stages = 0;
2514
2515 /* No compilation when pipeline binaries are imported. */
2516 if (binary_info && binary_info->binaryCount > 0)
2517 return true;
2518
2519 /* Do not skip for libraries. */
2520 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)
2521 return false;
2522
2523 /* Do not skip when fast-linking isn't enabled. */
2524 if (!radv_is_fast_linking_enabled(pCreateInfo))
2525 return false;
2526
2527 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2528 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2529 active_stages |= sinfo->stage;
2530 }
2531
2532 const VkPipelineLibraryCreateInfoKHR *libs_info =
2533 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2534 if (libs_info) {
2535 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2536 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2537 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2538
2539 assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
2540
2541 active_stages |= gfx_pipeline_lib->base.active_stages;
2542
2543 for (uint32_t s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2544 if (!gfx_pipeline_lib->base.base.shaders[s])
2545 continue;
2546
2547 binary_stages |= mesa_to_vk_shader_stage(s);
2548 }
2549 }
2550 }
2551
2552 if (pdev->info.gfx_level >= GFX9) {
2553 /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
2554 if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2555 binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2556 }
2557
2558 if (binary_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
2559 if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
2560 binary_stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
2561 } else {
2562 binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
2563 }
2564 }
2565 }
2566
2567 /* Only skip compilation when all binaries have been imported. */
2568 return binary_stages == active_stages;
2569 }
2570
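/* Full graphics compilation path: SPIR-V -> NIR (with optional NIR caching), NGG
 * determination, cross-stage linking and varying optimization, then NIR -> ISA.
 */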
2571 void
2572 radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
2573 struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
2574 bool keep_executable_info, bool keep_statistic_info, bool is_internal,
2575 struct radv_retained_shaders *retained_shaders, bool noop_fs,
2576 struct radv_shader **shaders, struct radv_shader_binary **binaries,
2577 struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
2578 {
2579 const struct radv_physical_device *pdev = radv_device_physical(device);
2580 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2581 const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
2582 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2583 if (!stages[s].entrypoint)
2584 continue;
2585
2586 int64_t stage_start = os_time_get_nano();
2587
2588 /* NIR might already have been imported from a library. */
2589 if (!stages[s].nir) {
2590 struct radv_spirv_to_nir_options options = {
2591 .lower_view_index_to_zero = !gfx_state->has_multiview_view_index,
2592 .fix_dual_src_mrt1_export =
2593 gfx_state->ps.epilog.mrt0_is_dual_src && instance->drirc.dual_color_blend_by_location,
2594 .lower_view_index_to_device_index = stages[s].key.view_index_from_device_index,
2595 };
2596 blake3_hash key;
2597
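/* With RADV_PERFTEST=nir_cache, the SPIR-V->NIR result is cached: the lookup key is a BLAKE3
 * hash of the stage and its SPIR-V->NIR options, and misses are inserted after translation.
 */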
2598 if (nir_cache) {
2599 radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
2600 stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
2601 }
2602 if (!stages[s].nir) {
2603 stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
2604 if (nir_cache)
2605 radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
2606 }
2607 }
2608
2609 stages[s].feedback.duration += os_time_get_nano() - stage_start;
2610 }
2611
2612 if (retained_shaders) {
2613 radv_pipeline_retain_shaders(retained_shaders, stages);
2614 }
2615
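/* Collect the set of stages that have NIR to compile; the lowering and linking passes below
 * iterate over this mask.
 */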
2616 VkShaderStageFlagBits active_nir_stages = 0;
2617 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2618 if (stages[i].nir)
2619 active_nir_stages |= mesa_to_vk_shader_stage(i);
2620 }
2621
2622 if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
2623 BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
2624 nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
2625 nir_shader *task = stages[MESA_SHADER_TASK].nir;
2626
2627 /* Mesh shaders only have a 1D "vertex index" which we use
2628 * as "workgroup index" to emulate the 3D workgroup ID.
2629 */
2630 nir_lower_compute_system_values_options o = {
2631 .lower_workgroup_id_to_index = true,
2632 .shortcut_1d_workgroup_id = true,
2633 .num_workgroups[0] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[0] : 0,
2634 .num_workgroups[1] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[1] : 0,
2635 .num_workgroups[2] = task ? task->info.mesh.ts_mesh_dispatch_dimensions[2] : 0,
2636 };
2637
2638 NIR_PASS(_, mesh, nir_lower_compute_system_values, &o);
2639 }
2640
2641 radv_foreach_stage(i, active_nir_stages)
2642 {
2643 gl_shader_stage next_stage;
2644
2645 if (stages[i].next_stage != MESA_SHADER_NONE) {
2646 next_stage = stages[i].next_stage;
2647 } else {
2648 next_stage = radv_get_next_stage(i, active_nir_stages);
2649 }
2650
2651 radv_nir_shader_info_init(i, next_stage, &stages[i].info);
2652 }
2653
2654 /* Determine if shaders use NGG before linking because it's needed by some NIR passes. */
2655 radv_fill_shader_info_ngg(device, stages, active_nir_stages);
2656
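/* Lower GS intrinsics per stream; on the NGG path the lowering additionally counts primitives
 * and vertices per primitive and overwrites incomplete primitives, which the later NGG lowering
 * needs.
 */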
2657 if (stages[MESA_SHADER_GEOMETRY].nir) {
2658 unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
2659
2660 if (stages[MESA_SHADER_GEOMETRY].info.is_ngg) {
2661 nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
2662 nir_lower_gs_intrinsics_count_vertices_per_primitive |
2663 nir_lower_gs_intrinsics_overwrite_incomplete;
2664 }
2665
2666 NIR_PASS(_, stages[MESA_SHADER_GEOMETRY].nir, nir_lower_gs_intrinsics, nir_gs_flags);
2667 }
2668
2669 /* Remove all varyings when the fragment shader is a noop. */
2670 if (noop_fs) {
2671 radv_foreach_stage(i, active_nir_stages)
2672 {
2673 if (radv_is_last_vgt_stage(&stages[i])) {
2674 radv_remove_varyings(stages[i].nir);
2675 break;
2676 }
2677 }
2678 }
2679
2680 radv_graphics_shaders_link(device, gfx_state, stages);
2681
2682 if (stages[MESA_SHADER_FRAGMENT].nir) {
2683 unsigned rast_prim = radv_get_rasterization_prim(stages, gfx_state);
2684
2685 NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_lower_fs_barycentric, gfx_state, rast_prim);
2686 }
2687
2688 radv_foreach_stage(i, active_nir_stages)
2689 {
2690 int64_t stage_start = os_time_get_nano();
2691
2692 radv_optimize_nir(stages[i].nir, stages[i].key.optimisations_disabled);
2693
2694 /* Gather info again; information such as outputs_read can be out-of-date. */
2695 nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir));
2696 radv_nir_lower_io(device, stages[i].nir);
2697
2698 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2699 }
2700
2701 if (stages[MESA_SHADER_FRAGMENT].nir) {
2702 radv_nir_lower_poly_line_smooth(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2703
2704 if (!gfx_state->ps.has_epilog)
2705 radv_nir_remap_color_attachment(stages[MESA_SHADER_FRAGMENT].nir, gfx_state);
2706 }
2707
2708 /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
2709 radv_graphics_shaders_link_varyings(stages);
2710
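/* Gather the per-stage shader info and declare the shader arguments (user SGPRs/VGPRs) before
 * the final NIR postprocessing and backend compilation.
 */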
2711 radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
2712
2713 radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages);
2714
2715 radv_foreach_stage(i, active_nir_stages)
2716 {
2717 int64_t stage_start = os_time_get_nano();
2718
2719 radv_postprocess_nir(device, gfx_state, &stages[i]);
2720
2721 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2722 }
2723
2724 /* Compile NIR shaders to AMD assembly. */
2725 radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
2726 active_nir_stages, shaders, binaries, gs_copy_shader, gs_copy_binary);
2727
2728 if (keep_executable_info) {
2729 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
2730 struct radv_shader *shader = shaders[i];
2731 if (!shader)
2732 continue;
2733
2734 if (!stages[i].spirv.size)
2735 continue;
2736
2737 shader->spirv = malloc(stages[i].spirv.size);
2738 memcpy(shader->spirv, stages[i].spirv.data, stages[i].spirv.size);
2739 shader->spirv_size = stages[i].spirv.size;
2740 }
2741 }
2742 }
2743
2744 static bool
2745 radv_should_compute_pipeline_hash(const struct radv_device *device, const enum radv_pipeline_type pipeline_type,
2746 bool fast_linking_enabled)
2747 {
2748 const struct radv_physical_device *pdev = radv_device_physical(device);
2749 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2750
2751 /* Skip computing the pipeline hash when GPL fast-linking is enabled because these shaders aren't
2752 * supposed to be cached and computing the hash is costly. However, always compute it when RGP
2753 * is enabled, otherwise the ISA isn't reported.
2754 */
2755 return !fast_linking_enabled ||
2756 ((instance->vk.trace_mode & RADV_TRACE_MODE_RGP) && pipeline_type == RADV_PIPELINE_GRAPHICS);
2757 }
2758
2759 void
2760 radv_graphics_pipeline_state_finish(struct radv_device *device, struct radv_graphics_pipeline_state *gfx_state)
2761 {
2762 radv_pipeline_layout_finish(device, &gfx_state->layout);
2763 vk_free(&device->vk.alloc, gfx_state->vk_data);
2764
2765 if (gfx_state->stages) {
2766 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++)
2767 ralloc_free(gfx_state->stages[i].nir);
2768 free(gfx_state->stages);
2769 }
2770 }
2771
2772 VkResult
2773 radv_generate_graphics_pipeline_state(struct radv_device *device, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2774 struct radv_graphics_pipeline_state *gfx_state)
2775 {
2776 VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
2777 const VkPipelineCreateFlags2KHR create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
2778 const bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
2779 enum radv_pipeline_type pipeline_type = RADV_PIPELINE_GRAPHICS;
2780 VkResult result;
2781
2782 memset(gfx_state, 0, sizeof(*gfx_state));
2783
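/* A complete pipeline needs all GPL parts; a pipeline library only needs the parts requested by
 * the application, minus the parts already provided by imported libraries (subtracted below).
 */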
2784 VkGraphicsPipelineLibraryFlagBitsEXT needed_lib_flags = ALL_GRAPHICS_LIB_FLAGS;
2785 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
2786 const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
2787 vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
2788 needed_lib_flags = lib_info ? lib_info->flags : 0;
2789 pipeline_type = RADV_PIPELINE_GRAPHICS_LIB;
2790 }
2791
2792 radv_pipeline_layout_init(device, &gfx_state->layout, false);
2793
2794 /* If we have libraries, import them first. */
2795 const VkPipelineLibraryCreateInfoKHR *libs_info =
2796 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
2797 if (libs_info) {
2798 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
2799 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
2800 const struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
2801
2802 vk_graphics_pipeline_state_merge(&gfx_state->vk, &gfx_pipeline_lib->graphics_state);
2803
2804 radv_graphics_pipeline_import_layout(&gfx_state->layout, &gfx_pipeline_lib->layout);
2805
2806 needed_lib_flags &= ~gfx_pipeline_lib->lib_flags;
2807 }
2808 }
2809
2810 result = vk_graphics_pipeline_state_fill(&device->vk, &gfx_state->vk, pCreateInfo, NULL, 0, NULL, NULL,
2811 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &gfx_state->vk_data);
2812 if (result != VK_SUCCESS)
2813 goto fail;
2814
2815 if (pipeline_layout)
2816 radv_graphics_pipeline_import_layout(&gfx_state->layout, pipeline_layout);
2817
2818 if (radv_should_compute_pipeline_hash(device, pipeline_type, fast_linking_enabled))
2819 radv_pipeline_layout_hash(&gfx_state->layout);
2820
2821 gfx_state->compilation_required = !radv_skip_graphics_pipeline_compile(device, pCreateInfo);
2822 if (gfx_state->compilation_required) {
2823 gfx_state->key = radv_generate_graphics_pipeline_key(device, pCreateInfo, &gfx_state->vk, needed_lib_flags);
2824
2825 gfx_state->stages = malloc(sizeof(struct radv_shader_stage) * MESA_VULKAN_SHADER_STAGES);
2826 if (!gfx_state->stages) {
2827 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2828 goto fail;
2829 }
2830
2831 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2832 gfx_state->stages[i].entrypoint = NULL;
2833 gfx_state->stages[i].nir = NULL;
2834 gfx_state->stages[i].spirv.size = 0;
2835 gfx_state->stages[i].next_stage = MESA_SHADER_NONE;
2836 }
2837
2838 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2839 const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
2840 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
2841
2842 radv_pipeline_stage_init(create_flags, sinfo, &gfx_state->layout, &gfx_state->key.stage_info[stage],
2843 &gfx_state->stages[stage]);
2844 }
2845
2846 radv_pipeline_load_retained_shaders(device, pCreateInfo, gfx_state->stages);
2847 }
2848
2849 return VK_SUCCESS;
2850
2851 fail:
2852 radv_graphics_pipeline_state_finish(device, gfx_state);
2853 return result;
2854 }
2855
2856 void
2857 radv_graphics_pipeline_hash(const struct radv_device *device, const struct radv_graphics_pipeline_state *gfx_state,
2858 unsigned char *hash)
2859 {
2860 struct mesa_sha1 ctx;
2861
2862 _mesa_sha1_init(&ctx);
2863 radv_pipeline_hash(device, &gfx_state->layout, &ctx);
2864
2865 _mesa_sha1_update(&ctx, &gfx_state->key.gfx_state, sizeof(gfx_state->key.gfx_state));
2866
2867 for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
2868 const struct radv_shader_stage *stage = &gfx_state->stages[s];
2869
2870 if (!stage->entrypoint)
2871 continue;
2872
2873 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
2874 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key));
2875 }
2876
2877 _mesa_sha1_final(&ctx, hash);
2878 }
2879
2880 static VkResult
2881 radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo,
2882 const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device,
2883 struct vk_pipeline_cache *cache, bool fast_linking_enabled)
2884 {
2885 struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
2886 struct radv_shader_binary *gs_copy_binary = NULL;
2887 bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
2888 bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
2889 struct radv_shader_stage *stages = gfx_state->stages;
2890 const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
2891 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2892 VkPipelineCreationFeedback pipeline_feedback = {
2893 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2894 };
2895 bool skip_shaders_cache = false;
2896 VkResult result = VK_SUCCESS;
2897 const bool retain_shaders =
2898 !!(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
2899 struct radv_retained_shaders *retained_shaders = NULL;
2900
2901 int64_t pipeline_start = os_time_get_nano();
2902
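/* The pipeline hash (used for the shaders cache and for RGP reports) is a SHA-1 over the
 * pipeline layout, the graphics state key and the per-stage shader keys.
 */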
2903 if (radv_should_compute_pipeline_hash(device, pipeline->base.type, fast_linking_enabled)) {
2904 radv_graphics_pipeline_hash(device, gfx_state, pipeline->base.sha1);
2905
2906 pipeline->base.pipeline_hash = *(uint64_t *)pipeline->base.sha1;
2907 }
2908
2909 /* Skip the shaders cache when any of the following is true:
2910 * - fast-linking is enabled because it's useless to cache unoptimized pipelines
2911 * - shaders are captured because it's for debugging purposes
2912 * - binaries are captured for later use
2913 * - graphics pipeline libraries are created with the RETAIN_LINK_TIME_OPTIMIZATION flag and
2914 * module identifiers are used (i.e. no SPIR-V is provided).
2915 */
2916 if (fast_linking_enabled || keep_executable_info ||
2917 (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR)) {
2918 skip_shaders_cache = true;
2919 } else if (retain_shaders) {
2920 assert(pipeline->base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
2921 for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
2922 if (stages[i].entrypoint && !stages[i].spirv.size) {
2923 skip_shaders_cache = true;
2924 break;
2925 }
2926 }
2927 }
2928
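/* Look up the whole pipeline in the shaders cache first; on a hit, compilation is skipped and
 * only the retained stage info and creation feedback are filled.
 */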
2929 bool found_in_application_cache = true;
2930 if (!skip_shaders_cache &&
2931 radv_graphics_pipeline_cache_search(device, cache, pipeline, &found_in_application_cache)) {
2932 if (found_in_application_cache)
2933 pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2934
2935 if (retain_shaders) {
2936 /* For graphics pipeline libraries created with the RETAIN_LINK_TIME_OPTIMIZATION flag, we
2937 * need to retain the stage info because we can't know if the LTO pipelines will
2938 * be found in the shaders cache.
2939 */
2940 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
2941
2942 gfx_pipeline_lib->stages = radv_copy_shader_stage_create_info(device, pCreateInfo->stageCount,
2943 pCreateInfo->pStages, gfx_pipeline_lib->mem_ctx);
2944 if (!gfx_pipeline_lib->stages)
2945 return VK_ERROR_OUT_OF_HOST_MEMORY;
2946
2947 gfx_pipeline_lib->stage_count = pCreateInfo->stageCount;
2948
2949 for (unsigned i = 0; i < pCreateInfo->stageCount; i++) {
2950 gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
2951 gfx_pipeline_lib->stage_keys[s] = gfx_state->key.stage_info[s];
2952 }
2953 }
2954
2955 result = VK_SUCCESS;
2956 goto done;
2957 }
2958
2959 if (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
2960 return VK_PIPELINE_COMPILE_REQUIRED;
2961
2962 if (retain_shaders) {
2963 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(&pipeline->base);
2964 retained_shaders = &gfx_pipeline_lib->retained_shaders;
2965 }
2966
2967 const bool noop_fs = radv_pipeline_needs_noop_fs(pipeline, &gfx_state->key.gfx_state);
2968
2969 radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
2970 keep_statistic_info, pipeline->base.is_internal, retained_shaders, noop_fs,
2971 pipeline->base.shaders, binaries, &pipeline->base.gs_copy_shader, &gs_copy_binary);
2972
2973 if (!skip_shaders_cache) {
2974 radv_pipeline_cache_insert(device, cache, &pipeline->base);
2975 }
2976
2977 free(gs_copy_binary);
2978 for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
2979 free(binaries[i]);
2980 if (stages[i].nir) {
2981 if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) {
2982 radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], i, stderr);
2983 }
2984 }
2985 }
2986
2987 done:
2988 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2989
2990 if (creation_feedback) {
2991 *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
2992
2993 if (creation_feedback->pipelineStageCreationFeedbackCount > 0) {
2994 uint32_t num_feedbacks = 0;
2995
2996 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
2997 gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
2998 creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
2999 }
3000
3001 /* Stages imported from graphics pipeline libraries are defined as additional entries in the
3002 * order they were imported.
3003 */
3004 const VkPipelineLibraryCreateInfoKHR *libs_info =
3005 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3006 if (libs_info) {
3007 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3008 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3009 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3010
3011 if (!gfx_pipeline_lib->base.active_stages)
3012 continue;
3013
3014 radv_foreach_stage(s, gfx_pipeline_lib->base.active_stages)
3015 {
3016 creation_feedback->pPipelineStageCreationFeedbacks[num_feedbacks++] = stages[s].feedback;
3017 }
3018 }
3019 }
3020
3021 assert(num_feedbacks == creation_feedback->pipelineStageCreationFeedbackCount);
3022 }
3023 }
3024
3025 return result;
3026 }
3027
3028 struct radv_vgt_shader_key
3029 radv_get_vgt_shader_key(const struct radv_device *device, struct radv_shader **shaders,
3030 const struct radv_shader *gs_copy_shader)
3031 {
3032 uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
3033 struct radv_shader *last_vgt_shader = NULL;
3034 struct radv_vgt_shader_key key;
3035
3036 memset(&key, 0, sizeof(key));
3037
3038 if (shaders[MESA_SHADER_GEOMETRY]) {
3039 last_vgt_shader = shaders[MESA_SHADER_GEOMETRY];
3040 } else if (shaders[MESA_SHADER_TESS_EVAL]) {
3041 last_vgt_shader = shaders[MESA_SHADER_TESS_EVAL];
3042 } else if (shaders[MESA_SHADER_VERTEX]) {
3043 last_vgt_shader = shaders[MESA_SHADER_VERTEX];
3044 } else {
3045 assert(shaders[MESA_SHADER_MESH]);
3046 last_vgt_shader = shaders[MESA_SHADER_MESH];
3047 }
3048
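/* Wave sizes default to 64 and are refined from the compiled shaders; the GS copy shader runs
 * as the hardware VS, so it provides the VS wave size when present.
 */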
3049 vs_size = gs_size = last_vgt_shader->info.wave_size;
3050 if (gs_copy_shader)
3051 vs_size = gs_copy_shader->info.wave_size;
3052
3053 if (shaders[MESA_SHADER_TESS_CTRL])
3054 hs_size = shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
3055
3056 key.tess = !!shaders[MESA_SHADER_TESS_CTRL];
3057 key.gs = !!shaders[MESA_SHADER_GEOMETRY];
3058 if (last_vgt_shader->info.is_ngg) {
3059 key.ngg = 1;
3060 key.ngg_passthrough = last_vgt_shader->info.is_ngg_passthrough;
3061 key.ngg_streamout = last_vgt_shader->info.so.num_outputs > 0;
3062 }
3063 if (shaders[MESA_SHADER_MESH]) {
3064 key.mesh = 1;
3065 key.mesh_scratch_ring = shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
3066 }
3067
3068 key.hs_wave32 = hs_size == 32;
3069 key.vs_wave32 = vs_size == 32;
3070 key.gs_wave32 = gs_size == 32;
3071
3072 return key;
3073 }
3074
3075 static bool
3076 gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
3077 {
3078 const struct radv_physical_device *pdev = radv_device_physical(device);
3079 const struct radv_instance *instance = radv_physical_device_instance(pdev);
3080 struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
3081
3082 if (pdev->info.gfx_level != GFX10_3)
3083 return false;
3084
3085 if (instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
3086 return false;
3087
3088 if (ps && !ps->info.ps.allow_flat_shading)
3089 return false;
3090
3091 return true;
3092 }
3093
3094 static void
3095 radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3096 const struct vk_graphics_pipeline_state *state)
3097 {
3098 const struct radv_physical_device *pdev = radv_device_physical(device);
3099 const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
3100
3101 if (!state->vi)
3102 return;
3103
3104 u_foreach_bit (i, state->vi->bindings_valid) {
3105 pipeline->binding_stride[i] = state->vi->bindings[i].stride;
3106 }
3107
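/* When the VS consumes one vertex buffer descriptor per attribute (e.g. for robustness),
 * precompute per-attribute format, alignment and divisor information; otherwise only the
 * binding mapping is needed.
 */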
3108 if (vs->info.vs.use_per_attribute_vb_descs) {
3109 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3110 const enum radeon_family family = pdev->info.family;
3111 const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(gfx_level, family);
3112
3113 pipeline->vertex_input.bindings_match_attrib = true;
3114
3115 u_foreach_bit (i, state->vi->attributes_valid) {
3116 uint32_t binding = state->vi->attributes[i].binding;
3117 uint32_t offset = state->vi->attributes[i].offset;
3118
3119 pipeline->vertex_input.attribute_mask |= BITFIELD_BIT(i);
3120 pipeline->vertex_input.bindings[i] = binding;
3121 pipeline->vertex_input.bindings_match_attrib &= binding == i;
3122
3123 if (state->vi->bindings[binding].stride) {
3124 pipeline->vertex_input.attrib_index_offset[i] = offset / state->vi->bindings[binding].stride;
3125 }
3126
3127 if (state->vi->bindings[binding].input_rate) {
3128 pipeline->vertex_input.instance_rate_inputs |= BITFIELD_BIT(i);
3129 pipeline->vertex_input.divisors[i] = state->vi->bindings[binding].divisor;
3130
3131 if (state->vi->bindings[binding].divisor == 0) {
3132 pipeline->vertex_input.zero_divisors |= BITFIELD_BIT(i);
3133 } else if (state->vi->bindings[binding].divisor > 1) {
3134 pipeline->vertex_input.nontrivial_divisors |= BITFIELD_BIT(i);
3135 }
3136 }
3137
3138 pipeline->vertex_input.offsets[i] = offset;
3139
3140 enum pipe_format format = vk_format_to_pipe_format(state->vi->attributes[i].format);
3141 const struct ac_vtx_format_info *vtx_info = &vtx_info_table[format];
3142
3143 pipeline->vertex_input.formats[i] = format;
3144 uint8_t format_align_req_minus_1 = vtx_info->chan_byte_size >= 4 ? 3 : (vtx_info->element_size - 1);
3145 pipeline->vertex_input.format_align_req_minus_1[i] = format_align_req_minus_1;
3146 uint8_t component_align_req_minus_1 =
3147 MIN2(vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size, 4) - 1;
3148 pipeline->vertex_input.component_align_req_minus_1[i] = component_align_req_minus_1;
3149 pipeline->vertex_input.format_sizes[i] = vtx_info->element_size;
3150 pipeline->vertex_input.alpha_adjust_lo |= (vtx_info->alpha_adjust & 0x1) << i;
3151 pipeline->vertex_input.alpha_adjust_hi |= (vtx_info->alpha_adjust >> 1) << i;
3152 if (G_008F0C_DST_SEL_X(vtx_info->dst_sel) == V_008F0C_SQ_SEL_Z) {
3153 pipeline->vertex_input.post_shuffle |= BITFIELD_BIT(i);
3154 }
3155
3156 if (!(vtx_info->has_hw_format & BITFIELD_BIT(vtx_info->num_channels - 1))) {
3157 pipeline->vertex_input.nontrivial_formats |= BITFIELD_BIT(i);
3158 }
3159 }
3160 } else {
3161 u_foreach_bit (i, vs->info.vs.vb_desc_usage_mask) {
3162 pipeline->vertex_input.bindings[i] = i;
3163 }
3164 }
3165 }
3166
3167 static void
3168 radv_pipeline_init_shader_stages_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3169 {
3170 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
3171 bool shader_exists = !!pipeline->base.shaders[i];
3172 if (shader_exists || i < MESA_SHADER_COMPUTE) {
3173 if (shader_exists)
3174 pipeline->base.need_indirect_descriptor_sets |=
3175 radv_shader_need_indirect_descriptor_sets(pipeline->base.shaders[i]);
3176 }
3177 }
3178
3179 gl_shader_stage first_stage =
3180 radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX;
3181
3182 const struct radv_shader *shader = radv_get_shader(pipeline->base.shaders, first_stage);
3183 const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
3184
3185 if (loc->sgpr_idx != -1) {
3186 pipeline->vtx_base_sgpr = shader->info.user_data_0;
3187 pipeline->vtx_base_sgpr += loc->sgpr_idx * 4;
3188 pipeline->vtx_emit_num = loc->num_sgprs;
3189 pipeline->uses_drawid = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_draw_id;
3190 pipeline->uses_baseinstance = radv_get_shader(pipeline->base.shaders, first_stage)->info.vs.needs_base_instance;
3191
3192 assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance);
3193 }
3194 }
3195
3196 uint32_t
3197 radv_get_vgt_gs_out(struct radv_shader **shaders, uint32_t primitive_topology)
3198 {
3199 uint32_t gs_out;
3200
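/* The output primitive type comes from the last geometry-producing stage: the GS output
 * primitive, the TES primitive mode (or points in point mode), the mesh shader output
 * primitive, or the input topology for pipelines without tessellation/GS/mesh.
 */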
3201 if (shaders[MESA_SHADER_GEOMETRY]) {
3202 gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
3203 } else if (shaders[MESA_SHADER_TESS_CTRL]) {
3204 if (shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
3205 gs_out = V_028A6C_POINTLIST;
3206 } else {
3207 gs_out = radv_conv_tess_prim_to_gs_out(shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode);
3208 }
3209 } else if (shaders[MESA_SHADER_MESH]) {
3210 gs_out = radv_conv_gl_prim_to_gs_out(shaders[MESA_SHADER_MESH]->info.ms.output_prim);
3211 } else {
3212 gs_out = radv_conv_prim_to_gs_out(primitive_topology, false);
3213 }
3214
3215 return gs_out;
3216 }
3217
3218 static uint32_t
3219 radv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state)
3220 {
3221 uint32_t primitive_topology = 0;
3222
3223 if (pipeline->last_vgt_api_stage == MESA_SHADER_VERTEX)
3224 primitive_topology = radv_translate_prim(state->ia->primitive_topology);
3225
3226 return radv_get_vgt_gs_out(pipeline->base.shaders, primitive_topology);
3227 }
3228
3229 static void
3230 radv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline,
3231 const struct radv_graphics_pipeline_create_info *extra,
3232 const struct vk_graphics_pipeline_state *state)
3233 {
3234 if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
3235 extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
3236 extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 ||
3237 extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 ||
3238 extra->custom_blend_mode == V_028808_CB_RESOLVE) {
3239 /* According to the CB spec, CB_SHADER_MASK should be set to enable writes to all four
3240 * channels of MRT0.
3241 */
3242 pipeline->cb_shader_mask = 0xf;
3243
3244 pipeline->custom_blend_mode = extra->custom_blend_mode;
3245 }
3246
3247 if (extra->use_rectlist) {
3248 struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
3249 dynamic->vk.ia.primitive_topology = V_008958_DI_PT_RECTLIST;
3250
3251 pipeline->rast_prim = radv_conv_prim_to_gs_out(dynamic->vk.ia.primitive_topology, pipeline->is_ngg);
3252 }
3253
3254 if (radv_pipeline_has_ds_attachments(state->rp)) {
3255 pipeline->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
3256 pipeline->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
3257 pipeline->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
3258 pipeline->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
3259 }
3260 }
3261
3262 bool
3263 radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
3264 unsigned custom_blend_mode)
3265 {
3266 const struct radv_physical_device *pdev = radv_device_physical(device);
3267 const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
3268
3269 if (!ps)
3270 return false;
3271
3272 /* Ensure that some export memory is always allocated, for two reasons:
3273 *
3274 * 1) Correctness: The hardware ignores the EXEC mask if no export
3275 * memory is allocated, so KILL and alpha test do not work correctly
3276 * without this.
3277 * 2) Performance: Every shader needs at least a NULL export, even when
3278 * it writes no color/depth output. The NULL export instruction
3279 * stalls without this setting.
3280 *
3281 * Don't add this to CB_SHADER_MASK.
3282 *
3283 * GFX10 supports pixel shaders without exports by setting both the
3284 * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
3285 * instructions if any are present.
3286 *
3287 * GFX11 requires one color output, otherwise the DCC decompression does nothing.
3288 *
3289 * Primitive Ordered Pixel Shading also requires an export, otherwise interlocking doesn't work
3290 * correctly before GFX11, and a hang happens on GFX11.
3291 */
3292 return (gfx_level <= GFX9 || ps->info.ps.can_discard || ps->info.ps.pops ||
3293 (custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 && gfx_level >= GFX11)) &&
3294 !ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask;
3295 }
3296
3297 static VkResult
3298 radv_graphics_pipeline_import_binaries(struct radv_device *device, struct radv_graphics_pipeline *pipeline,
3299 const VkPipelineBinaryInfoKHR *binary_info)
3300 {
3301 blake3_hash pipeline_hash;
3302 struct mesa_blake3 ctx;
3303
3304 _mesa_blake3_init(&ctx);
3305
3306 for (uint32_t i = 0; i < binary_info->binaryCount; i++) {
3307 VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[i]);
3308 struct radv_shader *shader;
3309 struct blob_reader blob;
3310
3311 blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
3312
3313 shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
3314 if (!shader)
3315 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3316
3317 if (shader->info.stage == MESA_SHADER_VERTEX && i > 0) {
3318 /* The GS copy-shader is a VS placed after all other stages. */
3319 pipeline->base.gs_copy_shader = shader;
3320 } else {
3321 pipeline->base.shaders[shader->info.stage] = shader;
3322 }
3323
3324 _mesa_blake3_update(&ctx, pipeline_binary->key, sizeof(pipeline_binary->key));
3325 }
3326
3327 _mesa_blake3_final(&ctx, pipeline_hash);
3328
3329 pipeline->base.pipeline_hash = *(uint64_t *)pipeline_hash;
3330
3331 pipeline->has_pipeline_binaries = true;
3332
3333 return VK_SUCCESS;
3334 }
3335
3336 static VkResult
3337 radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device,
3338 struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
3339 const struct radv_graphics_pipeline_create_info *extra)
3340 {
3341 bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
3342 struct radv_graphics_pipeline_state gfx_state;
3343 VkResult result = VK_SUCCESS;
3344
3345 pipeline->last_vgt_api_stage = MESA_SHADER_NONE;
3346
3347 const VkPipelineLibraryCreateInfoKHR *libs_info =
3348 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3349
3350 /* If we have libraries, import them first. */
3351 if (libs_info) {
3352 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3353 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3354 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3355
3356 assert(pipeline_lib->type == RADV_PIPELINE_GRAPHICS_LIB);
3357
3358 radv_graphics_pipeline_import_lib(device, pipeline, gfx_pipeline_lib);
3359 }
3360 }
3361
3362 radv_pipeline_import_graphics_info(device, pipeline, pCreateInfo);
3363
3364 result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
3365 if (result != VK_SUCCESS)
3366 return result;
3367
3368 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
3369
3370 if (binary_info && binary_info->binaryCount > 0) {
3371 result = radv_graphics_pipeline_import_binaries(device, pipeline, binary_info);
3372 } else {
3373 if (gfx_state.compilation_required) {
3374 result =
3375 radv_graphics_pipeline_compile(pipeline, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
3376 }
3377 }
3378
3379 if (result != VK_SUCCESS) {
3380 radv_graphics_pipeline_state_finish(device, &gfx_state);
3381 return result;
3382 }
3383
3384 uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &gfx_state.vk);
3385
3386 radv_pipeline_init_multisample_state(device, pipeline, pCreateInfo, &gfx_state.vk);
3387
3388 if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3389 radv_pipeline_init_input_assembly_state(device, pipeline);
3390 radv_pipeline_init_dynamic_state(device, pipeline, &gfx_state.vk, pCreateInfo);
3391
3392 const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
3393 if (ps && !ps->info.ps.has_epilog) {
3394 pipeline->spi_shader_col_format = ps->info.ps.spi_shader_col_format;
3395 pipeline->cb_shader_mask = ps->info.ps.cb_shader_mask;
3396 }
3397
3398 unsigned custom_blend_mode = extra ? extra->custom_blend_mode : 0;
3399 if (radv_needs_null_export_workaround(device, ps, custom_blend_mode) && !pipeline->spi_shader_col_format) {
3400 pipeline->spi_shader_col_format = V_028714_SPI_SHADER_32_R;
3401 }
3402
3403 if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH))
3404 radv_pipeline_init_vertex_input_state(device, pipeline, &gfx_state.vk);
3405
3406 radv_pipeline_init_shader_stages_state(device, pipeline);
3407
3408 pipeline->is_ngg = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.is_ngg;
3409 pipeline->has_ngg_culling =
3410 pipeline->is_ngg && pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling;
3411 pipeline->force_vrs_per_vertex = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex;
3412 pipeline->rast_prim = vgt_gs_out_prim_type;
3413 pipeline->uses_out_of_order_rast = gfx_state.vk.rs->rasterization_order_amd == VK_RASTERIZATION_ORDER_RELAXED_AMD;
3414 pipeline->uses_vrs = radv_is_vrs_enabled(&gfx_state.vk);
3415 pipeline->uses_vrs_attachment = radv_pipeline_uses_vrs_attachment(pipeline, &gfx_state.vk);
3416 pipeline->uses_vrs_coarse_shading = !pipeline->uses_vrs && gfx103_pipeline_vrs_coarse_shading(device, pipeline);
3417
3418 pipeline->base.push_constant_size = gfx_state.layout.push_constant_size;
3419 pipeline->base.dynamic_offset_count = gfx_state.layout.dynamic_offset_count;
3420
3421 if (extra) {
3422 radv_pipeline_init_extra(pipeline, extra, &gfx_state.vk);
3423 }
3424
3425 radv_graphics_pipeline_state_finish(device, &gfx_state);
3426 return result;
3427 }
3428
3429 VkResult
3430 radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo,
3431 const struct radv_graphics_pipeline_create_info *extra,
3432 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
3433 {
3434 VK_FROM_HANDLE(radv_device, device, _device);
3435 VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
3436 struct radv_graphics_pipeline *pipeline;
3437 VkResult result;
3438
3439 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3440 if (pipeline == NULL)
3441 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3442
3443 radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_GRAPHICS);
3444 pipeline->base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
3445 pipeline->base.is_internal = _cache == device->meta_state.cache;
3446
3447 result = radv_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, extra);
3448 if (result != VK_SUCCESS) {
3449 radv_pipeline_destroy(device, &pipeline->base, pAllocator);
3450 return result;
3451 }
3452
3453 *pPipeline = radv_pipeline_to_handle(&pipeline->base);
3454 radv_rmv_log_graphics_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
3455 return VK_SUCCESS;
3456 }
3457
3458 void
3459 radv_destroy_graphics_pipeline(struct radv_device *device, struct radv_graphics_pipeline *pipeline)
3460 {
3461 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3462 if (pipeline->base.shaders[i])
3463 radv_shader_unref(device, pipeline->base.shaders[i]);
3464 }
3465
3466 if (pipeline->base.gs_copy_shader)
3467 radv_shader_unref(device, pipeline->base.gs_copy_shader);
3468 }
3469
3470 static VkResult
3471 radv_graphics_lib_pipeline_init(struct radv_graphics_lib_pipeline *pipeline, struct radv_device *device,
3472 struct vk_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo)
3473 {
3474 VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3475 VkResult result;
3476
3477 const VkGraphicsPipelineLibraryCreateInfoEXT *lib_info =
3478 vk_find_struct_const(pCreateInfo->pNext, GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
3479 const VkPipelineLibraryCreateInfoKHR *libs_info =
3480 vk_find_struct_const(pCreateInfo->pNext, PIPELINE_LIBRARY_CREATE_INFO_KHR);
3481 bool fast_linking_enabled = radv_is_fast_linking_enabled(pCreateInfo);
3482
3483 struct vk_graphics_pipeline_state *state = &pipeline->graphics_state;
3484
3485 pipeline->base.last_vgt_api_stage = MESA_SHADER_NONE;
3486 pipeline->lib_flags = lib_info ? lib_info->flags : 0;
3487
3488 radv_pipeline_layout_init(device, &pipeline->layout, false);
3489
3490 /* If we have libraries, import them first. */
3491 if (libs_info) {
3492 for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3493 VK_FROM_HANDLE(radv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3494 struct radv_graphics_lib_pipeline *gfx_pipeline_lib = radv_pipeline_to_graphics_lib(pipeline_lib);
3495
3496 vk_graphics_pipeline_state_merge(state, &gfx_pipeline_lib->graphics_state);
3497
3498 radv_graphics_pipeline_import_layout(&pipeline->layout, &gfx_pipeline_lib->layout);
3499
3500 radv_graphics_pipeline_import_lib(device, &pipeline->base, gfx_pipeline_lib);
3501
3502 pipeline->lib_flags |= gfx_pipeline_lib->lib_flags;
3503 }
3504 }
3505
3506 result = vk_graphics_pipeline_state_fill(&device->vk, state, pCreateInfo, NULL, 0, NULL, NULL,
3507 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT, &pipeline->state_data);
3508 if (result != VK_SUCCESS)
3509 return result;
3510
3511 radv_pipeline_import_graphics_info(device, &pipeline->base, pCreateInfo);
3512
3513 if (pipeline_layout)
3514 radv_graphics_pipeline_import_layout(&pipeline->layout, pipeline_layout);
3515
3516 const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
3517
3518 if (binary_info && binary_info->binaryCount > 0) {
3519 result = radv_graphics_pipeline_import_binaries(device, &pipeline->base, binary_info);
3520 } else {
3521 struct radv_graphics_pipeline_state gfx_state;
3522
3523 result = radv_generate_graphics_pipeline_state(device, pCreateInfo, &gfx_state);
3524 if (result != VK_SUCCESS)
3525 return result;
3526
3527 result =
3528 radv_graphics_pipeline_compile(&pipeline->base, pCreateInfo, &gfx_state, device, cache, fast_linking_enabled);
3529
3530 radv_graphics_pipeline_state_finish(device, &gfx_state);
3531 }
3532
3533 return result;
3534 }
3535
3536 static VkResult
3537 radv_graphics_lib_pipeline_create(VkDevice _device, VkPipelineCache _cache,
3538 const VkGraphicsPipelineCreateInfo *pCreateInfo,
3539 const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
3540 {
3541 VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
3542 VK_FROM_HANDLE(radv_device, device, _device);
3543 struct radv_graphics_lib_pipeline *pipeline;
3544 VkResult result;
3545
3546 pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3547 if (pipeline == NULL)
3548 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3549
3550 radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_GRAPHICS_LIB);
3551 pipeline->base.base.create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
3552
3553 pipeline->mem_ctx = ralloc_context(NULL);
3554
3555 result = radv_graphics_lib_pipeline_init(pipeline, device, cache, pCreateInfo);
3556 if (result != VK_SUCCESS) {
3557 radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
3558 return result;
3559 }
3560
3561 *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);
3562
3563 return VK_SUCCESS;
3564 }
3565
3566 void
3567 radv_destroy_graphics_lib_pipeline(struct radv_device *device, struct radv_graphics_lib_pipeline *pipeline)
3568 {
3569 struct radv_retained_shaders *retained_shaders = &pipeline->retained_shaders;
3570
3571 radv_pipeline_layout_finish(device, &pipeline->layout);
3572
3573 vk_free(&device->vk.alloc, pipeline->state_data);
3574
3575 for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
3576 free(retained_shaders->stages[i].serialized_nir);
3577 }
3578
3579 ralloc_free(pipeline->mem_ctx);
3580
3581 radv_destroy_graphics_pipeline(device, &pipeline->base);
3582 }
3583
3584 VKAPI_ATTR VkResult VKAPI_CALL
3585 radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
3586 const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
3587 VkPipeline *pPipelines)
3588 {
3589 VkResult result = VK_SUCCESS;
3590 unsigned i = 0;
3591
3592 for (; i < count; i++) {
3593 const VkPipelineCreateFlagBits2KHR create_flags = vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
3594 VkResult r;
3595 if (create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
3596 r = radv_graphics_lib_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
3597 } else {
3598 r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, &pPipelines[i]);
3599 }
3600 if (r != VK_SUCCESS) {
3601 result = r;
3602 pPipelines[i] = VK_NULL_HANDLE;
3603
3604 if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
3605 break;
3606 }
3607 }
3608
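/* Entries that were not created (including those skipped after an early-return failure) are set
 * to VK_NULL_HANDLE, as the spec requires.
 */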
3609 for (; i < count; ++i)
3610 pPipelines[i] = VK_NULL_HANDLE;
3611
3612 return result;
3613 }
3614