1 /*
2 * Copyright 2018 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
31
32 #include "pipe/p_state.h"
33
34 #include "nir.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
40
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43
44 #include "util/u_memory.h"
45
46 #include "compiler/spirv/nir_spirv.h"
47 #include "compiler/spirv/spirv_info.h"
48 #include "vk_util.h"
49
50 bool
51 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
52
53
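/* Recursively copy the value behind one deref into another: structs/interfaces
 * and arrays/matrices are walked member by member until plain load/store pairs
 * can be emitted. Both derefs must have the same bare type. */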
54 static void
55 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
56 {
57 assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
58 if (glsl_type_is_struct_or_ifc(dst->type)) {
59 for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
60 copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
61 }
62 } else if (glsl_type_is_array_or_matrix(dst->type)) {
63 unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
64 for (unsigned i = 0; i < count; i++) {
65 copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
66 }
67 } else {
68 nir_def *load = nir_load_deref(b, src);
69 nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
70 }
71 }
72
73 static bool
74 is_clipcull_dist(int location)
75 {
76 switch (location) {
77 case VARYING_SLOT_CLIP_DIST0:
78 case VARYING_SLOT_CLIP_DIST1:
79 case VARYING_SLOT_CULL_DIST0:
80 case VARYING_SLOT_CULL_DIST1:
81 return true;
82 default: break;
83 }
84 return false;
85 }
86
87 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
88
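/* Create the gfx_pushconst push-constant variable whose members mirror
 * struct zink_gfx_push_constant (each field as a uint array), so the ntv
 * push-constant loader can address fields by ZINK_GFX_PUSHCONST_* index. */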
89 static void
90 create_gfx_pushconst(nir_shader *nir)
91 {
92 #define PUSHCONST_MEMBER(member_idx, field) \
93 fields[member_idx].type = \
94 glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
95 fields[member_idx].name = ralloc_asprintf(nir, #field); \
96 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
97
98 nir_variable *pushconst;
99 /* create compatible layout for the ntv push constant loader */
100 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
101 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
102 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
103 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
104 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
105 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
106 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
107 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
108 PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
109
110 pushconst = nir_variable_create(nir, nir_var_mem_push_const,
111 glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
112 "gfx_pushconst");
113 pushconst->data.location = INT_MAX; //doesn't really matter
114
115 #undef PUSHCONST_MEMBER
116 }
117
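/* Rewrite load_base_vertex so it yields 0 for non-indexed draws: the original
 * value is only selected when the draw_mode_is_indexed push constant is 1. */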
118 static bool
119 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
120 {
121 if (instr->intrinsic != nir_intrinsic_load_base_vertex)
122 return false;
123
124 b->cursor = nir_after_instr(&instr->instr);
125 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
126 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
127 load->num_components = 1;
128 nir_def_init(&load->instr, &load->def, 1, 32);
129 nir_builder_instr_insert(b, &load->instr);
130
131 nir_def *composite = nir_build_alu(b, nir_op_bcsel,
132 nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
133 &instr->def,
134 nir_imm_int(b, 0),
135 NULL);
136
137 nir_def_rewrite_uses_after(&instr->def, composite,
138 composite->parent_instr);
139 return true;
140 }
141
142 static bool
143 lower_basevertex(nir_shader *shader)
144 {
145 if (shader->info.stage != MESA_SHADER_VERTEX)
146 return false;
147
148 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
149 return false;
150
151 return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
152 nir_metadata_dominance, NULL);
153 }
154
155
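/* Replace load_draw_id with a load of the draw_id value supplied through the
 * gfx push constants. */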
156 static bool
157 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
158 {
159 if (instr->intrinsic != nir_intrinsic_load_draw_id)
160 return false;
161
162 b->cursor = nir_before_instr(&instr->instr);
163 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
164 load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
165 load->num_components = 1;
166 nir_def_init(&load->instr, &load->def, 1, 32);
167 nir_builder_instr_insert(b, &load->instr);
168
169 nir_def_rewrite_uses(&instr->def, &load->def);
170
171 return true;
172 }
173
174 static bool
175 lower_drawid(nir_shader *shader)
176 {
177 if (shader->info.stage != MESA_SHADER_VERTEX)
178 return false;
179
180 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
181 return false;
182
183 return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
184 nir_metadata_dominance, NULL);
185 }
186
187 struct lower_gl_point_state {
188 nir_variable *gl_pos_out;
189 nir_variable *gl_point_size;
190 };
191
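/* At each emit_vertex on stream 0, expand the current point into a 4-vertex
 * triangle-strip quad; the corner offsets are derived from gl_PointSize, the
 * viewport-scale push constant and gl_Position.w. */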
192 static bool
193 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
194 {
195 struct lower_gl_point_state *state = data;
196 nir_def *vp_scale, *pos;
197
198 if (instr->type != nir_instr_type_intrinsic)
199 return false;
200
201 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
202 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
203 intrin->intrinsic != nir_intrinsic_emit_vertex)
204 return false;
205
206 if (nir_intrinsic_stream_id(intrin) != 0)
207 return false;
208
209 if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
210 intrin->intrinsic == nir_intrinsic_end_primitive) {
211 nir_instr_remove(&intrin->instr);
212 return true;
213 }
214
215 b->cursor = nir_before_instr(instr);
216
217 // load the viewport scale from the push constants
218 nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
219 vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
220
221 // Load point info values
222 nir_def *point_size = nir_load_var(b, state->gl_point_size);
223 nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
224
225 // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
226 nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
227 w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
228 // half_w_delta = w_delta / 2
229 nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
230
231 // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
232 nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
233 h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
234 // half_h_delta = h_delta / 2
235 nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
236
237 nir_def *point_dir[4][2] = {
238 { nir_imm_float(b, -1), nir_imm_float(b, -1) },
239 { nir_imm_float(b, -1), nir_imm_float(b, 1) },
240 { nir_imm_float(b, 1), nir_imm_float(b, -1) },
241 { nir_imm_float(b, 1), nir_imm_float(b, 1) }
242 };
243
244 nir_def *point_pos_x = nir_channel(b, point_pos, 0);
245 nir_def *point_pos_y = nir_channel(b, point_pos, 1);
246
247 for (size_t i = 0; i < 4; i++) {
248 pos = nir_vec4(b,
249 nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
250 nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
251 nir_channel(b, point_pos, 2),
252 nir_channel(b, point_pos, 3));
253
254 nir_store_var(b, state->gl_pos_out, pos, 0xf);
255
256 nir_emit_vertex(b);
257 }
258
259 nir_end_primitive(b);
260
261 nir_instr_remove(&intrin->instr);
262
263 return true;
264 }
265
266 static bool
267 lower_gl_point_gs(nir_shader *shader)
268 {
269 struct lower_gl_point_state state;
270
271 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
272 shader->info.gs.vertices_out *= 4;
273
274 // Gets the gl_Position in and out
275 state.gl_pos_out =
276 nir_find_variable_with_location(shader, nir_var_shader_out,
277 VARYING_SLOT_POS);
278 state.gl_point_size =
279 nir_find_variable_with_location(shader, nir_var_shader_out,
280 VARYING_SLOT_PSIZ);
281
282 // if position in or gl_PointSize aren't written, we have nothing to do
283 if (!state.gl_pos_out || !state.gl_point_size)
284 return false;
285
286 return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
287 nir_metadata_dominance, &state);
288 }
289
290 struct lower_pv_mode_state {
291 nir_variable *varyings[VARYING_SLOT_MAX][4];
292 nir_variable *pos_counter;
293 nir_variable *out_pos_counter;
294 nir_variable *ring_offset;
295 unsigned ring_size;
296 unsigned primitive_vert_count;
297 unsigned prim;
298 };
299
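/* Wrap an index into the per-varying ring buffer, relative to ring_offset. */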
300 static nir_def*
301 lower_pv_mode_gs_ring_index(nir_builder *b,
302 struct lower_pv_mode_state *state,
303 nir_def *index)
304 {
305 nir_def *ring_offset = nir_load_var(b, state->ring_offset);
306 return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
307 state->ring_size);
308 }
309
310 /* Given the final deref in a chain of derefs, this function walks up the chain
311 * until it finds a var deref.
312 *
313 * It then recreates an identical chain that ends with the provided deref.
314 */
315 static nir_deref_instr*
316 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
317 {
318 nir_deref_instr *parent = nir_deref_instr_parent(old);
319 if (!parent)
320 return new;
321 switch(old->deref_type) {
322 case nir_deref_type_var:
323 return new;
324 case nir_deref_type_array:
325 return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
326 case nir_deref_type_struct:
327 return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
328 case nir_deref_type_array_wildcard:
329 case nir_deref_type_ptr_as_array:
330 case nir_deref_type_cast:
331 unreachable("unexpected deref type");
332 }
333 unreachable("impossible deref type");
334 }
335
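/* Redirect stores to shader_out varyings into the local ring arrays, indexed
 * by the current pos_counter. */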
336 static bool
337 lower_pv_mode_gs_store(nir_builder *b,
338 nir_intrinsic_instr *intrin,
339 struct lower_pv_mode_state *state)
340 {
341 b->cursor = nir_before_instr(&intrin->instr);
342 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
343 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
344 nir_variable *var = nir_deref_instr_get_variable(deref);
345
346 gl_varying_slot location = var->data.location;
347 unsigned location_frac = var->data.location_frac;
348 assert(state->varyings[location][location_frac]);
349 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
350 nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
351 nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
352 nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
353 // recreate the chain of derefs that led to the store.
354 nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
355 nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
356 nir_instr_remove(&intrin->instr);
357 return true;
358 }
359
360 return false;
361 }
362
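/* Re-emit one primitive from the ring buffer with its vertices rotated
 * (according to the input topology in state->prim) so the intended provoking
 * vertex ends up where the consumer expects it. */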
363 static void
364 lower_pv_mode_emit_rotated_prim(nir_builder *b,
365 struct lower_pv_mode_state *state,
366 nir_def *current_vertex)
367 {
368 nir_def *two = nir_imm_int(b, 2);
369 nir_def *three = nir_imm_int(b, 3);
370 bool is_triangle = state->primitive_vert_count == 3;
371 /* This shader will always see the last three vertices emitted by the user gs.
372 * The following table is used to rotate primitives within a strip generated
373 * by the user gs such that the last vertex becomes the first.
374 *
375 * [lines, tris][even/odd index][vertex mod 3]
376 */
377 static const unsigned vert_maps[2][2][3] = {
378 {{1, 0, 0}, {1, 0, 0}},
379 {{2, 0, 1}, {2, 1, 0}}
380 };
381 /* When the primitive supplied to the gs comes from a strip, the provoking vertex
382 * is either the last or the second, depending on whether the triangle is at an odd
383 * or even position within the strip.
384 *
385 * odd or even primitive within draw
386 */
387 nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
388 for (unsigned i = 0; i < state->primitive_vert_count; i++) {
389 /* odd or even triangle within strip emitted by user GS
390 * this is handled using the table
391 */
392 nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
393 unsigned offset_even = vert_maps[is_triangle][0][i];
394 unsigned offset_odd = vert_maps[is_triangle][1][i];
395 nir_def *offset_even_value = nir_imm_int(b, offset_even);
396 nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
397 nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
398 offset_odd_value, offset_even_value);
399 /* Here we account for how triangles are provided to the gs from a strip.
400 * For even primitives we rotate by 3, meaning we do nothing.
401 * For odd primitives we rotate by 2; combined with the previous rotation, this
402 * means the second vertex becomes the last.
403 */
404 if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
405 rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
406 nir_isub(b, three,
407 odd_prim)),
408 three);
409 /* Triangles that come from fans are provided to the gs the same way as
410 * odd triangles from a strip so always rotate by 2.
411 */
412 else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
413 rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
414 three);
415 rotated_i = nir_iadd(b, rotated_i, current_vertex);
416 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
417 gl_varying_slot location = var->data.location;
418 unsigned location_frac = var->data.location_frac;
419 if (state->varyings[location][location_frac]) {
420 nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
421 nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
422 copy_vars(b, nir_build_deref_var(b, var), value);
423 }
424 }
425 nir_emit_vertex(b);
426 }
427 }
428
429 static bool
430 lower_pv_mode_gs_emit_vertex(nir_builder *b,
431 nir_intrinsic_instr *intrin,
432 struct lower_pv_mode_state *state)
433 {
434 b->cursor = nir_before_instr(&intrin->instr);
435
436 // increment pos_counter
437 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
438 nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
439
440 nir_instr_remove(&intrin->instr);
441 return true;
442 }
443
444 static bool
445 lower_pv_mode_gs_end_primitive(nir_builder *b,
446 nir_intrinsic_instr *intrin,
447 struct lower_pv_mode_state *state)
448 {
449 b->cursor = nir_before_instr(&intrin->instr);
450
451 nir_def *pos_counter = nir_load_var(b, state->pos_counter);
452 nir_push_loop(b);
453 {
454 nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
455 nir_break_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
456 nir_imm_int(b, state->primitive_vert_count)));
457
458 lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
459 nir_end_primitive(b);
460
461 nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
462 }
463 nir_pop_loop(b, NULL);
464 /* Set the ring offset such that when position 0 is
465 * read we get the last value written
466 */
467 nir_store_var(b, state->ring_offset, pos_counter, 1);
468 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
469 nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
470
471 nir_instr_remove(&intrin->instr);
472 return true;
473 }
474
475 static bool
476 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
477 {
478 if (instr->type != nir_instr_type_intrinsic)
479 return false;
480
481 struct lower_pv_mode_state *state = data;
482 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
483
484 switch (intrin->intrinsic) {
485 case nir_intrinsic_store_deref:
486 return lower_pv_mode_gs_store(b, intrin, state);
487 case nir_intrinsic_copy_deref:
488 unreachable("should be lowered");
489 case nir_intrinsic_emit_vertex_with_counter:
490 case nir_intrinsic_emit_vertex:
491 return lower_pv_mode_gs_emit_vertex(b, intrin, state);
492 case nir_intrinsic_end_primitive:
493 case nir_intrinsic_end_primitive_with_counter:
494 return lower_pv_mode_gs_end_primitive(b, intrin, state);
495 default:
496 return false;
497 }
498 }
499
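/* Provoking-vertex emulation for geometry shaders: emitted vertices are
 * buffered into per-varying ring arrays, and whole primitives are re-emitted
 * with rotated vertex order when the original shader ends a primitive. */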
500 static bool
501 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
502 {
503 nir_builder b;
504 struct lower_pv_mode_state state;
505 memset(state.varyings, 0, sizeof(state.varyings));
506
507 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
508 b = nir_builder_at(nir_before_impl(entry));
509
510 state.primitive_vert_count =
511 mesa_vertices_per_prim(shader->info.gs.output_primitive);
512 state.ring_size = shader->info.gs.vertices_out;
513
514 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
515 gl_varying_slot location = var->data.location;
516 unsigned location_frac = var->data.location_frac;
517
518 char name[100];
519 snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
520 state.varyings[location][location_frac] =
521 nir_local_variable_create(entry,
522 glsl_array_type(var->type,
523 state.ring_size,
524 false),
525 name);
526 }
527
528 state.pos_counter = nir_local_variable_create(entry,
529 glsl_uint_type(),
530 "__pos_counter");
531
532 state.out_pos_counter = nir_local_variable_create(entry,
533 glsl_uint_type(),
534 "__out_pos_counter");
535
536 state.ring_offset = nir_local_variable_create(entry,
537 glsl_uint_type(),
538 "__ring_offset");
539
540 state.prim = prim;
541
542 // initialize pos_counter, out_pos_counter and ring_offset
543 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
544 nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
545 nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
546
547 shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
548 (state.primitive_vert_count - 1)) *
549 state.primitive_vert_count;
550 return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
551 nir_metadata_dominance, &state);
552 }
553
554 struct lower_line_stipple_state {
555 nir_variable *pos_out;
556 nir_variable *stipple_out;
557 nir_variable *prev_pos;
558 nir_variable *pos_counter;
559 nir_variable *stipple_counter;
560 bool line_rectangular;
561 };
562
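/* Map a clip-space position to screen space: divide xy by w and multiply by
 * the viewport scale. */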
563 static nir_def *
564 viewport_map(nir_builder *b, nir_def *vert,
565 nir_def *scale)
566 {
567 nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
568 nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
569 w_recip);
570 return nir_fmul(b, ndc_point, scale);
571 }
572
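/* At each emitted vertex (after the first), accumulate the screen-space
 * distance to the previous vertex into stipple_counter and forward the running
 * value to the fragment shader via the __stipple output. */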
573 static bool
574 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
575 {
576 struct lower_line_stipple_state *state = data;
577 if (instr->type != nir_instr_type_intrinsic)
578 return false;
579
580 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
581 if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
582 intrin->intrinsic != nir_intrinsic_emit_vertex)
583 return false;
584
585 b->cursor = nir_before_instr(instr);
586
587 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
588 // viewport-map endpoints
589 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
590 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
591 nir_def *prev = nir_load_var(b, state->prev_pos);
592 nir_def *curr = nir_load_var(b, state->pos_out);
593 prev = viewport_map(b, prev, vp_scale);
594 curr = viewport_map(b, curr, vp_scale);
595
596 // calculate length of line
597 nir_def *len;
598 if (state->line_rectangular)
599 len = nir_fast_distance(b, prev, curr);
600 else {
601 nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
602 len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
603 }
604 // update stipple_counter
605 nir_store_var(b, state->stipple_counter,
606 nir_fadd(b, nir_load_var(b, state->stipple_counter),
607 len), 1);
608 nir_pop_if(b, NULL);
609 // emit stipple out
610 nir_copy_var(b, state->stipple_out, state->stipple_counter);
611 nir_copy_var(b, state->prev_pos, state->pos_out);
612
613 // update prev_pos and pos_counter for next vertex
614 b->cursor = nir_after_instr(instr);
615 nir_store_var(b, state->pos_counter,
616 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
617 1), 1);
618
619 return true;
620 }
621
622 static bool
623 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
624 {
625 nir_builder b;
626 struct lower_line_stipple_state state;
627
628 state.pos_out =
629 nir_find_variable_with_location(shader, nir_var_shader_out,
630 VARYING_SLOT_POS);
631
632 // if position isn't written, we have nothing to do
633 if (!state.pos_out)
634 return false;
635
636 state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
637 glsl_float_type(),
638 "__stipple");
639 state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
640 state.stipple_out->data.driver_location = shader->num_outputs++;
641 state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
642 shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
643
644 // create temp variables
645 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
646 glsl_vec4_type(),
647 "__prev_pos");
648 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
649 glsl_uint_type(),
650 "__pos_counter");
651 state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
652 glsl_float_type(),
653 "__stipple_counter");
654
655 state.line_rectangular = line_rectangular;
656 // initialize pos_counter and stipple_counter
657 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
658 b = nir_builder_at(nir_before_impl(entry));
659 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
660 nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
661
662 return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
663 nir_metadata_dominance, &state);
664 }
665
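/* Evaluate the stipple pattern per covered sample: interpolate __stipple at
 * each sample, pick the corresponding pattern bit and clear the sample from
 * the output sample mask when that bit is 0. */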
666 static bool
667 lower_line_stipple_fs(nir_shader *shader)
668 {
669 nir_builder b;
670 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
671 b = nir_builder_at(nir_after_impl(entry));
672
673 // create stipple counter
674 nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
675 glsl_float_type(),
676 "__stipple");
677 stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
678 stipple->data.driver_location = shader->num_inputs++;
679 stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
680 shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
681
682 nir_variable *sample_mask_out =
683 nir_find_variable_with_location(shader, nir_var_shader_out,
684 FRAG_RESULT_SAMPLE_MASK);
685 if (!sample_mask_out) {
686 sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
687 glsl_uint_type(), "sample_mask");
688 sample_mask_out->data.driver_location = shader->num_outputs++;
689 sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
690 }
691
692 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
693 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
694 nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
695 pattern = nir_iand_imm(&b, pattern, 0xffff);
696
697 nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
698 nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
699 nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
700 nir_store_var(&b, v, sample_mask_in, 1);
701 nir_store_var(&b, sample_mask, sample_mask_in, 1);
702 nir_push_loop(&b);
703 {
704 nir_def *value = nir_load_var(&b, v);
705 nir_def *index = nir_ufind_msb(&b, value);
706 nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
707 nir_def *new_value = nir_ixor(&b, value, index_mask);
708 nir_store_var(&b, v, new_value, 1);
709 nir_push_if(&b, nir_ieq_imm(&b, value, 0));
710 nir_jump(&b, nir_jump_break);
711 nir_pop_if(&b, NULL);
712
713 nir_def *stipple_pos =
714 nir_interp_deref_at_sample(&b, 1, 32,
715 &nir_build_deref_var(&b, stipple)->def, index);
716 stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
717 nir_imm_float(&b, 16.0));
718 stipple_pos = nir_f2i32(&b, stipple_pos);
719 nir_def *bit =
720 nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
721 nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
722 {
723 nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
724 sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
725 nir_store_var(&b, sample_mask, sample_mask_value, 1);
726 }
727 nir_pop_if(&b, NULL);
728 }
729 nir_pop_loop(&b, NULL);
730 nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
731
732 return true;
733 }
734
735 struct lower_line_smooth_state {
736 nir_variable *pos_out;
737 nir_variable *line_coord_out;
738 nir_variable *prev_pos;
739 nir_variable *pos_counter;
740 nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
741 *varyings[VARYING_SLOT_MAX][4]; // location_frac
742 };
743
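/* Capture stores to non-position outputs into temporaries so they can be
 * replayed for every vertex of the generated quads. */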
744 static bool
745 lower_line_smooth_gs_store(nir_builder *b,
746 nir_intrinsic_instr *intrin,
747 struct lower_line_smooth_state *state)
748 {
749 b->cursor = nir_before_instr(&intrin->instr);
750 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
751 if (nir_deref_mode_is(deref, nir_var_shader_out)) {
752 nir_variable *var = nir_deref_instr_get_variable(deref);
753
754 // we take care of position elsewhere
755 gl_varying_slot location = var->data.location;
756 unsigned location_frac = var->data.location_frac;
757 if (location != VARYING_SLOT_POS) {
758 assert(state->varyings[location][location_frac]);
759 nir_store_var(b, state->varyings[location][location_frac],
760 intrin->src[1].ssa,
761 nir_intrinsic_write_mask(intrin));
762 nir_instr_remove(&intrin->instr);
763 return true;
764 }
765 }
766
767 return false;
768 }
769
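/* Once two vertices are available, emit an 8-vertex triangle strip covering
 * the widened line (end caps plus body) and write the __line_coord varying
 * describing each corner's position within the widened line. */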
770 static bool
771 lower_line_smooth_gs_emit_vertex(nir_builder *b,
772 nir_intrinsic_instr *intrin,
773 struct lower_line_smooth_state *state)
774 {
775 b->cursor = nir_before_instr(&intrin->instr);
776
777 nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
778 nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
779 nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
780 nir_def *prev = nir_load_var(b, state->prev_pos);
781 nir_def *curr = nir_load_var(b, state->pos_out);
782 nir_def *prev_vp = viewport_map(b, prev, vp_scale);
783 nir_def *curr_vp = viewport_map(b, curr, vp_scale);
784
785 nir_def *width = nir_load_push_constant_zink(b, 1, 32,
786 nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
787 nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
788
789 const unsigned yx[2] = { 1, 0 };
790 nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
791 nir_def *len = nir_fast_length(b, vec);
792 nir_def *dir = nir_normalize(b, vec);
793 nir_def *half_length = nir_fmul_imm(b, len, 0.5);
794 half_length = nir_fadd_imm(b, half_length, 0.5);
795
796 nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
797 nir_def *tangent =
798 nir_fmul(b,
799 nir_fmul(b,
800 nir_swizzle(b, dir, yx, 2),
801 nir_imm_vec2(b, 1.0, -1.0)),
802 vp_scale_rcp);
803 tangent = nir_fmul(b, tangent, half_width);
804 tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
805 dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
806
807 nir_def *line_offets[8] = {
808 nir_fadd(b, tangent, nir_fneg(b, dir)),
809 nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
810 tangent,
811 nir_fneg(b, tangent),
812 tangent,
813 nir_fneg(b, tangent),
814 nir_fadd(b, tangent, dir),
815 nir_fadd(b, nir_fneg(b, tangent), dir),
816 };
817 nir_def *line_coord =
818 nir_vec4(b, half_width, half_width, half_length, half_length);
819 nir_def *line_coords[8] = {
820 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, -1, 1)),
821 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, -1, 1)),
822 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
823 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
824 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 0, 1)),
825 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 0, 1)),
826 nir_fmul(b, line_coord, nir_imm_vec4(b, -1, 1, 1, 1)),
827 nir_fmul(b, line_coord, nir_imm_vec4(b, 1, 1, 1, 1)),
828 };
829
830 /* emit first end-cap, and start line */
831 for (int i = 0; i < 4; ++i) {
832 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
833 gl_varying_slot location = var->data.location;
834 unsigned location_frac = var->data.location_frac;
835 if (state->prev_varyings[location][location_frac])
836 nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
837 }
838 nir_store_var(b, state->pos_out,
839 nir_fadd(b, prev, nir_fmul(b, line_offets[i],
840 nir_channel(b, prev, 3))), 0xf);
841 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
842 nir_emit_vertex(b);
843 }
844
845 /* finish line and emit last end-cap */
846 for (int i = 4; i < 8; ++i) {
847 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
848 gl_varying_slot location = var->data.location;
849 unsigned location_frac = var->data.location_frac;
850 if (state->varyings[location][location_frac])
851 nir_copy_var(b, var, state->varyings[location][location_frac]);
852 }
853 nir_store_var(b, state->pos_out,
854 nir_fadd(b, curr, nir_fmul(b, line_offets[i],
855 nir_channel(b, curr, 3))), 0xf);
856 nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
857 nir_emit_vertex(b);
858 }
859 nir_end_primitive(b);
860
861 nir_pop_if(b, NULL);
862
863 nir_copy_var(b, state->prev_pos, state->pos_out);
864 nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
865 gl_varying_slot location = var->data.location;
866 unsigned location_frac = var->data.location_frac;
867 if (state->varyings[location][location_frac])
868 nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
869 }
870
871 // update prev_pos and pos_counter for next vertex
872 b->cursor = nir_after_instr(&intrin->instr);
873 nir_store_var(b, state->pos_counter,
874 nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
875 1), 1);
876
877 nir_instr_remove(&intrin->instr);
878 return true;
879 }
880
881 static bool
882 lower_line_smooth_gs_end_primitive(nir_builder *b,
883 nir_intrinsic_instr *intrin,
884 struct lower_line_smooth_state *state)
885 {
886 b->cursor = nir_before_instr(&intrin->instr);
887
888 // reset line counter
889 nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
890
891 nir_instr_remove(&intrin->instr);
892 return true;
893 }
894
895 static bool
896 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
897 {
898 if (instr->type != nir_instr_type_intrinsic)
899 return false;
900
901 struct lower_line_smooth_state *state = data;
902 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
903
904 switch (intrin->intrinsic) {
905 case nir_intrinsic_store_deref:
906 return lower_line_smooth_gs_store(b, intrin, state);
907 case nir_intrinsic_copy_deref:
908 unreachable("should be lowered");
909 case nir_intrinsic_emit_vertex_with_counter:
910 case nir_intrinsic_emit_vertex:
911 return lower_line_smooth_gs_emit_vertex(b, intrin, state);
912 case nir_intrinsic_end_primitive:
913 case nir_intrinsic_end_primitive_with_counter:
914 return lower_line_smooth_gs_end_primitive(b, intrin, state);
915 default:
916 return false;
917 }
918 }
919
920 static bool
921 lower_line_smooth_gs(nir_shader *shader)
922 {
923 nir_builder b;
924 struct lower_line_smooth_state state;
925
926 memset(state.varyings, 0, sizeof(state.varyings));
927 memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
928 nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
929 gl_varying_slot location = var->data.location;
930 unsigned location_frac = var->data.location_frac;
931 if (location == VARYING_SLOT_POS)
932 continue;
933
934 char name[100];
935 snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
936 state.varyings[location][location_frac] =
937 nir_variable_create(shader, nir_var_shader_temp,
938 var->type, name);
939
940 snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
941 state.prev_varyings[location][location_frac] =
942 nir_variable_create(shader, nir_var_shader_temp,
943 var->type, name);
944 }
945
946 state.pos_out =
947 nir_find_variable_with_location(shader, nir_var_shader_out,
948 VARYING_SLOT_POS);
949
950 // if position isn't written, we have nothing to do
951 if (!state.pos_out)
952 return false;
953
954 unsigned location = 0;
955 nir_foreach_shader_in_variable(var, shader) {
956 if (var->data.driver_location >= location)
957 location = var->data.driver_location + 1;
958 }
959
960 state.line_coord_out =
961 nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
962 "__line_coord");
963 state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
964 state.line_coord_out->data.driver_location = location;
965 state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
966 shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
967 shader->num_outputs++;
968
969 // create temp variables
970 state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
971 glsl_vec4_type(),
972 "__prev_pos");
973 state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
974 glsl_uint_type(),
975 "__pos_counter");
976
977 // initialize pos_counter
978 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
979 b = nir_builder_at(nir_before_impl(entry));
980 nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
981
982 shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
983 shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
984
985 return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
986 nir_metadata_dominance, &state);
987 }
988
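/* Wire zink's line-stipple inputs (the __stipple varying and the pattern push
 * constant) into nir_lower_aaline_fs, which does the actual coverage math. */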
989 static bool
990 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
991 {
992 int dummy;
993 nir_builder b;
994
995 nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
996 if (lower_stipple) {
997 stipple_counter = nir_variable_create(shader, nir_var_shader_in,
998 glsl_float_type(),
999 "__stipple");
1000 stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1001 stipple_counter->data.driver_location = shader->num_inputs++;
1002 stipple_counter->data.location =
1003 MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1004 shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1005
1006 stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1007 glsl_uint_type(),
1008 "stipple_pattern");
1009
1010 // initialize stipple_pattern
1011 nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1012 b = nir_builder_at(nir_before_impl(entry));
1013 nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1014 nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1015 nir_store_var(&b, stipple_pattern, pattern, 1);
1016 }
1017
1018 nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1019 return true;
1020 }
1021
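/* Dual-source blending: move the FRAG_RESULT_DATA1 output onto
 * FRAG_RESULT_DATA0 with index 1, matching Vulkan's location/index model. */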
1022 static bool
1023 lower_dual_blend(nir_shader *shader)
1024 {
1025 bool progress = false;
1026 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1027 if (var) {
1028 var->data.location = FRAG_RESULT_DATA0;
1029 var->data.index = 1;
1030 progress = true;
1031 }
1032 nir_shader_preserve_all_metadata(shader);
1033 return progress;
1034 }
1035
1036 static bool
1037 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1038 {
1039 if (instr->type != nir_instr_type_alu)
1040 return false;
1041 nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1042 if (alu_instr->op != nir_op_pack_64_2x32 &&
1043 alu_instr->op != nir_op_unpack_64_2x32)
1044 return false;
1045 b->cursor = nir_before_instr(&alu_instr->instr);
1046 nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1047 nir_def *dest;
1048 switch (alu_instr->op) {
1049 case nir_op_pack_64_2x32:
1050 dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1051 break;
1052 case nir_op_unpack_64_2x32:
1053 dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1054 break;
1055 default:
1056 unreachable("Impossible opcode");
1057 }
1058 nir_def_replace(&alu_instr->def, dest);
1059 return true;
1060 }
1061
1062 static bool
1063 lower_64bit_pack(nir_shader *shader)
1064 {
1065 return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1066 nir_metadata_control_flow, NULL);
1067 }
1068
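/* Build a passthrough GS that turns 4-vertex lines_adjacency input into two
 * triangles to emulate quads, choosing the triangle vertex order based on
 * load_provoking_last. */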
1069 nir_shader *
1070 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1071 const nir_shader *prev_stage)
1072 {
1073 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1074 options,
1075 "filled quad gs");
1076
1077 nir_shader *nir = b.shader;
1078 nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1079 nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1080 nir->info.gs.vertices_in = 4;
1081 nir->info.gs.vertices_out = 6;
1082 nir->info.gs.invocations = 1;
1083 nir->info.gs.active_stream_mask = 1;
1084
1085 nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1086 memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1087 if (prev_stage->xfb_info) {
1088 size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1089 nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1090 }
1091
1092 nir_variable *in_vars[VARYING_SLOT_MAX];
1093 nir_variable *out_vars[VARYING_SLOT_MAX];
1094 unsigned num_vars = 0;
1095
1096 /* Create input/output variables. */
1097 nir_foreach_shader_out_variable(var, prev_stage) {
1098 assert(!var->data.patch);
1099
1100 /* input vars can't be created for those */
1101 if (var->data.location == VARYING_SLOT_LAYER ||
1102 var->data.location == VARYING_SLOT_VIEW_INDEX ||
1103 /* psiz not needed for quads */
1104 var->data.location == VARYING_SLOT_PSIZ)
1105 continue;
1106
1107 char name[100];
1108 if (var->name)
1109 snprintf(name, sizeof(name), "in_%s", var->name);
1110 else
1111 snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1112
1113 nir_variable *in = nir_variable_clone(var, nir);
1114 ralloc_free(in->name);
1115 in->name = ralloc_strdup(in, name);
1116 in->type = glsl_array_type(var->type, 4, false);
1117 in->data.mode = nir_var_shader_in;
1118 nir_shader_add_variable(nir, in);
1119
1120 if (var->name)
1121 snprintf(name, sizeof(name), "out_%s", var->name);
1122 else
1123 snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1124
1125 nir_variable *out = nir_variable_clone(var, nir);
1126 ralloc_free(out->name);
1127 out->name = ralloc_strdup(out, name);
1128 out->data.mode = nir_var_shader_out;
1129 nir_shader_add_variable(nir, out);
1130
1131 in_vars[num_vars] = in;
1132 out_vars[num_vars++] = out;
1133 }
1134
1135 int mapping_first[] = {0, 1, 2, 0, 2, 3};
1136 int mapping_last[] = {0, 1, 3, 1, 2, 3};
1137 nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1138 last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1139 for (unsigned i = 0; i < 6; ++i) {
1140 /* swap indices 2 and 3 */
1141 nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1142 nir_imm_int(&b, mapping_last[i]),
1143 nir_imm_int(&b, mapping_first[i]));
1144 /* Copy inputs to outputs. */
1145 for (unsigned j = 0; j < num_vars; ++j) {
1146 if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1147 continue;
1148 }
1149 nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1150 copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1151 }
1152 nir_emit_vertex(&b, 0);
1153 if (i == 2)
1154 nir_end_primitive(&b, 0);
1155 }
1156
1157 nir_end_primitive(&b, 0);
1158 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1159 nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1160 return nir;
1161 }
1162
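/* Turn load_flat_mask / load_provoking_last into UBO loads at fixed offsets so
 * nir_inline_uniforms can later fold them to constants. */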
1163 static bool
1164 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1165 nir_intrinsic_instr *intrin,
1166 void *data)
1167 {
1168 int inlined_uniform_offset;
1169 switch (intrin->intrinsic) {
1170 case nir_intrinsic_load_flat_mask:
1171 inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1172 break;
1173 case nir_intrinsic_load_provoking_last:
1174 inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1175 break;
1176 default:
1177 return false;
1178 }
1179
1180 b->cursor = nir_before_instr(&intrin->instr);
1181 assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1182 /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1183 * anything with a different bit_size) so we need to split the load. */
1184 int num_dwords = intrin->def.bit_size / 32;
1185 nir_def *dwords[2] = {NULL};
1186 for (unsigned i = 0; i < num_dwords; i++)
1187 dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1188 nir_imm_int(b, inlined_uniform_offset + i),
1189 .align_mul = intrin->def.bit_size / 8,
1190 .align_offset = 0,
1191 .range_base = 0, .range = ~0);
1192 nir_def *new_dest_def;
1193 if (intrin->def.bit_size == 32)
1194 new_dest_def = dwords[0];
1195 else
1196 new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1197 nir_def_replace(&intrin->def, new_dest_def);
1198 return true;
1199 }
1200
1201 bool
1202 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1203 {
1204 return nir_shader_intrinsics_pass(nir,
1205 lower_system_values_to_inlined_uniforms_instr,
1206 nir_metadata_dominance, NULL);
1207 }
1208
1209 /* from radeonsi */
1210 static unsigned
1211 amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
1212 {
1213 /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
1214
1215 switch (consumer->info.stage) {
1216 case MESA_SHADER_TESS_CTRL: /* VS->TCS */
1217 /* Non-amplifying shaders can always have their varying expressions
1218 * moved into later shaders.
1219 */
1220 return UINT_MAX;
1221
1222 case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
1223 return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
1224 consumer->info.gs.vertices_in == 2 ? 20 : 14;
1225
1226 case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
1227 case MESA_SHADER_FRAGMENT:
1228 /* Up to 3 uniforms and 5 ALUs. */
1229 return 14;
1230
1231 default:
1232 unreachable("unexpected shader stage");
1233 }
1234 }
1235
1236 /* from radeonsi */
1237 static unsigned
1238 amd_varying_estimate_instr_cost(nir_instr *instr)
1239 {
1240 unsigned dst_bit_size, src_bit_size, num_dst_dwords;
1241 nir_op alu_op;
1242
1243 /* This is a very loose approximation based on gfx10. */
1244 switch (instr->type) {
1245 case nir_instr_type_alu:
1246 dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
1247 src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
1248 alu_op = nir_instr_as_alu(instr)->op;
1249 num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
1250
1251 switch (alu_op) {
1252 case nir_op_mov:
1253 case nir_op_vec2:
1254 case nir_op_vec3:
1255 case nir_op_vec4:
1256 case nir_op_vec5:
1257 case nir_op_vec8:
1258 case nir_op_vec16:
1259 case nir_op_fabs:
1260 case nir_op_fneg:
1261 case nir_op_fsat:
1262 return 0;
1263
1264 case nir_op_imul:
1265 case nir_op_umul_low:
1266 return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
1267
1268 case nir_op_imul_high:
1269 case nir_op_umul_high:
1270 case nir_op_imul_2x32_64:
1271 case nir_op_umul_2x32_64:
1272 return 4;
1273
1274 case nir_op_fexp2:
1275 case nir_op_flog2:
1276 case nir_op_frcp:
1277 case nir_op_frsq:
1278 case nir_op_fsqrt:
1279 case nir_op_fsin:
1280 case nir_op_fcos:
1281 case nir_op_fsin_amd:
1282 case nir_op_fcos_amd:
1283 return 4; /* FP16 & FP32. */
1284
1285 case nir_op_fpow:
1286 return 4 + 1 + 4; /* log2 + mul + exp2 */
1287
1288 case nir_op_fsign:
1289 return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
1290
1291 case nir_op_idiv:
1292 case nir_op_udiv:
1293 case nir_op_imod:
1294 case nir_op_umod:
1295 case nir_op_irem:
1296 return dst_bit_size == 64 ? 80 : 40;
1297
1298 case nir_op_fdiv:
1299 return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
1300
1301 case nir_op_fmod:
1302 case nir_op_frem:
1303 return dst_bit_size == 64 ? 80 : 8;
1304
1305 default:
1306 /* Double opcodes. Comparisons always have full performance. */
1307 if ((dst_bit_size == 64 &&
1308 nir_op_infos[alu_op].output_type & nir_type_float) ||
1309 (dst_bit_size >= 8 && src_bit_size == 64 &&
1310 nir_op_infos[alu_op].input_types[0] & nir_type_float))
1311 return 16;
1312
1313 return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
1314 }
1315
1316 case nir_instr_type_intrinsic:
1317 dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
1318 num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
1319
1320 switch (nir_instr_as_intrinsic(instr)->intrinsic) {
1321 case nir_intrinsic_load_deref:
1322 /* Uniform or UBO load.
1323 * Set a low cost to balance the number of scalar loads and ALUs.
1324 */
1325 return 3 * num_dst_dwords;
1326
1327 default:
1328 unreachable("unexpected intrinsic");
1329 }
1330
1331 default:
1332 unreachable("unexpected instr type");
1333 }
1334 }
1335
1336 void
1337 zink_screen_init_compiler(struct zink_screen *screen)
1338 {
1339 static const struct nir_shader_compiler_options
1340 default_options = {
1341 .io_options = nir_io_glsl_lower_derefs,
1342 .lower_ffma16 = true,
1343 .lower_ffma32 = true,
1344 .lower_ffma64 = true,
1345 .lower_scmp = true,
1346 .lower_fdph = true,
1347 .lower_flrp32 = true,
1348 .lower_fsat = true,
1349 .lower_hadd = true,
1350 .lower_iadd_sat = true,
1351 .lower_fisnormal = true,
1352 .lower_extract_byte = true,
1353 .lower_extract_word = true,
1354 .lower_insert_byte = true,
1355 .lower_insert_word = true,
1356
1357 /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1358 * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1359 * ldexp, so we don't just always lower it in NIR). Given that ldexp is
1360 * effectively unused (no instances in shader-db), it's not worth the
1361 * effort to do so.
1362 * */
1363 .lower_ldexp = true,
1364
1365 .lower_mul_high = true,
1366 .lower_to_scalar = true,
1367 .lower_uadd_carry = true,
1368 .compact_arrays = true,
1369 .lower_usub_borrow = true,
1370 .lower_uadd_sat = true,
1371 .lower_usub_sat = true,
1372 .lower_vector_cmp = true,
1373 .lower_int64_options =
1374 nir_lower_bit_count64 |
1375 nir_lower_find_lsb64 |
1376 nir_lower_ufind_msb64,
1377 .lower_doubles_options = nir_lower_dround_even,
1378 .lower_uniforms_to_ubo = true,
1379 .has_fsub = true,
1380 .has_isub = true,
1381 .lower_mul_2x32_64 = true,
1382 .support_16bit_alu = true, /* not quite what it sounds like */
1383 .support_indirect_inputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
1384 .support_indirect_outputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
1385 .max_unroll_iterations = 0,
1386 .use_interpolated_input_intrinsics = true,
1387 .has_ddx_intrinsics = true,
1388 };
1389
1390 screen->nir_options = default_options;
1391
1392 if (!screen->info.feats.features.shaderInt64)
1393 screen->nir_options.lower_int64_options = ~0;
1394
1395 if (!screen->info.feats.features.shaderFloat64) {
1396 screen->nir_options.lower_doubles_options = ~0;
1397 screen->nir_options.lower_flrp64 = true;
1398 screen->nir_options.lower_ffma64 = true;
1399 /* soft fp64 function inlining will blow up loop bodies and effectively
1400 * stop Vulkan drivers from unrolling the loops.
1401 */
1402 screen->nir_options.max_unroll_iterations_fp64 = 32;
1403 }
1404
1405 if (screen->driver_compiler_workarounds.io_opt) {
1406 screen->nir_options.io_options |= nir_io_glsl_opt_varyings;
1407
1408 switch (zink_driverid(screen)) {
1409 case VK_DRIVER_ID_MESA_RADV:
1410 case VK_DRIVER_ID_AMD_OPEN_SOURCE:
1411 case VK_DRIVER_ID_AMD_PROPRIETARY:
1412 screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1413 screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
1414 break;
1415 default:
1416 mesa_logw("zink: instruction costs not implemented for this implementation!");
1417 screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1418 screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
1419 }
1420 }
1421
1422 /*
1423 The OpFRem and OpFMod instructions use cheap approximations of remainder,
1424 and the error can be large due to the discontinuity in trunc() and floor().
1425 This can produce mathematically unexpected results in some cases, such as
1426 FMod(x,x) computing x rather than 0, and can also cause the result to have
1427 a different sign than the infinitely precise result.
1428
1429 -Table 84. Precision of core SPIR-V Instructions
1430 * for drivers that are known to have imprecise fmod for doubles, lower dmod
1431 */
1432 if (zink_driverid(screen) == VK_DRIVER_ID_MESA_RADV ||
1433 zink_driverid(screen) == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1434 zink_driverid(screen) == VK_DRIVER_ID_AMD_PROPRIETARY)
1435 screen->nir_options.lower_doubles_options = nir_lower_dmod;
1436
1437 if (screen->info.have_EXT_shader_demote_to_helper_invocation)
1438 screen->nir_options.discard_is_demote = true;
1439 }
1440
1441 const void *
1442 zink_get_compiler_options(struct pipe_screen *pscreen,
1443 enum pipe_shader_ir ir,
1444 gl_shader_stage shader)
1445 {
1446 assert(ir == PIPE_SHADER_IR_NIR);
1447 return &zink_screen(pscreen)->nir_options;
1448 }
1449
1450 struct nir_shader *
1451 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1452 {
1453 if (zink_debug & ZINK_DEBUG_TGSI) {
1454 fprintf(stderr, "TGSI shader:\n---8<---\n");
1455 tgsi_dump_to_file(tokens, 0, stderr);
1456 fprintf(stderr, "---8<---\n\n");
1457 }
1458
1459 return tgsi_to_nir(tokens, screen, false);
1460 }
1461
1462
1463 static bool
1464 def_is_64bit(nir_def *def, void *state)
1465 {
1466 bool *lower = (bool *)state;
1467 if (def && (def->bit_size == 64)) {
1468 *lower = true;
1469 return false;
1470 }
1471 return true;
1472 }
1473
1474 static bool
1475 src_is_64bit(nir_src *src, void *state)
1476 {
1477 bool *lower = (bool *)state;
1478 if (src && (nir_src_bit_size(*src) == 64)) {
1479 *lower = true;
1480 return false;
1481 }
1482 return true;
1483 }
1484
1485 static bool
1486 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1487 {
1488 bool lower = false;
1489 /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1490 * doesn't have const variants, so do the ugly const_cast here. */
1491 nir_instr *instr = (nir_instr *)const_instr;
1492
1493 nir_foreach_def(instr, def_is_64bit, &lower);
1494 if (lower)
1495 return true;
1496 nir_foreach_src(instr, src_is_64bit, &lower);
1497 return lower;
1498 }
1499
1500 static bool
1501 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1502 {
1503 nir_instr *instr = (nir_instr *)const_instr;
1504 nir_alu_instr *alu = nir_instr_as_alu(instr);
1505 switch (alu->op) {
1506 case nir_op_pack_64_2x32_split:
1507 case nir_op_pack_32_2x16_split:
1508 case nir_op_unpack_32_2x16_split_x:
1509 case nir_op_unpack_32_2x16_split_y:
1510 case nir_op_unpack_64_2x32_split_x:
1511 case nir_op_unpack_64_2x32_split_y:
1512 return true;
1513 default:
1514 break;
1515 }
1516 return false;
1517 }
1518
1519
1520 struct bo_vars {
1521 nir_variable *uniforms[5];
1522 nir_variable *ubo[5];
1523 nir_variable *ssbo[5];
1524 uint32_t first_ubo;
1525 uint32_t first_ssbo;
1526 };
1527
1528 static struct bo_vars
1529 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1530 {
1531 struct bo_vars bo;
1532 memset(&bo, 0, sizeof(bo));
1533 if (zs->ubos_used)
1534 bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1535 assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1536 if (zs->ssbos_used)
1537 bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1538 assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1539 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1540 unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1541 if (var->data.mode == nir_var_mem_ssbo) {
1542 assert(!bo.ssbo[idx]);
1543 bo.ssbo[idx] = var;
1544 } else {
1545 if (var->data.driver_location) {
1546 assert(!bo.ubo[idx]);
1547 bo.ubo[idx] = var;
1548 } else {
1549 assert(!bo.uniforms[idx]);
1550 bo.uniforms[idx] = var;
1551 }
1552 }
1553 }
1554 return bo;
1555 }
1556
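/* constant-offset loads/stores that land entirely past the end of a fixed-size block are
 * deleted here, with loads replaced by zero, presumably to get defined out-of-bounds
 * behavior without relying on robustness features
 */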
1557 static bool
1558 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1559 {
1560 struct bo_vars *bo = data;
1561 if (instr->type != nir_instr_type_intrinsic)
1562 return false;
1563 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1564 nir_variable *var = NULL;
1565 nir_def *offset = NULL;
1566 bool is_load = true;
1567 b->cursor = nir_before_instr(instr);
1568
1569 switch (intr->intrinsic) {
1570 case nir_intrinsic_store_ssbo:
1571 var = bo->ssbo[nir_src_bit_size(intr->src[0]) >> 4]; /* stores have no def, so use the value src's bit size */
1572 offset = intr->src[2].ssa;
1573 is_load = false;
1574 break;
1575 case nir_intrinsic_load_ssbo:
1576 var = bo->ssbo[intr->def.bit_size >> 4];
1577 offset = intr->src[1].ssa;
1578 break;
1579 case nir_intrinsic_load_ubo:
1580 if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1581 var = bo->uniforms[intr->def.bit_size >> 4];
1582 else
1583 var = bo->ubo[intr->def.bit_size >> 4];
1584 offset = intr->src[1].ssa;
1585 break;
1586 default:
1587 return false;
1588 }
1589 nir_src offset_src = nir_src_for_ssa(offset);
1590 if (!nir_src_is_const(offset_src))
1591 return false;
1592
1593 unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1594 const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1595 unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1596 bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1597 if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1598 return false;
1599
1600 unsigned rewrites = 0;
1601 nir_def *result[2];
1602 for (unsigned i = 0; i < intr->num_components; i++) {
1603 if (offset_bytes + i >= size) {
1604 rewrites++;
1605 if (is_load)
1606 result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1607 }
1608 }
1609 assert(rewrites == intr->num_components);
1610 if (is_load) {
1611 nir_def *load = nir_vec(b, result, intr->num_components);
1612 nir_def_rewrite_uses(&intr->def, load);
1613 }
1614 nir_instr_remove(instr);
1615 return true;
1616 }
1617
1618 static bool
1619 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1620 {
1621 struct bo_vars bo = get_bo_vars(zs, shader);
1622 return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1623 }
1624
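/* the main optimization loop: run the usual NIR cleanup/lowering passes until they stop
 * making progress, then a separate late-algebraic loop with its own cleanup
 */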
1625 static void
1626 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1627 {
1628 bool progress;
1629 do {
1630 progress = false;
1631 if (s->options->lower_int64_options)
1632 NIR_PASS_V(s, nir_lower_int64);
1633 if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1634 NIR_PASS_V(s, lower_64bit_pack);
1635 NIR_PASS_V(s, nir_lower_vars_to_ssa);
1636 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1637 NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1638 NIR_PASS(progress, s, nir_copy_prop);
1639 NIR_PASS(progress, s, nir_opt_remove_phis);
1640 if (s->options->lower_int64_options) {
1641 NIR_PASS(progress, s, nir_lower_64bit_phis);
1642 NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1643 }
1644 NIR_PASS(progress, s, nir_opt_dce);
1645 NIR_PASS(progress, s, nir_opt_dead_cf);
1646 NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1647 NIR_PASS(progress, s, nir_opt_cse);
1648 NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1649 NIR_PASS(progress, s, nir_opt_algebraic);
1650 NIR_PASS(progress, s, nir_opt_constant_folding);
1651 NIR_PASS(progress, s, nir_opt_undef);
1652 NIR_PASS(progress, s, zink_nir_lower_b2b);
1653 if (zs)
1654 NIR_PASS(progress, s, bound_bo_access, zs);
1655 if (can_shrink)
1656 NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
1657 } while (progress);
1658
1659 do {
1660 progress = false;
1661 NIR_PASS(progress, s, nir_opt_algebraic_late);
1662 if (progress) {
1663 NIR_PASS_V(s, nir_copy_prop);
1664 NIR_PASS_V(s, nir_opt_dce);
1665 NIR_PASS_V(s, nir_opt_cse);
1666 }
1667 } while (progress);
1668 }
1669
1670 /* - copy the lowered fbfetch variable
1671 * - set the new one up as an input attachment for descriptor 0.6
1672 * - load it as an image
1673 * - overwrite the previous load
1674 */
1675 static bool
1676 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1677 {
1678 bool ms = data != NULL;
1679 if (instr->type != nir_instr_type_intrinsic)
1680 return false;
1681 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1682 if (intr->intrinsic != nir_intrinsic_load_deref)
1683 return false;
1684 nir_variable *var = nir_intrinsic_get_var(intr, 0);
1685 if (!var->data.fb_fetch_output)
1686 return false;
1687 b->cursor = nir_after_instr(instr);
1688 nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1689 /* If Dim is SubpassData, ... Image Format must be Unknown
1690 * - SPIRV OpTypeImage specification
1691 */
1692 fbfetch->data.image.format = 0;
1693 fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1694 fbfetch->data.mode = nir_var_uniform;
1695 fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1697 fbfetch->data.sample = ms;
1698 enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1699 fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1700 nir_shader_add_variable(b->shader, fbfetch);
1701 nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1702 nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1703 nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1704 nir_def_rewrite_uses(&intr->def, load);
1705 return true;
1706 }
1707
1708 static bool
1709 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1710 {
1711 nir_foreach_shader_out_variable(var, shader) {
1712 if (var->data.fb_fetch_output) {
1713 *fbfetch = var;
1714 break;
1715 }
1716 }
1717 assert(*fbfetch);
1718 if (!*fbfetch)
1719 return false;
1720 return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1721 }
1722
1723 /*
1724 * Add a check for out of bounds LOD for every texel fetch op
1725 * It boils down to:
1726 * - if (lod < query_levels(tex))
1727 * - res = txf(tex)
1728 * - else
1729 * - res = (0, 0, 0, 1)
1730 */
1731 static bool
1732 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1733 {
1734 if (in->type != nir_instr_type_tex)
1735 return false;
1736 nir_tex_instr *txf = nir_instr_as_tex(in);
1737 if (txf->op != nir_texop_txf)
1738 return false;
1739
1740 b->cursor = nir_before_instr(in);
1741 int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1742 assert(lod_idx >= 0);
1743 nir_src lod_src = txf->src[lod_idx].src;
1744 if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1745 return false;
1746
1747 nir_def *lod = lod_src.ssa;
1748
1749 int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1750 int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1751 int deref_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_deref);
1752 nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1753 1 + !!(offset_idx >= 0) + !!(handle_idx >= 0));
1754 unsigned src_idx = 0;
1755 levels->op = nir_texop_query_levels;
1756 levels->dest_type = nir_type_int | lod->bit_size;
1757 if (deref_idx >= 0) {
1758 levels->src[src_idx].src_type = nir_tex_src_texture_deref;
1759 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[deref_idx].src.ssa);
1760 }
1761 if (offset_idx >= 0) {
1762 levels->src[src_idx].src_type = nir_tex_src_texture_offset;
1763 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1764 }
1765 if (handle_idx >= 0) {
1766 levels->src[src_idx].src_type = nir_tex_src_texture_handle;
1767 levels->src[src_idx++].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1768 }
1769 nir_def_init(&levels->instr, &levels->def,
1770 nir_tex_instr_dest_size(levels), 32);
1771 nir_builder_instr_insert(b, &levels->instr);
1772
1773 nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1774 nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1775 nir_builder_instr_insert(b, &new_txf->instr);
1776
1777 nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1778 nir_const_value oob_values[4] = {0};
1779 unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1780 oob_values[3] = (txf->dest_type & nir_type_float) ?
1781 nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1782 nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1783
1784 nir_pop_if(b, lod_oob_else);
1785 nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1786
1787 nir_def_rewrite_uses(&txf->def, robust_txf);
1788 nir_instr_remove_v(in);
1789 return true;
1790 }
1791
1792 /* This pass is used to workaround the lack of out of bounds LOD robustness
1793 * for texel fetch ops in VK_EXT_image_robustness.
1794 */
1795 static bool
1796 lower_txf_lod_robustness(nir_shader *shader)
1797 {
1798 return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1799 }
1800
1801 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1802 static bool
1803 check_psiz(struct nir_shader *s)
1804 {
1805 bool have_psiz = false;
1806 nir_foreach_shader_out_variable(var, s) {
1807 if (var->data.location == VARYING_SLOT_PSIZ) {
1808 /* genuine PSIZ outputs will have this set */
1809 have_psiz |= !!var->data.explicit_location;
1810 }
1811 }
1812 return have_psiz;
1813 }
1814
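/* find the variable of the given mode that covers this location/component pair, ignoring
 * the injected gl_PointSize when a genuine (explicit_location) one exists
 */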
1815 static nir_variable *
1816 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1817 {
1818 assert((int)location >= 0);
1819
1820 nir_foreach_variable_with_modes(var, nir, mode) {
1821 if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1822 unsigned num_components = glsl_get_vector_elements(var->type);
1823 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1824 num_components *= 2;
1825 if (is_clipcull_dist(var->data.location))
1826 num_components = glsl_get_aoa_size(var->type);
1827 if (var->data.location_frac <= location_frac &&
1828 var->data.location_frac + num_components > location_frac)
1829 return var;
1830 }
1831 }
1832 return NULL;
1833 }
1834
1835 static bool
1836 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1837 {
1838 unsigned num_components = util_bitcount(output->component_mask);
1839 for (unsigned i = 0; i < num_components; i++)
1840 if (!inlined[output->component_offset + i])
1841 return false;
1842 return true;
1843 }
1844
1845 static void
1846 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1847 {
1848 uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1849 if (last_output < VARYING_SLOT_VAR0)
1850 last_output = VARYING_SLOT_VAR0;
1851 else
1852 last_output++;
1853 /* this should get fixed up by slot remapping */
1854 psiz->data.location = last_output;
1855 }
1856
1857 static const struct glsl_type *
1858 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1859 {
1860 /* could be dvec/dmat/mat: each member is the same */
1861 const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1862 /* determine size of each member type */
1863 unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1864 /* normalize slot idx to current type's size */
1865 slot %= slot_count;
1866 unsigned slot_components = glsl_get_components(plain);
1867 if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1868 slot_components *= 2;
1869 /* create a vec4 mask of the selected slot's components out of all the components */
1870 uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1871 /* return a vecN of the selected components */
1872 slot_components = util_bitcount(mask);
1873 return glsl_vec_type(slot_components);
1874 }
1875
1876 static const struct glsl_type *
1877 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1878 {
1879 const struct glsl_type *type = slot_type;
1880 unsigned slot_count = 0;
1881 unsigned cur_slot = 0;
1882 /* iterate over all the members in the struct, stopping once the slot idx is reached */
1883 for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1884 /* use array type for slot counting but return array member type for unroll */
1885 const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1886 type = glsl_without_array(arraytype);
1887 slot_count = glsl_count_vec4_slots(arraytype, false, false);
1888 }
1889 *slot_idx -= (cur_slot - slot_count);
1890 if (!glsl_type_is_struct_or_ifc(type))
1891 /* this is a fully unrolled struct: find the number of vec components to output */
1892 type = clamp_slot_type(type, *slot_idx);
1893 return type;
1894 }
1895
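/* number of 32bit components the variable exports to the given vec4 slot, unrolling
 * structs/arrays as needed; used by the xfb packing analysis below
 */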
1896 static unsigned
1897 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1898 {
1899 assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1900 const struct glsl_type *orig_type = var->type;
1901 const struct glsl_type *type = glsl_without_array(var->type);
1902 unsigned slot_idx = slot - so_slot;
1903 if (type != orig_type)
1904 slot_idx %= glsl_count_vec4_slots(type, false, false);
1905 /* need to find the vec4 that's being exported by this slot */
1906 while (glsl_type_is_struct_or_ifc(type))
1907 type = unroll_struct_type(type, &slot_idx);
1908
1909 /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1910 unsigned num_components = glsl_get_components(glsl_without_array(type));
1911 /* special handling: clip/cull distance are arrays with vector semantics */
1912 if (is_clipcull_dist(var->data.location)) {
1913 num_components = glsl_array_size(type);
1914 if (slot_idx)
1915 /* this is the second vec4 */
1916 num_components %= 4;
1917 else
1918 /* this is the first vec4 */
1919 num_components = MIN2(num_components, 4);
1920 }
1921 assert(num_components);
1922 /* gallium handles xfb in terms of 32bit units */
1923 if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1924 num_components *= 2;
1925 return num_components;
1926 }
1927
1928 static unsigned
1929 get_var_slot_count(nir_shader *nir, nir_variable *var)
1930 {
1931 assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1932 const struct glsl_type *type = var->type;
1933 if (nir_is_arrayed_io(var, nir->info.stage))
1934 type = glsl_get_array_element(type);
1935 unsigned slot_count = 0;
1936 if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) ||
1937 var->data.location >= VARYING_SLOT_VAR0)
1938 slot_count = glsl_count_vec4_slots(type, false, false);
1939 else if (glsl_type_is_array(type))
1940 slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1941 else
1942 slot_count = 1;
1943 return slot_count;
1944 }
1945
1946
1947 static const nir_xfb_output_info *
1948 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1949 {
1950 for (unsigned i = 0; i < xfb_info->output_count; i++) {
1951 const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1952 if (packed_output->location == slot)
1953 return packed_output;
1954 }
1955 return NULL;
1956 }
1957
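/* map nir xfb info back onto the output variables: outputs that line up with an xfb
 * output get inlined xfb data on the variable itself, everything else is tracked as
 * "packed" so whole variables can be consolidated in the second loop
 */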
1958 static void
1959 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1960 {
1961 bool inlined[VARYING_SLOT_MAX][4] = {0};
1962 uint64_t packed = 0;
1963 uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1964 uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1965 uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1966 uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1967 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1968 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1969 unsigned xfb_components = util_bitcount(output->component_mask);
1970 /* always set stride to be used during draw */
1971 zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1972 for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1973 unsigned slot = output->location;
1974 if (inlined[slot][output->component_offset + c])
1975 continue;
1976 nir_variable *var = NULL;
1977 while (!var && slot < VARYING_SLOT_TESS_MAX)
1978 var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1979 slot = output->location;
1980 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1981 if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1982 /* if no variable is found for the xfb output, no output exists */
1983 inlined[slot][c + output->component_offset] = true;
1984 continue;
1985 }
1986 if (var->data.explicit_xfb_buffer) {
1987 /* handle dvec3 where gallium splits streamout over 2 registers */
1988 for (unsigned j = 0; j < xfb_components; j++)
1989 inlined[slot][c + output->component_offset + j] = true;
1990 }
1991 if (is_inlined(inlined[slot], output))
1992 continue;
1993 assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location));
1994 assert(!glsl_type_is_struct_or_ifc(var->type));
1995 unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1996 if (glsl_type_is_64bit(glsl_without_array(var->type)))
1997 num_components *= 2;
1998 /* if this is the entire variable, try to blast it out during the initial declaration
1999 * structs must be handled later to ensure accurate analysis
2000 */
2001 if ((num_components == xfb_components ||
2002 num_components < xfb_components ||
2003 (num_components > xfb_components && xfb_components == 4))) {
2004 var->data.explicit_xfb_buffer = 1;
2005 var->data.xfb.buffer = output->buffer;
2006 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2007 var->data.offset = (output->offset + c * sizeof(uint32_t));
2008 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2009 for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
2010 inlined[slot][c + output->component_offset + j] = true;
2011 } else {
2012 /* otherwise store some metadata for later */
2013 packed |= BITFIELD64_BIT(slot);
2014 packed_components[slot] += xfb_components;
2015 packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
2016 packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
2017 for (unsigned j = 0; j < xfb_components; j++)
2018 packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
2019 }
2020 }
2021 }
2022
2023 /* if this was flagged as a packed output before, and if all the components are
2024 * being output with the same stream on the same buffer with increasing offsets, this entire variable
2025 * can be consolidated into a single output to conserve locations
2026 */
2027 for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
2028 const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
2029 unsigned slot = output->location;
2030 if (is_inlined(inlined[slot], output))
2031 continue;
2032 nir_variable *var = NULL;
2033 while (!var)
2034 var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
2035 slot = output->location;
2036 unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
2037 if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
2038 continue;
2039 /* this is a lowered 64bit variable that can't be exported due to packing */
2040 if (var->data.is_xfb)
2041 goto out;
2042
2043 unsigned num_slots = is_clipcull_dist(var->data.location) ?
2044 glsl_array_size(var->type) / 4 :
2045 glsl_count_vec4_slots(var->type, false, false);
2046 /* for each variable, iterate over all the variable's slots and inline the outputs */
2047 for (unsigned j = 0; j < num_slots; j++) {
2048 slot = var->data.location + j;
2049 const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
2050 if (!packed_output)
2051 goto out;
2052
2053 /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
2054 if (!(packed & BITFIELD64_BIT(slot)) ||
2055 util_bitcount(packed_streams[slot]) != 1 ||
2056 util_bitcount(packed_buffers[slot]) != 1)
2057 goto out;
2058
2059 /* if all the components the variable exports to this slot aren't captured, skip consolidation */
2060 unsigned num_components = get_slot_components(var, slot, var->data.location);
2061 if (num_components != packed_components[slot])
2062 goto out;
2063
2064 /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2065 uint32_t prev_offset = packed_offsets[packed_output->location][0];
2066 for (unsigned k = 1; k < num_components; k++) {
2067 /* if the offsets are not incrementing as expected, skip consolidation */
2068 if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
2069 goto out;
2070 prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
2071 }
2072 }
2073 /* this output can be consolidated: blast out all the data inlined */
2074 var->data.explicit_xfb_buffer = 1;
2075 var->data.xfb.buffer = output->buffer;
2076 var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2077 var->data.offset = output->offset;
2078 var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2079 /* mark all slot components inlined to skip subsequent loop iterations */
2080 for (unsigned j = 0; j < num_slots; j++) {
2081 slot = var->data.location + j;
2082 for (unsigned k = 0; k < packed_components[slot]; k++)
2083 inlined[slot][k] = true;
2084 packed &= ~BITFIELD64_BIT(slot);
2085 }
2086 continue;
2087 out:
2088 unreachable("xfb should be inlined by now!");
2089 }
2090 }
2091
2092 struct decompose_state {
2093 nir_variable **split;
2094 bool needs_w;
2095 };
2096
2097 static bool
2098 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2099 {
2100 struct decompose_state *state = data;
2101 nir_variable **split = state->split;
2102 if (instr->type != nir_instr_type_intrinsic)
2103 return false;
2104 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2105 if (intr->intrinsic != nir_intrinsic_load_deref)
2106 return false;
2107 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2108 nir_variable *var = nir_deref_instr_get_variable(deref);
2109 if (var != split[0])
2110 return false;
2111 unsigned num_components = glsl_get_vector_elements(split[0]->type);
2112 b->cursor = nir_after_instr(instr);
2113 nir_def *loads[4];
2114 for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2115 loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2116 if (state->needs_w) {
2117 /* oob load w component to get correct value for int/float */
2118 loads[3] = nir_channel(b, loads[0], 3);
2119 loads[0] = nir_channel(b, loads[0], 0);
2120 }
2121 nir_def *new_load = nir_vec(b, loads, num_components);
2122 nir_def_rewrite_uses(&intr->def, new_load);
2123 nir_instr_remove_v(instr);
2124 return true;
2125 }
2126
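/* split the flagged vertex attributes into per-component variables and redirect their
 * loads via lower_attrib(); the _without_w variant keeps a full-width first split and
 * sources the w component from its oob load, presumably to work around vertex formats
 * that must be fetched per-component
 */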
2127 static bool
2128 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2129 {
2130 uint32_t bits = 0;
2131 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2132 bits |= BITFIELD_BIT(var->data.driver_location);
2133 bits = ~bits;
2134 u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2135 nir_variable *split[5];
2136 struct decompose_state state;
2137 state.split = split;
2138 nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2139 assert(var);
2140 split[0] = var;
2141 bits |= BITFIELD_BIT(var->data.driver_location);
2142 const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2143 unsigned num_components = glsl_get_vector_elements(var->type);
2144 state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2145 for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2146 split[i+1] = nir_variable_clone(var, nir);
2147 split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2148 if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2149 split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2150 else
2151 split[i+1]->type = new_type;
2152 split[i+1]->data.driver_location = ffs(bits) - 1;
2153 bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2154 nir_shader_add_variable(nir, split[i+1]);
2155 }
2156 var->data.mode = nir_var_shader_temp;
2157 nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2158 }
2159 nir_fixup_deref_modes(nir);
2160 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2161 optimize_nir(nir, NULL, true);
2162 return true;
2163 }
2164
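/* convert byte offsets in ubo/ssbo/shared/scratch access into element indices of the
 * uintN_t arrays used by this driver, splitting 64bit access into 2x32 when shaderInt64
 * is unavailable (or for unaligned 64bit ubo0 loads)
 */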
2165 static bool
2166 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2167 {
2168 struct zink_screen *screen = data;
2169 const bool has_int64 = screen->info.feats.features.shaderInt64;
2170 if (instr->type != nir_instr_type_intrinsic)
2171 return false;
2172 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2173 b->cursor = nir_before_instr(instr);
2174 switch (intr->intrinsic) {
2175 case nir_intrinsic_ssbo_atomic:
2176 case nir_intrinsic_ssbo_atomic_swap: {
2177 /* convert offset to uintN_t[idx] */
2178 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2179 nir_src_rewrite(&intr->src[1], offset);
2180 return true;
2181 }
2182 case nir_intrinsic_load_ssbo:
2183 case nir_intrinsic_load_ubo: {
2184 /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2185 bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2186 nir_src_is_const(intr->src[0]) &&
2187 nir_src_as_uint(intr->src[0]) == 0 &&
2188 intr->def.bit_size == 64 &&
2189 nir_intrinsic_align_offset(intr) % 8 != 0;
2190 force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2191 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2192 nir_src_rewrite(&intr->src[1], offset);
2193 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2194 if (force_2x32) {
2195 /* this is always scalarized */
2196 assert(intr->def.num_components == 1);
2197 /* rewrite as 2x32 */
2198 nir_def *load[2];
2199 for (unsigned i = 0; i < 2; i++) {
2200 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2201 load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2202 else
2203 load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2204 nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2205 }
2206 /* cast back to 64bit */
2207 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2208 nir_def_rewrite_uses(&intr->def, casted);
2209 nir_instr_remove(instr);
2210 }
2211 return true;
2212 }
2213 case nir_intrinsic_load_scratch:
2214 case nir_intrinsic_load_shared: {
2215 b->cursor = nir_before_instr(instr);
2216 bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2217 nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2218 nir_src_rewrite(&intr->src[0], offset);
2219 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2220 if (force_2x32) {
2221 /* this is always scalarized */
2222 assert(intr->def.num_components == 1);
2223 /* rewrite as 2x32 */
2224 nir_def *load[2];
2225 for (unsigned i = 0; i < 2; i++)
2226 load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2227 /* cast back to 64bit */
2228 nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2229 nir_def_rewrite_uses(&intr->def, casted);
2230 nir_instr_remove(instr);
2231 return true;
2232 }
2233 break;
2234 }
2235 case nir_intrinsic_store_ssbo: {
2236 b->cursor = nir_before_instr(instr);
2237 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2238 nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2239 nir_src_rewrite(&intr->src[2], offset);
2240 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2241 if (force_2x32) {
2242 /* this is always scalarized */
2243 assert(intr->src[0].ssa->num_components == 1);
2244 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2245 for (unsigned i = 0; i < 2; i++)
2246 nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2247 nir_instr_remove(instr);
2248 }
2249 return true;
2250 }
2251 case nir_intrinsic_store_scratch:
2252 case nir_intrinsic_store_shared: {
2253 b->cursor = nir_before_instr(instr);
2254 bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2255 nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2256 nir_src_rewrite(&intr->src[1], offset);
2257 /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2258 if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2259 /* this is always scalarized */
2260 assert(intr->src[0].ssa->num_components == 1);
2261 nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2262 for (unsigned i = 0; i < 2; i++)
2263 nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2264 nir_instr_remove(instr);
2265 }
2266 return true;
2267 }
2268 default:
2269 break;
2270 }
2271 return false;
2272 }
2273
2274 static bool
2275 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2276 {
2277 return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2278 }
2279
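/* return the bo variable matching this bit size, cloning and retyping the 32bit variant
 * on first use (named e.g. "ssbos@64"); the struct type keeps a sized "base" array plus
 * an "unsized" tail array
 */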
2280 static nir_variable *
2281 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2282 {
2283 nir_variable *var, **ptr;
2284 unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2285
2286 if (ssbo)
2287 ptr = &bo->ssbo[bit_size >> 4];
2288 else {
2289 if (!idx) {
2290 ptr = &bo->uniforms[bit_size >> 4];
2291 } else
2292 ptr = &bo->ubo[bit_size >> 4];
2293 }
2294 var = *ptr;
2295 if (!var) {
2296 if (ssbo)
2297 var = bo->ssbo[32 >> 4];
2298 else {
2299 if (!idx)
2300 var = bo->uniforms[32 >> 4];
2301 else
2302 var = bo->ubo[32 >> 4];
2303 }
2304 var = nir_variable_clone(var, shader);
2305 if (ssbo)
2306 var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2307 else
2308 var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2309 *ptr = var;
2310 nir_shader_add_variable(shader, var);
2311
2312 struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2313 fields[0].name = ralloc_strdup(shader, "base");
2314 fields[1].name = ralloc_strdup(shader, "unsized");
2315 unsigned array_size = glsl_get_length(var->type);
2316 const struct glsl_type *bare_type = glsl_without_array(var->type);
2317 const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2318 unsigned length = glsl_get_length(array_type);
2319 const struct glsl_type *type;
2320 const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2321 if (bit_size > 32) {
2322 assert(bit_size == 64);
2323 type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2324 } else {
2325 type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2326 }
2327 fields[0].type = type;
2328 fields[1].type = unsized;
2329 var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2330 var->data.driver_location = idx;
2331 }
2332 return var;
2333 }
2334
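/* rewrite ssbo_atomic/ssbo_atomic_swap as deref_atomic ops on the typed array variable,
 * emitting one scalar atomic per component
 */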
2335 static void
2336 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2337 {
2338 nir_intrinsic_op op;
2339 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2340 if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2341 op = nir_intrinsic_deref_atomic;
2342 else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2343 op = nir_intrinsic_deref_atomic_swap;
2344 else
2345 unreachable("unknown intrinsic");
2346 nir_def *offset = intr->src[1].ssa;
2347 nir_src *src = &intr->src[0];
2348 nir_variable *var = get_bo_var(b->shader, bo, true, src,
2349 intr->def.bit_size);
2350 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2351 nir_def *idx = src->ssa;
2352 if (bo->first_ssbo)
2353 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2354 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2355 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2356
2357 /* generate new atomic deref ops for every component */
2358 nir_def *result[4];
2359 unsigned num_components = intr->def.num_components;
2360 for (unsigned i = 0; i < num_components; i++) {
2361 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2362 nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2363 nir_def_init(&new_instr->instr, &new_instr->def, 1,
2364 intr->def.bit_size);
2365 nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2366 new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2367 /* deref ops have no offset src, so copy the srcs after it */
2368 for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2369 new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2370 nir_builder_instr_insert(b, &new_instr->instr);
2371
2372 result[i] = &new_instr->def;
2373 offset = nir_iadd_imm(b, offset, 1);
2374 }
2375
2376 nir_def *load = nir_vec(b, result, num_components);
2377 nir_def_replace(&intr->def, load);
2378 }
2379
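/* replace explicit load/store_ubo/ssbo intrinsics and ssbo atomics with deref-based
 * access on the bo variables, so later passes and nir_to_spirv only see ordinary
 * variable derefs
 */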
2380 static bool
2381 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2382 {
2383 struct bo_vars *bo = data;
2384 if (instr->type != nir_instr_type_intrinsic)
2385 return false;
2386 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2387 nir_variable *var = NULL;
2388 nir_def *offset = NULL;
2389 bool is_load = true;
2390 b->cursor = nir_before_instr(instr);
2391 nir_src *src;
2392 bool ssbo = true;
2393 switch (intr->intrinsic) {
2394 case nir_intrinsic_ssbo_atomic:
2395 case nir_intrinsic_ssbo_atomic_swap:
2396 rewrite_atomic_ssbo_instr(b, instr, bo);
2397 return true;
2398 case nir_intrinsic_store_ssbo:
2399 src = &intr->src[1];
2400 var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2401 offset = intr->src[2].ssa;
2402 is_load = false;
2403 break;
2404 case nir_intrinsic_load_ssbo:
2405 src = &intr->src[0];
2406 var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2407 offset = intr->src[1].ssa;
2408 break;
2409 case nir_intrinsic_load_ubo:
2410 src = &intr->src[0];
2411 var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2412 offset = intr->src[1].ssa;
2413 ssbo = false;
2414 break;
2415 default:
2416 return false;
2417 }
2418 assert(var);
2419 assert(offset);
2420 nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2421 nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2422 if (!ssbo && bo->first_ubo && var->data.driver_location)
2423 idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2424 else if (ssbo && bo->first_ssbo)
2425 idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2426 nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2427 nir_i2iN(b, idx, deref_var->def.bit_size));
2428 nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2429 assert(intr->num_components <= 2);
2430 if (is_load) {
2431 nir_def *result[2];
2432 for (unsigned i = 0; i < intr->num_components; i++) {
2433 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2434 nir_i2iN(b, offset, deref_struct->def.bit_size));
2435 result[i] = nir_load_deref(b, deref_arr);
2436 if (intr->intrinsic == nir_intrinsic_load_ssbo)
2437 nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2438 offset = nir_iadd_imm(b, offset, 1);
2439 }
2440 nir_def *load = nir_vec(b, result, intr->num_components);
2441 nir_def_rewrite_uses(&intr->def, load);
2442 } else {
2443 nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2444 nir_i2iN(b, offset, deref_struct->def.bit_size));
2445 nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2446 }
2447 nir_instr_remove(instr);
2448 return true;
2449 }
2450
2451 static bool
2452 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2453 {
2454 struct bo_vars bo = get_bo_vars(zs, shader);
2455 return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2456 }
2457
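/* classify a lowered-io intrinsic: returns false for anything that isn't an io
 * load/store, otherwise sets is_load/is_input/is_interp for the caller
 */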
2458 static bool
2459 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2460 {
2461 switch (intr->intrinsic) {
2462 case nir_intrinsic_load_interpolated_input:
2463 *is_interp = true;
2464 FALLTHROUGH;
2465 case nir_intrinsic_load_input:
2466 case nir_intrinsic_load_per_vertex_input:
2467 *is_input = true;
2468 FALLTHROUGH;
2469 case nir_intrinsic_load_output:
2470 case nir_intrinsic_load_per_vertex_output:
2471 case nir_intrinsic_load_per_primitive_output:
2472 *is_load = true;
2473 FALLTHROUGH;
2474 case nir_intrinsic_store_output:
2475 case nir_intrinsic_store_per_primitive_output:
2476 case nir_intrinsic_store_per_vertex_output:
2477 break;
2478 default:
2479 return false;
2480 }
2481 return true;
2482 }
2483
2484 static bool
2485 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2486 {
2487 switch (intr->intrinsic) {
2488 case nir_intrinsic_load_per_vertex_input:
2489 case nir_intrinsic_load_per_vertex_output:
2490 case nir_intrinsic_load_per_primitive_output:
2491 case nir_intrinsic_store_per_primitive_output:
2492 case nir_intrinsic_store_per_vertex_output:
2493 return true;
2494 default:
2495 break;
2496 }
2497 return false;
2498 }
2499
2500 static bool
2501 find_var_deref(nir_shader *nir, nir_variable *var)
2502 {
2503 nir_foreach_function_impl(impl, nir) {
2504 nir_foreach_block(block, impl) {
2505 nir_foreach_instr(instr, block) {
2506 if (instr->type != nir_instr_type_deref)
2507 continue;
2508 nir_deref_instr *deref = nir_instr_as_deref(instr);
2509 if (deref->deref_type == nir_deref_type_var && deref->var == var)
2510 return true;
2511 }
2512 }
2513 }
2514 return false;
2515 }
2516
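/* returns true if any lowered-io intrinsic in the shader touches the slot range covered
 * by this variable
 */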
2517 static bool
2518 find_var_io(nir_shader *nir, nir_variable *var)
2519 {
2520 nir_foreach_function(function, nir) {
2521 if (!function->impl)
2522 continue;
2523
2524 nir_foreach_block(block, function->impl) {
2525 nir_foreach_instr(instr, block) {
2526 if (instr->type != nir_instr_type_intrinsic)
2527 continue;
2528 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2529 bool is_load = false;
2530 bool is_input = false;
2531 bool is_interp = false;
2532 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2533 continue;
2534 if (var->data.mode == nir_var_shader_in && !is_input)
2535 continue;
2536 if (var->data.mode == nir_var_shader_out && is_input)
2537 continue;
2538 unsigned slot_offset = 0;
2539 if (var->data.fb_fetch_output && !is_load)
2540 continue;
2541 if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2542 continue;
2543 nir_src *src_offset = nir_get_io_offset_src(intr);
2544 if (src_offset && nir_src_is_const(*src_offset))
2545 slot_offset = nir_src_as_uint(*src_offset);
2546 unsigned slot_count = get_var_slot_count(nir, var);
2547 if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2548 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2549 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2550 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2551 return true;
2552 }
2553 }
2554 }
2555 return false;
2556 }
2557
2558 struct clamp_layer_output_state {
2559 nir_variable *original;
2560 nir_variable *clamped;
2561 };
2562
2563 static void
2564 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2565 {
2566 nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2567 nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2568 nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2569 nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2570 nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2571 nir_load_deref(b, original_deref),
2572 nir_imm_int(b, 0));
2573 nir_store_deref(b, clamped_deref, layer, 0);
2574 }
2575
2576 static bool
2577 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2578 {
2579 struct clamp_layer_output_state *state = data;
2580 switch (instr->type) {
2581 case nir_instr_type_intrinsic: {
2582 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2583 if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2584 intr->intrinsic != nir_intrinsic_emit_vertex)
2585 return false;
2586 b->cursor = nir_before_instr(instr);
2587 clamp_layer_output_emit(b, state);
2588 return true;
2589 }
2590 default: return false;
2591 }
2592 }
2593
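/* emit a second gl_Layer output that selects between the real layer and 0 based on the
 * FRAMEBUFFER_IS_LAYERED push constant, so an out-of-range layer isn't written when the
 * framebuffer isn't layered; the original output is kept as a generic varying (or demoted
 * to a temp) as needed
 */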
2594 static bool
2595 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2596 {
2597 switch (vs->info.stage) {
2598 case MESA_SHADER_VERTEX:
2599 case MESA_SHADER_GEOMETRY:
2600 case MESA_SHADER_TESS_EVAL:
2601 break;
2602 default:
2603 unreachable("invalid last vertex stage!");
2604 }
2605 struct clamp_layer_output_state state = {0};
2606 state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2607 if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2608 return false;
2609 state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2610 state.clamped->data.location = VARYING_SLOT_LAYER;
2611 nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2612 if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2613 state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2614 state.original->data.driver_location = (*next_location)++;
2615 if (fs_var) {
2616 fs_var->data.location = state.original->data.location;
2617 fs_var->data.driver_location = state.original->data.driver_location;
2618 }
2619 } else {
2620 if (state.original->data.explicit_xfb_buffer) {
2621 /* Will xfb the clamped output but still better than nothing */
2622 state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2623 state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2624 state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2625 state.clamped->data.offset = state.original->data.offset;
2626 state.clamped->data.stream = state.original->data.stream;
2627 }
2628 state.original->data.mode = nir_var_shader_temp;
2629 nir_fixup_deref_modes(vs);
2630 }
2631 if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2632 nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2633 } else {
2634 nir_builder b;
2635 nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2636 b = nir_builder_at(nir_after_impl(impl));
2637 assert(impl->end_block->predecessors->entries == 1);
2638 clamp_layer_output_emit(&b, &state);
2639 nir_metadata_preserve(impl, nir_metadata_dominance);
2640 }
2641 optimize_nir(vs, NULL, true);
2642 NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2643 return true;
2644 }
2645
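/* state for producer/consumer varying assignment: slot_map/patch_slot_map translate gl
 * varying slots into compacted driver_locations, and the *_track masks catch
 * per-component slot conflicts via the asserts in assign_track_slot_mask()
 */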
2646 struct io_slot_map {
2647 uint64_t *patch_slot_track;
2648 uint64_t *slot_track;
2649 unsigned char *slot_map;
2650 unsigned reserved;
2651 unsigned char *patch_slot_map;
2652 unsigned patch_reserved;
2653 };
2654
2655 static void
2656 assign_track_slot_mask(struct io_slot_map *io, nir_variable *var, unsigned slot, unsigned num_slots)
2657 {
2658 uint64_t *track = var->data.patch ? io->patch_slot_track : io->slot_track;
2659 uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(glsl_without_array(var->type))) << var->data.location_frac;
2660 uint64_t slot_mask = BITFIELD64_RANGE(slot, num_slots);
2661 u_foreach_bit(c, mask) {
2662 assert((track[c] & slot_mask) == 0);
2663 track[c] |= slot_mask;
2664 }
2665 }
2666
2667 static void
2668 assign_slot_io(gl_shader_stage stage, struct io_slot_map *io, nir_variable *var, unsigned slot)
2669 {
2670 unsigned num_slots;
2671 if (nir_is_arrayed_io(var, stage))
2672 num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2673 else
2674 num_slots = glsl_count_vec4_slots(var->type, false, false);
2675 uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2676 assign_track_slot_mask(io, var, slot, num_slots);
2677 if (slot_map[slot] != 0xff)
2678 return;
2679 unsigned *reserved = var->data.patch ? &io->patch_reserved : &io->reserved;
2680 assert(*reserved + num_slots <= MAX_VARYING);
2681 assert(*reserved < MAX_VARYING);
2682 for (unsigned i = 0; i < num_slots; i++)
2683 slot_map[slot + i] = (*reserved)++;
2684 }
2685
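/* producer side of io assignment: builtins keep their slots (driver_location becomes a
 * sentinel so they're not counted), everything else gets the next free compacted slot
 * from the map
 */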
2686 static void
2687 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2688 {
2689 unsigned slot = var->data.location;
2690 switch (slot) {
2691 case -1:
2692 unreachable("there should be no UINT32_MAX location variables!");
2693 break;
2694 case VARYING_SLOT_POS:
2695 case VARYING_SLOT_PSIZ:
2696 case VARYING_SLOT_LAYER:
2697 case VARYING_SLOT_PRIMITIVE_ID:
2698 case VARYING_SLOT_CLIP_DIST0:
2699 case VARYING_SLOT_CULL_DIST0:
2700 case VARYING_SLOT_VIEWPORT:
2701 case VARYING_SLOT_FACE:
2702 case VARYING_SLOT_TESS_LEVEL_OUTER:
2703 case VARYING_SLOT_TESS_LEVEL_INNER:
2704 /* use a sentinel value to avoid counting later */
2705 var->data.driver_location = UINT32_MAX;
2706 return;
2707
2708 default:
2709 break;
2710 }
2711 if (var->data.patch) {
2712 assert(slot >= VARYING_SLOT_PATCH0);
2713 slot -= VARYING_SLOT_PATCH0;
2714 }
2715 assign_slot_io(stage, io, var, slot);
2716 slot = var->data.patch ? io->patch_slot_map[slot] : io->slot_map[slot];
2717 assert(slot < MAX_VARYING);
2718 var->data.driver_location = slot;
2719 }
2720
2721 ALWAYS_INLINE static bool
2722 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2723 {
2724 if (stage != MESA_SHADER_FRAGMENT)
2725 return false;
2726 return var->data.location >= VARYING_SLOT_TEX0 &&
2727 var->data.location <= VARYING_SLOT_TEX7;
2728 }
2729
2730 static bool
2731 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2732 {
2733 unsigned slot = var->data.location;
2734 switch (slot) {
2735 case VARYING_SLOT_POS:
2736 case VARYING_SLOT_PSIZ:
2737 case VARYING_SLOT_LAYER:
2738 case VARYING_SLOT_PRIMITIVE_ID:
2739 case VARYING_SLOT_CLIP_DIST0:
2740 case VARYING_SLOT_CULL_DIST0:
2741 case VARYING_SLOT_VIEWPORT:
2742 case VARYING_SLOT_FACE:
2743 case VARYING_SLOT_TESS_LEVEL_OUTER:
2744 case VARYING_SLOT_TESS_LEVEL_INNER:
2745 /* use a sentinel value to avoid counting later */
2746 var->data.driver_location = UINT_MAX;
2747 return true;
2748 default:
2749 break;
2750 }
2751 if (var->data.patch) {
2752 assert(slot >= VARYING_SLOT_PATCH0);
2753 slot -= VARYING_SLOT_PATCH0;
2754 }
2755 uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2756 if (slot_map[slot] == (unsigned char)-1) {
2757 /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2758 * so keep for now and eliminate later
2759 */
2760 if (is_texcoord(stage, var)) {
2761 var->data.driver_location = UINT32_MAX;
2762 return true;
2763 }
2764 /* patch variables may be read in the workgroup */
2765 if (stage != MESA_SHADER_TESS_CTRL)
2766 /* dead io */
2767 return false;
2768 assign_slot_io(stage, io, var, slot);
2769 }
2770 var->data.driver_location = slot_map[slot];
2771 return true;
2772 }
2773
2774
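/* rewrite loads of a given (now unwritten) input location to zero, using alpha=1 for
 * fs color inputs
 */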
2775 static bool
2776 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2777 {
2778 nir_variable *var = data;
2779 if (instr->type != nir_instr_type_intrinsic)
2780 return false;
2781
2782 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2783 bool is_load = false;
2784 bool is_input = false;
2785 bool is_interp = false;
2786 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2787 return false;
2788 if (!is_load)
2789 return false;
2790 unsigned location = nir_intrinsic_io_semantics(intr).location;
2791 if (location != var->data.location)
2792 return false;
2793 b->cursor = nir_before_instr(instr);
2794 nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2795 intr->def.bit_size);
2796 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2797 switch (location) {
2798 case VARYING_SLOT_COL0:
2799 case VARYING_SLOT_COL1:
2800 case VARYING_SLOT_BFC0:
2801 case VARYING_SLOT_BFC1:
2802 /* default color is 0,0,0,1 */
2803 if (intr->def.num_components == 4)
2804 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2805 break;
2806 default:
2807 break;
2808 }
2809 }
2810 nir_def_replace(&intr->def, zero);
2811 return true;
2812 }
2813
2814
2815
2816 static bool
2817 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2818 {
2819 switch (intr->intrinsic) {
2820 case nir_intrinsic_store_output:
2821 case nir_intrinsic_store_per_primitive_output:
2822 case nir_intrinsic_store_per_vertex_output:
2823 break;
2824 default:
2825 return false;
2826 }
2827 if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2828 return false;
2829 if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2830 nir_instr_remove(&intr->instr);
2831 return true;
2832 }
2833 return false;
2834 }
2835
2836 static bool
2837 delete_psiz_store(nir_shader *nir, bool one)
2838 {
2839 bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2840 nir_metadata_dominance, one ? nir : NULL);
2841 if (progress)
2842 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2843 return progress;
2844 }
2845
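/* per-slot tracking of written components: find_max_write_components() collects the
 * producer's write mask for a slot, and fill_zero_reads() then substitutes zero (alpha=1
 * for fs colors) into consumer reads of components that are never written
 */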
2846 struct write_components {
2847 unsigned slot;
2848 uint32_t component_mask;
2849 };
2850
2851 static bool
2852 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2853 {
2854 struct write_components *wc = data;
2855 bool is_load = false;
2856 bool is_input = false;
2857 bool is_interp = false;
2858 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2859 return false;
2860 if (!is_input)
2861 return false;
2862 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2863 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2864 return false;
2865 unsigned num_components = intr->num_components;
2866 unsigned c = nir_intrinsic_component(intr);
2867 if (intr->def.bit_size == 64)
2868 num_components *= 2;
2869 nir_src *src_offset = nir_get_io_offset_src(intr);
2870 if (!nir_src_is_const(*src_offset))
2871 return false;
2872 unsigned slot_offset = nir_src_as_uint(*src_offset);
2873 if (s.location + slot_offset != wc->slot)
2874 return false;
2875 uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2876 if (intr->def.bit_size == 64)
2877 readmask |= readmask << (intr->num_components + c);
2878 /* handle dvec3/dvec4 */
2879 if (num_components + c > 4)
2880 readmask >>= 4;
2881 if ((wc->component_mask & readmask) == readmask)
2882 return false;
2883 uint32_t rewrite_mask = readmask & ~wc->component_mask;
2884 if (!rewrite_mask)
2885 return false;
2886 b->cursor = nir_after_instr(&intr->instr);
2887 nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2888 if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2889 switch (wc->slot) {
2890 case VARYING_SLOT_COL0:
2891 case VARYING_SLOT_COL1:
2892 case VARYING_SLOT_BFC0:
2893 case VARYING_SLOT_BFC1:
2894 /* default color is 0,0,0,1 */
2895 if (intr->def.num_components == 4)
2896 zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2897 break;
2898 default:
2899 break;
2900 }
2901 }
2902 rewrite_mask >>= c;
2903 nir_def *dest = &intr->def;
2904 u_foreach_bit(component, rewrite_mask)
2905 dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2906 nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2907 return true;
2908 }
2909
2910 static bool
2911 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2912 {
2913 struct write_components *wc = data;
2914 bool is_load = false;
2915 bool is_input = false;
2916 bool is_interp = false;
2917 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2918 return false;
2919 if (is_input || is_load)
2920 return false;
2921 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2922 if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2923 return false;
2924 unsigned location = s.location;
2925 unsigned c = nir_intrinsic_component(intr);
2926 uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2927 if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2928 unsigned num_components = intr->num_components * 2;
2929 nir_src *src_offset = nir_get_io_offset_src(intr);
2930 if (nir_src_is_const(*src_offset)) {
2931 if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2932 return false;
2933 }
2934 wrmask |= wrmask << intr->num_components;
2935 /* handle dvec3/dvec4 */
2936 if (num_components + c > 4)
2937 wrmask >>= 4;
2938 }
2939 wc->component_mask |= wrmask;
2940 return false;
2941 }
2942
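/* matches the producer's outputs with the consumer's inputs for a gfx pipeline:
 * unused pointsize is deleted, xfb info is dropped when the consumer isn't the
 * fragment shader, slots are packed through the io_slot_map, and partially
 * written slots get their unwritten components zero-filled on the consumer side
 */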
2943 void
2944 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2945 {
2946 uint64_t slot_track[4] = {0};
2947 uint64_t patch_slot_track[4] = {0};
2948 unsigned char slot_map[VARYING_SLOT_MAX];
2949 memset(slot_map, -1, sizeof(slot_map));
2950 unsigned char patch_slot_map[VARYING_SLOT_MAX];
2951 memset(patch_slot_map, -1, sizeof(patch_slot_map));
2952 struct io_slot_map io = {
2953 .patch_slot_track = patch_slot_track,
2954 .slot_track = slot_track,
2955 .slot_map = slot_map,
2956 .patch_slot_map = patch_slot_map,
2957 .reserved = 0,
2958 .patch_reserved = 0,
2959 };
2960 bool do_fixup = false;
2961 nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2962 nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2963 if (var) {
2964 bool can_remove = false;
2965 if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2966 /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2967 if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2968 can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2969 else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2970 can_remove = !var->data.explicit_location;
2971 }
2972 /* remove injected pointsize from all but the last vertex stage */
2973 if (can_remove) {
2974 var->data.mode = nir_var_shader_temp;
2975 nir_fixup_deref_modes(producer);
2976 delete_psiz_store(producer, false);
2977 NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2978 optimize_nir(producer, NULL, true);
2979 }
2980 }
2981 if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2982 producer->info.has_transform_feedback_varyings = false;
2983 nir_foreach_shader_out_variable(var_out, producer)
2984 var_out->data.explicit_xfb_buffer = false;
2985 }
2986 if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2987 /* never assign from tcs -> tes, always invert */
2988 nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2989 assign_producer_var_io(consumer->info.stage, var_in, &io);
2990 nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2991 if (!assign_consumer_var_io(producer->info.stage, var_out, &io))
2992 /* this is an output, nothing more needs to be done for it to be dropped */
2993 do_fixup = true;
2994 }
2995 } else {
2996 nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2997 assign_producer_var_io(producer->info.stage, var_out, &io);
2998 nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2999 if (!assign_consumer_var_io(consumer->info.stage, var_in, &io)) {
3000 do_fixup = true;
3001 /* input needs to be rewritten */
3002 nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
3003 }
3004 }
3005 if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_compiler_workarounds.needs_sanitised_layer)
3006 do_fixup |= clamp_layer_output(producer, consumer, &io.reserved);
3007 }
3008 nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
3009 if (producer->info.io_lowered && consumer->info.io_lowered) {
3010 u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
3011 struct write_components wc = {slot, 0};
3012 nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
3013 assert(wc.component_mask);
3014 if (wc.component_mask != BITFIELD_MASK(4))
3015 do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
3016 }
3017 }
3018 if (!do_fixup)
3019 return;
3020 nir_fixup_deref_modes(nir);
3021 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3022 optimize_nir(nir, NULL, true);
3023 }
3024
3025 /* all types that hit this function contain something that is 64bit */
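/* a few illustrative mappings implied by the rules below (non-doubles_only path):
 *   double -> vec2 of 32bit components
 *   dvec2  -> 4-component 32bit vector
 *   dvec3  -> struct { vec4, vec2 }  (6 components, split at vec4 boundaries)
 *   dvec4  -> struct { vec4, vec4 }
 *   dmat3  -> columns padded to dvec4, i.e. a struct of six vec4 members
 * in doubles_only mode, plain double vectors/scalars are instead kept as
 * 64bit uint vectors
 */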
3026 static const struct glsl_type *
3027 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
3028 {
3029 if (glsl_type_is_array(type)) {
3030 const struct glsl_type *child = glsl_get_array_element(type);
3031 unsigned elements = glsl_array_size(type);
3032 unsigned stride = glsl_get_explicit_stride(type);
3033 return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
3034 }
3035 /* rewrite structs recursively */
3036 if (glsl_type_is_struct_or_ifc(type)) {
3037 unsigned nmembers = glsl_get_length(type);
3038 struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
3039 unsigned xfb_offset = 0;
3040 for (unsigned i = 0; i < nmembers; i++) {
3041 const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
3042 fields[i] = *f;
3043 xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
3044 if (i < nmembers - 1 && xfb_offset % 8 &&
3045 (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
3046 (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
3047 var->data.is_xfb = true;
3048 }
3049 fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
3050 }
3051 return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
3052 }
3053 if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
3054 return type;
3055 if (doubles_only && glsl_type_is_vector_or_scalar(type))
3056 return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
3057 enum glsl_base_type base_type;
3058 switch (glsl_get_base_type(type)) {
3059 case GLSL_TYPE_UINT64:
3060 base_type = GLSL_TYPE_UINT;
3061 break;
3062 case GLSL_TYPE_INT64:
3063 base_type = GLSL_TYPE_INT;
3064 break;
3065 case GLSL_TYPE_DOUBLE:
3066 base_type = GLSL_TYPE_FLOAT;
3067 break;
3068 default:
3069 unreachable("unknown 64-bit vertex attribute format!");
3070 }
3071 if (glsl_type_is_scalar(type))
3072 return glsl_vector_type(base_type, 2);
3073 unsigned num_components;
3074 if (glsl_type_is_matrix(type)) {
3075 /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
3076 unsigned vec_components = glsl_get_vector_elements(type);
3077 if (vec_components == 3)
3078 vec_components = 4;
3079 num_components = vec_components * 2 * glsl_get_matrix_columns(type);
3080 } else {
3081 num_components = glsl_get_vector_elements(type) * 2;
3082 if (num_components <= 4)
3083 return glsl_vector_type(base_type, num_components);
3084 }
3085 /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
3086 struct glsl_struct_field fields[8] = {0};
3087 unsigned remaining = num_components;
3088 unsigned nfields = 0;
3089 for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
3090 assert(i < ARRAY_SIZE(fields));
3091 fields[i].name = "";
3092 fields[i].offset = i * 16;
3093 fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
3094 }
3095 char buf[64];
3096 snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
3097 return glsl_struct_type(fields, nfields, buf, true);
3098 }
3099
3100 static const struct glsl_type *
3101 deref_is_matrix(nir_deref_instr *deref)
3102 {
3103 if (glsl_type_is_matrix(deref->type))
3104 return deref->type;
3105 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3106 if (parent)
3107 return deref_is_matrix(parent);
3108 return NULL;
3109 }
3110
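/* rewrites every deref/load/store of 'var' after its type has been converted by
 * rewrite_64bit_type: 64bit loads/stores become pairs of 32bit components packed
 * and unpacked with (un)pack_64_2x32, and matrix row accesses are expanded into
 * per-column if-ladders (phi'd back together for loads) since the rewritten
 * struct layout no longer supports direct indirect column indexing
 */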
3111 static bool
3112 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
3113 struct hash_table *derefs, struct set *deletes, bool doubles_only)
3114 {
3115 bool func_progress = false;
3116 nir_builder b = nir_builder_create(impl);
3117 nir_foreach_block(block, impl) {
3118 nir_foreach_instr_safe(instr, block) {
3119 switch (instr->type) {
3120 case nir_instr_type_deref: {
3121 nir_deref_instr *deref = nir_instr_as_deref(instr);
3122 if (!(deref->modes & var->data.mode))
3123 continue;
3124 if (nir_deref_instr_get_variable(deref) != var)
3125 continue;
3126
3127 /* matrix types are special: store the original deref type for later use */
3128 const struct glsl_type *matrix = deref_is_matrix(deref);
3129 nir_deref_instr *parent = nir_deref_instr_parent(deref);
3130 if (!matrix) {
3131 /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
3132 hash_table_foreach(derefs, he) {
3133 /* propagate parent matrix type to row deref */
3134 if (he->key == parent)
3135 matrix = he->data;
3136 }
3137 }
3138 if (matrix)
3139 _mesa_hash_table_insert(derefs, deref, (void*)matrix);
3140 if (deref->deref_type == nir_deref_type_var)
3141 deref->type = var->type;
3142 else
3143 deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
3144 }
3145 break;
3146 case nir_instr_type_intrinsic: {
3147 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3148 if (intr->intrinsic != nir_intrinsic_store_deref &&
3149 intr->intrinsic != nir_intrinsic_load_deref)
3150 break;
3151 if (nir_intrinsic_get_var(intr, 0) != var)
3152 break;
3153 if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
3154 (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
3155 break;
3156 b.cursor = nir_before_instr(instr);
3157 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3158 unsigned num_components = intr->num_components * 2;
3159 nir_def *comp[NIR_MAX_VEC_COMPONENTS];
3160 /* this is the stored matrix type from the deref */
3161 struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
3162 const struct glsl_type *matrix = he ? he->data : NULL;
3163 if (doubles_only && !matrix)
3164 break;
3165 func_progress = true;
3166 if (intr->intrinsic == nir_intrinsic_store_deref) {
3167 /* first, unpack the src data to 32bit vec2 components */
3168 for (unsigned i = 0; i < intr->num_components; i++) {
3169 nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
3170 comp[i * 2] = nir_channel(&b, ssa, 0);
3171 comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
3172 }
3173 unsigned wrmask = nir_intrinsic_write_mask(intr);
3174 unsigned mask = 0;
3175 /* expand writemask for doubled components */
3176 for (unsigned i = 0; i < intr->num_components; i++) {
3177 if (wrmask & BITFIELD_BIT(i))
3178 mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
3179 }
3180 if (matrix) {
3181 /* matrix types always come from array (row) derefs */
3182 assert(deref->deref_type == nir_deref_type_array);
3183 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3184 /* let optimization clean up consts later */
3185 nir_def *index = deref->arr.index.ssa;
3186 /* this might be an indirect array index:
3187 * - iterate over matrix columns
3188 * - add if blocks for each column
3189 * - perform the store in the block
3190 */
3191 for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
3192 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3193 unsigned vec_components = glsl_get_vector_elements(matrix);
3194 /* always clamp dvec3 to 4 components */
3195 if (vec_components == 3)
3196 vec_components = 4;
3197 unsigned start_component = idx * vec_components * 2;
3198 /* struct member */
3199 unsigned member = start_component / 4;
3200 /* number of components remaining */
3201 unsigned remaining = num_components;
3202 for (unsigned i = 0; i < num_components; member++) {
3203 if (!(mask & BITFIELD_BIT(i)))
3204 continue;
3205 assert(member < glsl_get_length(var_deref->type));
3206 /* deref the rewritten struct to the appropriate vec4/vec2 */
3207 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3208 unsigned incr = MIN2(remaining, 4);
3209 /* assemble the write component vec */
3210 nir_def *val = nir_vec(&b, &comp[i], incr);
3211 /* use the number of components being written as the writemask */
3212 if (glsl_get_vector_elements(strct->type) > val->num_components)
3213 val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3214 nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3215 remaining -= incr;
3216 i += incr;
3217 }
3218 nir_pop_if(&b, NULL);
3219 }
3220 _mesa_set_add(deletes, &deref->instr);
3221 } else if (num_components <= 4) {
3222 /* simple store case: just write out the components */
3223 nir_def *dest = nir_vec(&b, comp, num_components);
3224 nir_store_deref(&b, deref, dest, mask);
3225 } else {
3226 /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3227 for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3228 if (!(mask & BITFIELD_MASK(4)))
3229 continue;
3230 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3231 nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3232 if (glsl_get_vector_elements(strct->type) > dest->num_components)
3233 dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3234 nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3235 mask >>= 4;
3236 }
3237 }
3238 } else {
3239 nir_def *dest = NULL;
3240 if (matrix) {
3241 /* matrix types always come from array (row) derefs */
3242 assert(deref->deref_type == nir_deref_type_array);
3243 nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3244 /* let optimization clean up consts later */
3245 nir_def *index = deref->arr.index.ssa;
3246 /* this might be an indirect array index:
3247 * - iterate over matrix columns
3248 * - add if blocks for each column
3249 * - phi the loads using the array index
3250 */
3251 unsigned cols = glsl_get_matrix_columns(matrix);
3252 nir_def *dests[4];
3253 for (unsigned idx = 0; idx < cols; idx++) {
3254 /* don't add an if for the final row: this will be handled in the else */
3255 if (idx < cols - 1)
3256 nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3257 unsigned vec_components = glsl_get_vector_elements(matrix);
3258 /* always clamp dvec3 to 4 components */
3259 if (vec_components == 3)
3260 vec_components = 4;
3261 unsigned start_component = idx * vec_components * 2;
3262 /* struct member */
3263 unsigned member = start_component / 4;
3264 /* number of components remaining */
3265 unsigned remaining = num_components;
3266 /* component index */
3267 unsigned comp_idx = 0;
3268 for (unsigned i = 0; i < num_components; member++) {
3269 assert(member < glsl_get_length(var_deref->type));
3270 nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3271 nir_def *load = nir_load_deref(&b, strct);
3272 unsigned incr = MIN2(remaining, 4);
3273 /* repack the loads to 64bit */
3274 for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3275 comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3276 remaining -= incr;
3277 i += incr;
3278 }
3279 dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3280 if (idx < cols - 1)
3281 nir_push_else(&b, NULL);
3282 }
3283 /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3284 for (unsigned idx = cols - 1; idx >= 1; idx--) {
3285 nir_pop_if(&b, NULL);
3286 dest = nir_if_phi(&b, dests[idx - 1], dest);
3287 }
3288 _mesa_set_add(deletes, &deref->instr);
3289 } else if (num_components <= 4) {
3290 /* simple load case */
3291 nir_def *load = nir_load_deref(&b, deref);
3292 /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3293 for (unsigned i = 0; i < intr->num_components; i++) {
3294 comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3295 }
3296 dest = nir_vec(&b, comp, intr->num_components);
3297 } else {
3298 /* loading > 4 components: access the struct and load the appropriate vec4 members */
3299 for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3300 nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3301 nir_def *load = nir_load_deref(&b, strct);
3302 comp[i * 2] = nir_pack_64_2x32(&b,
3303 nir_trim_vector(&b, load, 2));
3304 if (num_components > 2)
3305 comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3306 }
3307 dest = nir_vec(&b, comp, intr->num_components);
3308 }
3309 nir_def_rewrite_uses_after(&intr->def, dest, instr);
3310 }
3311 _mesa_set_add(deletes, instr);
3312 break;
3313 }
3314 break;
3315 default: break;
3316 }
3317 }
3318 }
3319 if (func_progress)
3320 nir_metadata_preserve(impl, nir_metadata_none);
3321 /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3322 set_foreach_remove(deletes, he)
3323 nir_instr_remove((void*)he->key);
3324 return func_progress;
3325 }
3326
3327 static bool
3328 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3329 struct set *deletes, bool doubles_only)
3330 {
3331 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3332 return false;
3333 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3334 /* once type is rewritten, rewrite all loads and stores */
3335 nir_foreach_function_impl(impl, shader)
3336 lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3337 return true;
3338 }
3339
3340 /* rewrite all input/output variables using 32bit types and load/stores */
3341 static bool
3342 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3343 {
3344 bool progress = false;
3345 struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3346 struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3347 nir_foreach_function_impl(impl, shader) {
3348 nir_foreach_function_temp_variable(var, impl) {
3349 if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3350 continue;
3351 var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3352 progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3353 }
3354 }
3355 ralloc_free(deletes);
3356 ralloc_free(derefs);
3357 if (progress) {
3358 nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3359 nir_lower_phis_to_scalar(shader, false);
3360 optimize_nir(shader, NULL, true);
3361 }
3362 return progress;
3363 }
3364
3365 static void
3366 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3367 {
3368 FILE *fp = fopen(file, "wb");
3369 if (fp) {
3370 fwrite(words, 1, size, fp);
3371 fclose(fp);
3372 fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3373 }
3374 }
3375
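/* returns the set of stages allowed to follow 'stage' in a pipeline; used to
 * fill VkShaderCreateInfoEXT::nextStage when creating EXT_shader_object shaders
 */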
3376 static VkShaderStageFlagBits
3377 zink_get_next_stage(gl_shader_stage stage)
3378 {
3379 switch (stage) {
3380 case MESA_SHADER_VERTEX:
3381 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3382 VK_SHADER_STAGE_GEOMETRY_BIT |
3383 VK_SHADER_STAGE_FRAGMENT_BIT;
3384 case MESA_SHADER_TESS_CTRL:
3385 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3386 case MESA_SHADER_TESS_EVAL:
3387 return VK_SHADER_STAGE_GEOMETRY_BIT |
3388 VK_SHADER_STAGE_FRAGMENT_BIT;
3389 case MESA_SHADER_GEOMETRY:
3390 return VK_SHADER_STAGE_FRAGMENT_BIT;
3391 case MESA_SHADER_FRAGMENT:
3392 case MESA_SHADER_COMPUTE:
3393 case MESA_SHADER_KERNEL:
3394 return 0;
3395 default:
3396 unreachable("invalid shader stage");
3397 }
3398 }
3399
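/* creates the vulkan object for compiled spirv: either a VkShaderModule or,
 * when shader objects can be used, a VkShaderEXT with the appropriate set
 * layouts and push constant range; with ZINK_DEBUG_VALIDATION the spirv is
 * also round-tripped through spirv_to_nir as a sanity check
 */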
3400 struct zink_shader_object
3401 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3402 {
3403 VkShaderModuleCreateInfo smci = {0};
3404 VkShaderCreateInfoEXT sci = {0};
3405
3406 if (!spirv)
3407 spirv = zs->spirv;
3408
3409 if (zink_debug & ZINK_DEBUG_SPIRV) {
3410 char buf[256];
3411 static int i;
3412 snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3413 zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3414 }
3415
3416 sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3417 sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3418 sci.nextStage = zink_get_next_stage(zs->info.stage);
3419 sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3420 sci.codeSize = spirv->num_words * sizeof(uint32_t);
3421 sci.pCode = spirv->words;
3422 sci.pName = "main";
3423 VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3424 if (pg) {
3425 sci.setLayoutCount = pg->num_dsl;
3426 sci.pSetLayouts = pg->dsl;
3427 } else {
3428 sci.setLayoutCount = zs->info.stage + 1;
3429 dsl[zs->info.stage] = zs->precompile.dsl;
3430 sci.pSetLayouts = dsl;
3431 }
3432 VkPushConstantRange pcr;
3433 pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3434 pcr.offset = 0;
3435 pcr.size = sizeof(struct zink_gfx_push_constant);
3436 sci.pushConstantRangeCount = 1;
3437 sci.pPushConstantRanges = &pcr;
3438
3439 smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3440 smci.codeSize = spirv->num_words * sizeof(uint32_t);
3441 smci.pCode = spirv->words;
3442
3443 #ifndef NDEBUG
3444 if (zink_debug & ZINK_DEBUG_VALIDATION) {
3445 static const struct spirv_to_nir_options spirv_options = {
3446 .environment = NIR_SPIRV_VULKAN,
3447 .capabilities = NULL,
3448 .ubo_addr_format = nir_address_format_32bit_index_offset,
3449 .ssbo_addr_format = nir_address_format_32bit_index_offset,
3450 .phys_ssbo_addr_format = nir_address_format_64bit_global,
3451 .push_const_addr_format = nir_address_format_logical,
3452 .shared_addr_format = nir_address_format_32bit_offset,
3453 };
3454 uint32_t num_spec_entries = 0;
3455 struct nir_spirv_specialization *spec_entries = NULL;
3456 VkSpecializationInfo sinfo = {0};
3457 VkSpecializationMapEntry me[3];
3458 uint32_t size[3] = {1,1,1};
3459 if (!zs->info.workgroup_size[0]) {
3460 sinfo.mapEntryCount = 3;
3461 sinfo.pMapEntries = &me[0];
3462 sinfo.dataSize = sizeof(uint32_t) * 3;
3463 sinfo.pData = size;
3464 uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3465 for (int i = 0; i < 3; i++) {
3466 me[i].size = sizeof(uint32_t);
3467 me[i].constantID = ids[i];
3468 me[i].offset = i * sizeof(uint32_t);
3469 }
3470 spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3471 }
3472 nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3473 spec_entries, num_spec_entries,
3474 clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3475 assert(nir);
3476 ralloc_free(nir);
3477 free(spec_entries);
3478 }
3479 #endif
3480
3481 VkResult ret;
3482 struct zink_shader_object obj = {0};
3483 if (!can_shobj || !screen->info.have_EXT_shader_object)
3484 ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3485 else
3486 ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3487 ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3488 assert(success);
3489 return obj;
3490 }
3491
3492 static void
3493 prune_io(nir_shader *nir)
3494 {
3495 nir_foreach_shader_in_variable_safe(var, nir) {
3496 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3497 var->data.mode = nir_var_shader_temp;
3498 }
3499 nir_foreach_shader_out_variable_safe(var, nir) {
3500 if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3501 var->data.mode = nir_var_shader_temp;
3502 }
3503 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3504 }
3505
3506 static void
3507 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3508 {
3509 assert(var->data.driver_location < 32); //bitfield size for tracking
3510 zs->fs.legacy_shadow_mask |= BITFIELD_BIT(var->data.driver_location);
3511 }
3512
3513 static void
3514 flag_shadow_tex_instr(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3515 {
3516 assert(var);
3517 unsigned num_components = tex->def.num_components;
3518 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3519 if (rewrite_depth && nir_def_components_read( &tex->def) & ~1) {
3520 /* this needs recompiles */
3521 if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3522 flag_shadow_tex(var, zs);
3523 else
3524 mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3525 }
3526 }
3527
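/* adjusts a tex instruction's dest to match the sampler's declared return type:
 * converts the result bit size with u2u/i2i/f2f as needed, and turns legacy
 * (vec4) shadow results into new-style single-component results when only .x
 * is consumed
 */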
3528 static nir_def *
3529 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3530 {
3531 assert(var);
3532 const struct glsl_type *type = glsl_without_array(var->type);
3533 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3534 bool is_int = glsl_base_type_is_integer(ret_type);
3535 unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3536 unsigned dest_size = tex->def.bit_size;
3537 b->cursor = nir_after_instr(&tex->instr);
3538 unsigned num_components = tex->def.num_components;
3539 bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3540 if (bit_size == dest_size && !rewrite_depth)
3541 return NULL;
3542 nir_def *dest = &tex->def;
3543 if (rewrite_depth && zs) {
3544 if (nir_def_components_read(dest) & ~1) {
3545 /* handled above */
3546 return NULL;
3547 }
3548 /* If only .x is used in the NIR, then it's effectively not a legacy depth
3549 * sample anyway and we don't want to ask for shader recompiles. This is
3550 * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3551 * LUMINANCE, so apps just use the first channel.
3552 */
3553 tex->def.num_components = 1;
3554 tex->is_new_style_shadow = true;
3555 }
3556 if (bit_size != dest_size) {
3557 tex->def.bit_size = bit_size;
3558 tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3559
3560 if (is_int) {
3561 if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3562 dest = nir_u2uN(b, &tex->def, dest_size);
3563 else
3564 dest = nir_i2iN(b, &tex->def, dest_size);
3565 } else {
3566 dest = nir_f2fN(b, &tex->def, dest_size);
3567 }
3568 if (!rewrite_depth)
3569 nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3570 }
3571 return dest;
3572 }
3573
3574 struct lower_zs_swizzle_state {
3575 bool shadow_only;
3576 unsigned base_sampler_id;
3577 const struct zink_zs_swizzle_key *swizzle;
3578 };
3579
3580 static bool
3581 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3582 {
3583 struct lower_zs_swizzle_state *state = data;
3584 const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3585 assert(state->shadow_only || swizzle_key);
3586 if (instr->type != nir_instr_type_tex)
3587 return false;
3588 nir_tex_instr *tex = nir_instr_as_tex(instr);
3589 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3590 (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3591 return false;
3592 if (tex->is_shadow && tex->op == nir_texop_tg4)
3593 /* Will not even try to emulate the shadow comparison */
3594 return false;
3595 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3596 nir_variable *var = NULL;
3597 if (handle != -1)
3598 /* gtfo bindless depth texture mode */
3599 return false;
3600 var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
3601 assert(var);
3602 uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3603 const struct glsl_type *type = glsl_without_array(var->type);
3604 enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3605 bool is_int = glsl_base_type_is_integer(ret_type);
3606 unsigned num_components = tex->def.num_components;
3607 if (tex->is_shadow)
3608 tex->is_new_style_shadow = true;
3609 nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3610 assert(dest || !state->shadow_only);
3611 if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3612 return false;
3613 else if (!dest)
3614 dest = &tex->def;
3615 else
3616 tex->def.num_components = 1;
3617 if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3618 /* these require manual swizzles */
3619 if (tex->op == nir_texop_tg4) {
3620 assert(!tex->is_shadow);
3621 nir_def *swizzle;
3622 switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3623 case PIPE_SWIZZLE_0:
3624 swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3625 break;
3626 case PIPE_SWIZZLE_1:
3627 if (is_int)
3628 swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3629 else
3630 swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3631 break;
3632 default:
3633 if (!tex->component)
3634 return false;
3635 tex->component = 0;
3636 return true;
3637 }
3638 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3639 return true;
3640 }
3641 nir_def *vec[4];
3642 for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3643 switch (swizzle_key->swizzle[sampler_id].s[i]) {
3644 case PIPE_SWIZZLE_0:
3645 vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3646 break;
3647 case PIPE_SWIZZLE_1:
3648 if (is_int)
3649 vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3650 else
3651 vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3652 break;
3653 default:
3654 vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3655 break;
3656 }
3657 }
3658 nir_def *swizzle = nir_vec(b, vec, num_components);
3659 nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3660 } else {
3661 assert(tex->is_shadow);
3662 nir_def *vec[4] = {dest, dest, dest, dest};
3663 nir_def *splat = nir_vec(b, vec, num_components);
3664 nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3665 }
3666 return true;
3667 }
3668
3669 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3670 *
3671 * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3672 * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3673 * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3674 * shader to expand out to vec4. Since this depends on sampler state, it's a
3675 * draw-time shader recompile to do so.
3676 *
3677 * We may also need to apply shader swizzles for
3678 * driver_compiler_workarounds.needs_zs_shader_swizzle.
3679 */
3680 static bool
3681 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3682 {
3683 /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3684 unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3685 struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3686 return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr,
3687 nir_metadata_control_flow,
3688 (void*)&state);
3689 }
3690
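/* flips gl_PointCoord.y (y -> 1 - y) when the shader key requests it, to
 * reconcile GL's configurable point sprite origin with vulkan's fixed
 * upper-left origin
 */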
3691 static bool
3692 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3693 void *data)
3694 {
3695 if (intr->intrinsic != nir_intrinsic_load_point_coord)
3696 return false;
3697 b->cursor = nir_after_instr(&intr->instr);
3698 nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3699 nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3700 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3701 return true;
3702 }
3703
3704 static bool
3705 invert_point_coord(nir_shader *nir)
3706 {
3707 if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3708 return false;
3709 return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3710 nir_metadata_dominance, NULL);
3711 }
3712
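/* lowers sparse residency handling: the residency code is re-derived from the
 * zink-specific is_sparse_resident intrinsic and inserted as the last result
 * component, while sparse_residency_code_and / is_sparse_texels_resident
 * collapse to plain iand / i2b
 */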
3713 static bool
3714 lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data)
3715 {
3716 b->cursor = nir_after_instr(instr);
3717
3718 switch (instr->type) {
3719 case nir_instr_type_tex: {
3720 nir_tex_instr *tex = nir_instr_as_tex(instr);
3721 if (!tex->is_sparse)
3722 return false;
3723
3724 nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def));
3725 nir_def *vec = nir_vector_insert_imm(b, &tex->def, res,
3726 tex->def.num_components - 1);
3727 nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr);
3728 return true;
3729 }
3730
3731 case nir_instr_type_intrinsic: {
3732 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3733 switch (intrin->intrinsic) {
3734 case nir_intrinsic_image_deref_sparse_load: {
3735 nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def));
3736 nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4);
3737 nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
3738 return true;
3739 }
3740
3741 case nir_intrinsic_sparse_residency_code_and: {
3742 nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa);
3743 nir_def_rewrite_uses(&intrin->def, res);
3744 return true;
3745 }
3746
3747 case nir_intrinsic_is_sparse_texels_resident: {
3748 nir_def *res = nir_i2b(b, intrin->src[0].ssa);
3749 nir_def_rewrite_uses(&intrin->def, res);
3750 return true;
3751 }
3752
3753 default:
3754 return false;
3755 }
3756 }
3757
3758 default:
3759 return false;
3760 }
3761 }
3762
3763 static bool
3764 lower_sparse(nir_shader *shader)
3765 {
3766 return nir_shader_instructions_pass(shader, lower_sparse_instr,
3767 nir_metadata_dominance, NULL);
3768 }
3769
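/* converts lowered io intrinsics (load_input/store_output/interp) back into
 * variable derefs by matching location, component, and array offset against
 * the shader's io variables, so later passes and nir_to_spirv can operate on
 * deref-based io
 */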
3770 static bool
3771 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3772 {
3773 bool is_load = false;
3774 bool is_input = false;
3775 bool is_interp = false;
3776 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3777 return false;
3778 bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) ||
3779 (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input);
3780 unsigned loc = nir_intrinsic_io_semantics(intr).location;
3781 nir_src *src_offset = nir_get_io_offset_src(intr);
3782 const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3783 unsigned location = loc + slot_offset;
3784 unsigned frac = nir_intrinsic_component(intr);
3785 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3786 /* set c aligned/rounded down to dword */
3787 unsigned c = frac;
3788 if (frac && bit_size < 32)
3789 c = frac * bit_size / 32;
3790 /* loop over all the variables and rewrite corresponding access */
3791 nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3792 const struct glsl_type *type = var->type;
3793 if (nir_is_arrayed_io(var, b->shader->info.stage))
3794 type = glsl_get_array_element(type);
3795 unsigned slot_count = get_var_slot_count(b->shader, var);
3796 /* filter access that isn't specific to this variable */
3797 if (var->data.location > location || var->data.location + slot_count <= location)
3798 continue;
3799 if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3800 continue;
3801 if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3802 continue;
3803
3804 unsigned size = 0;
3805 bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3806 if (is_struct)
3807 size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3808 else if (!is_special_io && var->data.compact)
3809 size = glsl_get_aoa_size(type);
3810 else
3811 size = glsl_get_vector_elements(glsl_without_array(type));
3812 assert(size);
3813 if (glsl_type_is_64bit(glsl_without_array(var->type)))
3814 size *= 2;
3815 if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3816 /* adjust for dvec3-type slot overflow */
3817 assert(location > var->data.location);
3818 size -= (location - var->data.location) * 4;
3819 }
3820 assert(size);
3821 if (var->data.location_frac + size <= c || var->data.location_frac > c)
3822 continue;
3823
3824 b->cursor = nir_before_instr(&intr->instr);
3825 nir_deref_instr *deref = nir_build_deref_var(b, var);
3826 if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3827 assert(intr->intrinsic != nir_intrinsic_store_output);
3828 deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3829 }
3830 if (glsl_type_is_array(type)) {
3831 /* unroll array derefs */
3832 unsigned idx = var->data.compact ? (frac - var->data.location_frac) : 0;
3833 assert(src_offset);
3834 if (var->data.location < VARYING_SLOT_VAR0) {
3835 if (src_offset) {
3836 /* clip/cull dist and tess levels use different array offset semantics */
3837 bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3838 is_clipcull_dist(var->data.location);
3839 bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3840 (var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER);
3841 bool is_builtin_array = is_clipdist || is_tess_level;
3842 /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3843 if (nir_src_is_const(*src_offset)) {
3844 unsigned offset = slot_offset;
3845 if (is_builtin_array)
3846 offset *= 4;
3847 if (is_clipdist) {
3848 if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1)
3849 offset += 4;
3850 }
3851 deref = nir_build_deref_array_imm(b, deref, offset + idx);
3852 } else {
3853 nir_def *offset = src_offset->ssa;
3854 if (is_builtin_array)
3855 offset = nir_imul_imm(b, offset, 4);
3856 deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : offset);
3857 }
3858 } else {
3859 deref = nir_build_deref_array_imm(b, deref, idx);
3860 }
3861 type = glsl_get_array_element(type);
3862 } else {
3863 idx += location - var->data.location;
3864 /* need to convert possible N*M to [N][M] */
3865 nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3866 while (glsl_type_is_array(type)) {
3867 const struct glsl_type *elem = glsl_get_array_element(type);
3868 unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3869 nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3870 if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3871 n = nir_udiv_imm(b, n, 2);
3872 deref = nir_build_deref_array(b, deref, n);
3873 nm = nir_umod_imm(b, nm, type_size);
3874 type = glsl_get_array_element(type);
3875 }
3876 }
3877 } else if (glsl_type_is_struct(type)) {
3878 deref = nir_build_deref_struct(b, deref, slot_offset);
3879 }
3880 assert(!glsl_type_is_array(type));
3881 unsigned num_components = glsl_get_vector_elements(type);
3882 if (is_load) {
3883 nir_def *load;
3884 if (is_interp) {
3885 nir_def *interp = intr->src[0].ssa;
3886 nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3887 assert(interp_intr);
3888 var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3889 switch (interp_intr->intrinsic) {
3890 case nir_intrinsic_load_barycentric_centroid:
3891 load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def);
3892 break;
3893 case nir_intrinsic_load_barycentric_sample:
3894 var->data.sample = 1;
3895 load = nir_load_deref(b, deref);
3896 break;
3897 case nir_intrinsic_load_barycentric_pixel:
3898 load = nir_load_deref(b, deref);
3899 break;
3900 case nir_intrinsic_load_barycentric_at_sample:
3901 load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3902 break;
3903 case nir_intrinsic_load_barycentric_at_offset:
3904 load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3905 break;
3906 default:
3907 unreachable("unhandled interp!");
3908 }
3909 } else {
3910 load = nir_load_deref(b, deref);
3911 }
3912 /* filter needed components */
3913 if (intr->num_components < load->num_components)
3914 load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3915 nir_def_rewrite_uses(&intr->def, load);
3916 } else {
3917 nir_def *store = intr->src[0].ssa;
3918 /* pad/filter components to match deref type */
3919 if (intr->num_components < num_components) {
3920 nir_def *zero = nir_imm_zero(b, 1, bit_size);
3921 nir_def *vec[4] = {zero, zero, zero, zero};
3922 u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3923 vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3924 store = nir_vec(b, vec, num_components);
3925 } else if (store->num_components > num_components) {
3926 store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3927 }
3928 if (store->bit_size != glsl_get_bit_size(type)) {
3929 /* this should be some weird bindless io conversion */
3930 assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3931 assert(num_components != store->num_components);
3932 store = nir_unpack_64_2x32(b, store);
3933 }
3934 nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3935 }
3936 nir_instr_remove(&intr->instr);
3937 return true;
3938 }
3939 unreachable("failed to find variable for explicit io!");
3940 return true;
3941 }
3942
3943 static bool
3944 add_derefs(nir_shader *nir)
3945 {
3946 return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3947 nir_metadata_dominance, NULL);
3948 }
3949
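/* final per-module compilation: prunes unused io variables, runs divergence
 * analysis where required, converts out of ssa, optionally dumps the nir, and
 * hands the result to nir_to_spirv before creating the vulkan shader object
 */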
3950 static struct zink_shader_object
3951 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3952 {
3953 struct zink_shader_info *sinfo = &zs->sinfo;
3954 prune_io(nir);
3955
3956 switch (nir->info.stage) {
3957 case MESA_SHADER_VERTEX:
3958 case MESA_SHADER_TESS_EVAL:
3959 case MESA_SHADER_GEOMETRY:
3960 NIR_PASS_V(nir, nir_divergence_analysis);
3961 break;
3962 default: break;
3963 }
3964 NIR_PASS_V(nir, nir_convert_from_ssa, true);
3965
3966 if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3967 nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3968 if (zink_debug & ZINK_DEBUG_NIR) {
3969 fprintf(stderr, "NIR shader:\n---8<---\n");
3970 nir_print_shader(nir, stderr);
3971 fprintf(stderr, "---8<---\n");
3972 }
3973
3974 struct zink_shader_object obj = {0};
3975 struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen);
3976 if (spirv)
3977 obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3978
3979 /* TODO: determine if there's any reason to cache spirv output? */
3980 if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3981 zs->spirv = spirv;
3982 else
3983 obj.spirv = spirv;
3984 return obj;
3985 }
3986
3987 static bool
3988 remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data)
3989 {
3990 if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample)
3991 return false;
3992
3993 b->cursor = nir_before_instr(&interp->instr);
3994 nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0]));
3995 nir_def_rewrite_uses(&interp->def, res);
3996
3997 return true;
3998 }
3999
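/* compiles a shader variant for the given shader key: applies all key-dependent
 * lowering (decomposed vertex attribs, line/point smoothing and stippling,
 * sample-rate forcing, dual-src blend, zs shader swizzles, nonseamless cubemaps,
 * inlined uniforms), re-optimizes as needed, and then compiles the module
 */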
4000 struct zink_shader_object
4001 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
4002 nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
4003 {
4004 bool need_optimize = true;
4005 bool inlined_uniforms = false;
4006
4007 NIR_PASS_V(nir, add_derefs);
4008 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4009 if (key) {
4010 if (key->inline_uniforms) {
4011 NIR_PASS_V(nir, nir_inline_uniforms,
4012 nir->info.num_inlinable_uniforms,
4013 key->base.inlined_uniform_values,
4014 nir->info.inlinable_uniform_dw_offsets);
4015
4016 inlined_uniforms = true;
4017 }
4018
4019 /* TODO: use a separate mem ctx here for ralloc */
4020
4021 if (!screen->optimal_keys) {
4022 switch (zs->info.stage) {
4023 case MESA_SHADER_VERTEX: {
4024 uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
4025 const struct zink_vs_key *vs_key = zink_vs_key(key);
4026 switch (vs_key->size) {
4027 case 4:
4028 decomposed_attrs = vs_key->u32.decomposed_attrs;
4029 decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
4030 break;
4031 case 2:
4032 decomposed_attrs = vs_key->u16.decomposed_attrs;
4033 decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
4034 break;
4035 case 1:
4036 decomposed_attrs = vs_key->u8.decomposed_attrs;
4037 decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
4038 break;
4039 default: break;
4040 }
4041 if (decomposed_attrs || decomposed_attrs_without_w)
4042 NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
4043 break;
4044 }
4045
4046 case MESA_SHADER_GEOMETRY:
4047 if (zink_gs_key(key)->lower_line_stipple) {
4048 NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
4049 NIR_PASS_V(nir, nir_lower_var_copies);
4050 need_optimize = true;
4051 }
4052
4053 if (zink_gs_key(key)->lower_line_smooth) {
4054 NIR_PASS_V(nir, lower_line_smooth_gs);
4055 NIR_PASS_V(nir, nir_lower_var_copies);
4056 need_optimize = true;
4057 }
4058
4059 if (zink_gs_key(key)->lower_gl_point) {
4060 NIR_PASS_V(nir, lower_gl_point_gs);
4061 need_optimize = true;
4062 }
4063
4064 if (zink_gs_key(key)->lower_pv_mode) {
4065 NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
4066 need_optimize = true; //TODO verify that this is required
4067 }
4068 break;
4069
4070 default:
4071 break;
4072 }
4073 }
4074
4075 switch (zs->info.stage) {
4076 case MESA_SHADER_VERTEX:
4077 case MESA_SHADER_TESS_EVAL:
4078 case MESA_SHADER_GEOMETRY:
4079 if (zink_vs_key_base(key)->last_vertex_stage) {
4080 if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
4081 NIR_PASS_V(nir, nir_lower_clip_halfz);
4082 }
4083 if (zink_vs_key_base(key)->push_drawid) {
4084 NIR_PASS_V(nir, lower_drawid);
4085 }
4086 } else {
4087 nir->xfb_info = NULL;
4088 }
4089 if (zink_vs_key_base(key)->robust_access)
4090 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4091 break;
4092 case MESA_SHADER_FRAGMENT:
4093 if (zink_fs_key(key)->lower_line_smooth) {
4094 NIR_PASS_V(nir, lower_line_smooth_fs,
4095 zink_fs_key(key)->lower_line_stipple);
4096 need_optimize = true;
4097 } else if (zink_fs_key(key)->lower_line_stipple)
4098 NIR_PASS_V(nir, lower_line_stipple_fs);
4099
4100 if (zink_fs_key(key)->lower_point_smooth) {
4101 NIR_PASS_V(nir, nir_lower_point_smooth);
4102 NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
4103 nir->info.fs.uses_discard = true;
4104 need_optimize = true;
4105 }
4106
4107 if (zink_fs_key(key)->robust_access)
4108 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4109
4110 if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) {
4111 /* VK will always use gl_SampleMask[] values even if sample count is 0,
4112 * so we need to skip this write here to mimic GL's behavior of ignoring it
4113 */
4114 nir_foreach_shader_out_variable(var, nir) {
4115 if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
4116 var->data.mode = nir_var_shader_temp;
4117 }
4118 nir_fixup_deref_modes(nir);
4119 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4120 NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample,
4121 nir_metadata_control_flow, NULL);
4122
4123 need_optimize = true;
4124 }
4125 if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
4126 NIR_PASS_V(nir, lower_dual_blend);
4127 }
4128 if (zink_fs_key_base(key)->coord_replace_bits)
4129 NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
4130 if (zink_fs_key_base(key)->point_coord_yinvert)
4131 NIR_PASS_V(nir, invert_point_coord);
4132 if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
4133 nir_foreach_shader_in_variable(var, nir)
4134 var->data.sample = true;
4135 nir->info.fs.uses_sample_qualifier = true;
4136 nir->info.fs.uses_sample_shading = true;
4137 }
4138 if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
4139 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
4140 if (nir->info.fs.uses_fbfetch_output) {
4141 nir_variable *fbfetch = NULL;
4142 NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
4143 /* old variable must be deleted to avoid spirv errors */
4144 fbfetch->data.mode = nir_var_shader_temp;
4145 nir_fixup_deref_modes(nir);
4146 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4147 need_optimize = true;
4148 }
4149 nir_foreach_shader_in_variable_safe(var, nir) {
4150 if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
4151 continue;
4152 nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
4153 var->data.mode = nir_var_shader_temp;
4154 nir_fixup_deref_modes(nir);
4155 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4156 need_optimize = true;
4157 }
4158 break;
4159 case MESA_SHADER_COMPUTE:
4160 if (zink_cs_key(key)->robust_access)
4161 NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4162 break;
4163 default: break;
4164 }
4165 if (key->base.needs_zs_shader_swizzle) {
4166 assert(extra_data);
4167 NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4168 }
4169 if (key->base.nonseamless_cube_mask) {
4170 NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4171 need_optimize = true;
4172 }
4173 }
4174 if (screen->driconf.inline_uniforms) {
4175 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4176 NIR_PASS_V(nir, rewrite_bo_access, screen);
4177 NIR_PASS_V(nir, remove_bo_access, zs);
4178 need_optimize = true;
4179 }
4180 if (inlined_uniforms) {
4181 optimize_nir(nir, zs, true);
4182
4183 /* This must be done again. */
4184 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4185 nir_var_shader_out);
4186
4187 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4188 if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4189 zs->can_inline = false;
4190 } else if (need_optimize)
4191 optimize_nir(nir, zs, true);
4192 bool has_sparse = false;
4193 NIR_PASS(has_sparse, nir, lower_sparse);
4194 if (has_sparse)
4195 optimize_nir(nir, zs, false);
4196
4197 struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4198 ralloc_free(nir);
4199 return obj;
4200 }
4201
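/* compiles the precompile (separate shader) variant: descriptor sets and
 * bindings are remapped for the separate-shader layout, and for tess eval
 * shaders a companion tcs is pre-generated when EXT_shader_object is present
 */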
4202 struct zink_shader_object
4203 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4204 {
4205 nir_shader *nir = zs->nir;
4206 /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4207 int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4208 if (screen->info.have_EXT_shader_object)
4209 set = zs->info.stage;
4210 unsigned offsets[4];
4211 zink_descriptor_shader_get_binding_offsets(zs, offsets);
4212 nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4213 if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4214 continue;
4215 var->data.descriptor_set = set;
4216 switch (var->data.mode) {
4217 case nir_var_mem_ubo:
4218 var->data.binding = !!var->data.driver_location;
4219 break;
4220 case nir_var_uniform:
4221 if (glsl_type_is_sampler(glsl_without_array(var->type)))
4222 var->data.binding += offsets[1];
4223 break;
4224 case nir_var_mem_ssbo:
4225 var->data.binding += offsets[2];
4226 break;
4227 case nir_var_image:
4228 var->data.binding += offsets[3];
4229 break;
4230 default: break;
4231 }
4232 }
4233 NIR_PASS_V(nir, add_derefs);
4234 NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4235 if (screen->driconf.inline_uniforms) {
4236 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4237 NIR_PASS_V(nir, rewrite_bo_access, screen);
4238 NIR_PASS_V(nir, remove_bo_access, zs);
4239 }
4240 optimize_nir(nir, zs, true);
4241 zink_descriptor_shader_init(screen, zs);
4242 nir_shader *nir_clone = NULL;
4243 if (screen->info.have_EXT_shader_object)
4244 nir_clone = nir_shader_clone(nir, nir);
4245 struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4246 if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4247 /* always try to pre-generate a tcs in case it's needed */
4248 if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4249 nir_shader *nir_tcs = NULL;
4250 /* use max pcp for compat */
4251 zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, 32);
4252 zink_shader_tcs_init(screen, zs->non_fs.generated_tcs, nir_clone, &nir_tcs);
4253 nir_tcs->info.separate_shader = true;
4254 zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4255 ralloc_free(nir_tcs);
4256 zs->non_fs.generated_tcs->nir = NULL;
4257 }
4258 }
4259 spirv_shader_delete(obj.spirv);
4260 obj.spirv = NULL;
4261 return obj;
4262 }
4263
4264 static bool
4265 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4266 void *data)
4267 {
4268 if (intr->intrinsic != nir_intrinsic_load_instance_id)
4269 return false;
4270 b->cursor = nir_after_instr(&intr->instr);
4271 nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4272 nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4273 return true;
4274 }
4275
4276 static bool
4277 lower_baseinstance(nir_shader *shader)
4278 {
4279 if (shader->info.stage != MESA_SHADER_VERTEX)
4280 return false;
4281 return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4282 nir_metadata_dominance, NULL);
4283 }
4284
4285 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4286 * so instead we delete all those broken variables and just make new ones
4287 */
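/* The replacement variables are plain uint arrays wrapped in a block, sized to
 * the largest buffer actually used:
 *   - "uniform_0@32" for the default uniform block (driver_location 0)
 *   - "ubos@32" with one array element per remaining used UBO slot
 *   - "ssbos@32" with one element per used SSBO slot, left unsized when the
 *     shader queries buffer sizes or uses unsized tail arrays
 */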
4288 static bool
4289 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4290 {
4291 uint64_t max_ssbo_size = 0;
4292 uint64_t max_ubo_size = 0;
4293 uint64_t max_uniform_size = 0;
4294
4295 if (!shader->info.num_ssbos && !shader->info.num_ubos)
4296 return false;
4297
4298 nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4299 const struct glsl_type *type = glsl_without_array(var->type);
4300 if (type_is_counter(type))
4301 continue;
4302 /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4303 unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4304 const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4305 if (interface_type) {
4306 unsigned block_size = glsl_get_explicit_size(interface_type, true);
4307 if (glsl_get_length(interface_type) == 1) {
4308 /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4309 const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4310 if (glsl_type_is_array(f) && !glsl_array_size(f))
4311 block_size = 0;
4312 }
4313 if (block_size) {
4314 block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4315 size = MAX2(size, block_size);
4316 }
4317 }
4318 if (var->data.mode == nir_var_mem_ubo) {
4319 if (var->data.driver_location)
4320 max_ubo_size = MAX2(max_ubo_size, size);
4321 else
4322 max_uniform_size = MAX2(max_uniform_size, size);
4323 } else {
4324 max_ssbo_size = MAX2(max_ssbo_size, size);
4325 if (interface_type) {
4326 if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4327 needs_size = true;
4328 }
4329 }
4330 var->data.mode = nir_var_shader_temp;
4331 }
4332 nir_fixup_deref_modes(shader);
4333 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4334 optimize_nir(shader, NULL, true);
4335
4336 struct glsl_struct_field field = {0};
4337 field.name = ralloc_strdup(shader, "base");
4338 if (shader->info.num_ubos) {
4339 if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4340 field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4341 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4342 glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4343 "uniform_0@32");
4344 var->interface_type = var->type;
4345 var->data.mode = nir_var_mem_ubo;
4346 var->data.driver_location = 0;
4347 }
4348
4349 unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4350 uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4351 if (num_ubos && ubos_used) {
4352 field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4353 /* shrink array as much as possible */
4354 unsigned first_ubo = ffs(ubos_used) - 2;
4355 assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4356 num_ubos -= first_ubo;
4357 assert(num_ubos);
4358 nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4359 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4360 "ubos@32");
4361 var->interface_type = var->type;
4362 var->data.mode = nir_var_mem_ubo;
4363 var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4364 }
4365 }
4366 if (shader->info.num_ssbos && zs->ssbos_used) {
4367 /* shrink array as much as possible */
4368 unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4369 assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4370 unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4371 assert(num_ssbos);
4372 const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4373 field.type = ssbo_type;
4374 nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4375 glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4376 "ssbos@32");
4377 var->interface_type = var->type;
4378 var->data.mode = nir_var_mem_ssbo;
4379 var->data.driver_location = first_ssbo;
4380 }
4381 return true;
4382 }
4383
4384 static uint32_t
4385 get_src_mask_ssbo(unsigned total, nir_src src)
4386 {
4387 if (nir_src_is_const(src))
4388 return BITFIELD_BIT(nir_src_as_uint(src));
4389 return BITFIELD_MASK(total);
4390 }
4391
4392 static uint32_t
4393 get_src_mask_ubo(unsigned total, nir_src src)
4394 {
4395 if (nir_src_is_const(src))
4396 return BITFIELD_BIT(nir_src_as_uint(src));
4397 return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4398 }
4399
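/* Record which UBO/SSBO slots the shader actually references so unbreak_bos()
 * can shrink its replacement arrays; the return value reports that
 * get_ssbo_size was seen, i.e. the SSBO array must stay runtime-sized.
 */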
4400 static bool
4401 analyze_io(struct zink_shader *zs, nir_shader *shader)
4402 {
4403 bool ret = false;
4404 nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4405 nir_foreach_block(block, impl) {
4406 nir_foreach_instr(instr, block) {
4407 if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4408 /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4409 nir_tex_instr *tex = nir_instr_as_tex(instr);
4410 int deref_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
4411 if (deref_idx >= 0) {
4412 nir_variable *img = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[deref_idx].src.ssa->parent_instr));
4413 unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4414 BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4415 }
4416 continue;
4417 }
4418 if (instr->type != nir_instr_type_intrinsic)
4419 continue;
4420
4421 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4422 switch (intrin->intrinsic) {
4423 case nir_intrinsic_store_ssbo:
4424 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4425 break;
4426
4427 case nir_intrinsic_get_ssbo_size: {
4428 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4429 ret = true;
4430 break;
4431 }
4432 case nir_intrinsic_ssbo_atomic:
4433 case nir_intrinsic_ssbo_atomic_swap:
4434 case nir_intrinsic_load_ssbo:
4435 zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4436 break;
4437 case nir_intrinsic_load_ubo:
4438 case nir_intrinsic_load_ubo_vec4:
4439 zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4440 break;
4441 default:
4442 break;
4443 }
4444 }
4445 }
4446 return ret;
4447 }
4448
4449 struct zink_bindless_info {
4450 nir_variable *bindless[4];
4451 unsigned bindless_set;
4452 };
4453
4454 /* this is a "default" bindless texture used if the shader has no texture variables */
4455 static nir_variable *
4456 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4457 {
4458 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4459 nir_variable *var;
4460
4461 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4462 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4463 var->data.descriptor_set = descriptor_set;
4464 var->data.driver_location = var->data.binding = binding;
4465 return var;
4466 }
4467
4468 /* this is a "default" bindless image used if the shader has no image variables */
4469 static nir_variable *
4470 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4471 {
4472 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4473 nir_variable *var;
4474
4475 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4476 var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4477 var->data.descriptor_set = descriptor_set;
4478 var->data.driver_location = var->data.binding = binding;
4479 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4480 return var;
4481 }
4482
4483 /* rewrite bindless instructions as array deref instructions */
4484 static bool
4485 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4486 {
4487 struct zink_bindless_info *bindless = data;
4488
4489 if (in->type == nir_instr_type_tex) {
4490 nir_tex_instr *tex = nir_instr_as_tex(in);
4491 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4492 if (idx == -1)
4493 return false;
4494
4495 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4496 if (!var) {
4497 var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4498 if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4499 bindless->bindless[1] = var;
4500 else
4501 bindless->bindless[0] = var;
4502 }
4503 b->cursor = nir_before_instr(in);
4504 nir_deref_instr *deref = nir_build_deref_var(b, var);
4505 if (glsl_type_is_array(var->type))
4506 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4507 nir_src_rewrite(&tex->src[idx].src, &deref->def);
4508
4509 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4510 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4511 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4512 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4513 *
4514 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4515 * - Warhammer 40k: Dawn of War III
4516 */
4517 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4518 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4519 unsigned coord_components = nir_src_num_components(tex->src[c].src);
4520 if (coord_components < needed_components) {
4521 nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4522 nir_src_rewrite(&tex->src[c].src, def);
4523 tex->coord_components = needed_components;
4524 }
4525 return true;
4526 }
4527 if (in->type != nir_instr_type_intrinsic)
4528 return false;
4529 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4530
4531 nir_intrinsic_op op;
4532 #define OP_SWAP(OP) \
4533 case nir_intrinsic_bindless_image_##OP: \
4534 op = nir_intrinsic_image_deref_##OP; \
4535 break;
4536
4537
4538 /* convert bindless intrinsics to deref intrinsics */
4539 switch (instr->intrinsic) {
4540 OP_SWAP(atomic)
4541 OP_SWAP(atomic_swap)
4542 OP_SWAP(format)
4543 OP_SWAP(load)
4544 OP_SWAP(order)
4545 OP_SWAP(samples)
4546 OP_SWAP(size)
4547 OP_SWAP(store)
4548 default:
4549 return false;
4550 }
4551
4552 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4553 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4554 if (!var)
4555 var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4556 instr->intrinsic = op;
4557 b->cursor = nir_before_instr(in);
4558 nir_deref_instr *deref = nir_build_deref_var(b, var);
4559 if (glsl_type_is_array(var->type))
4560 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4561 nir_src_rewrite(&instr->src[0], &deref->def);
4562 return true;
4563 }
4564
4565 static bool
4566 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4567 {
4568 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4569 return false;
4570 nir_fixup_deref_modes(shader);
4571 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4572 optimize_nir(shader, NULL, true);
4573 return true;
4574 }
4575
4576 /* convert shader image/texture io variables to 64-bit handles (lowered to ivec2) for bindless indexing */
4577 static bool
4578 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4579 void *data)
4580 {
4581 bool is_load = false;
4582 bool is_input = false;
4583 bool is_interp = false;
4584 if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4585 return false;
4586
4587 nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4588 if (var->data.bindless)
4589 return false;
4590 if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4591 return false;
4592 if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4593 return false;
4594
4595 var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4596 var->data.bindless = 1;
4597 return true;
4598 }
4599
4600 static bool
4601 lower_bindless_io(nir_shader *shader)
4602 {
4603 return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4604 nir_metadata_dominance, NULL);
4605 }
4606
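/* Flat binding-number layout for non-bindless resources.  As a rough sketch
 * (compact_descriptors offsets omitted), a stage with index 'base' ends up with:
 *   - UBO n:                       base * 2 + (n != 0)
 *   - combined image/sampler n:    base * PIPE_MAX_SAMPLERS + n
 *   - SSBO:                        base
 *   - storage image/texel buffer n: base * ZINK_MAX_SHADER_IMAGES + n
 * Compute and kernel stages clamp base to 0, and kernels additionally split
 * samplers, sampled images, and storage images into their own index ranges.
 */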
4607 static uint32_t
4608 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4609 {
4610 if (stage == MESA_SHADER_NONE) {
4611 unreachable("not supported");
4612 } else {
4613 unsigned base = stage;
4614 /* clamp compute bindings for better driver efficiency */
4615 if (gl_shader_stage_is_compute(stage))
4616 base = 0;
4617 switch (type) {
4618 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4619 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4620 return base * 2 + !!index;
4621
4622 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4623 assert(stage == MESA_SHADER_KERNEL);
4624 FALLTHROUGH;
4625 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4626 if (stage == MESA_SHADER_KERNEL) {
4627 assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4628 return index + PIPE_MAX_SAMPLERS;
4629 }
4630 FALLTHROUGH;
4631 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4632 assert(index < PIPE_MAX_SAMPLERS);
4633 assert(stage != MESA_SHADER_KERNEL);
4634 return (base * PIPE_MAX_SAMPLERS) + index;
4635
4636 case VK_DESCRIPTOR_TYPE_SAMPLER:
4637 assert(index < PIPE_MAX_SAMPLERS);
4638 assert(stage == MESA_SHADER_KERNEL);
4639 return index;
4640
4641 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4642 return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4643
4644 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4645 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4646 assert(index < ZINK_MAX_SHADER_IMAGES);
4647 if (stage == MESA_SHADER_KERNEL)
4648 return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4649 return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4650
4651 default:
4652 unreachable("unexpected type");
4653 }
4654 }
4655 }
4656
4657 static void
4658 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4659 {
4660 if (glsl_type_is_struct(type)) {
4661 for (unsigned i = 0; i < glsl_get_length(type); i++)
4662 handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4663 return;
4664 }
4665
4666 /* just a random scalar in a struct */
4667 if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4668 return;
4669
4670 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4671 unsigned binding;
4672 switch (vktype) {
4673 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4674 binding = 0;
4675 break;
4676 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4677 binding = 1;
4678 break;
4679 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4680 binding = 2;
4681 break;
4682 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4683 binding = 3;
4684 break;
4685 default:
4686 unreachable("unknown");
4687 }
4688 if (!bindless->bindless[binding]) {
4689 bindless->bindless[binding] = nir_variable_clone(var, nir);
4690 bindless->bindless[binding]->data.bindless = 0;
4691 bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4692 bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4693 bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4694 if (!bindless->bindless[binding]->data.image.format)
4695 bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4696 nir_shader_add_variable(nir, bindless->bindless[binding]);
4697 } else {
4698 assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4699 }
4700 var->data.mode = nir_var_shader_temp;
4701 }
4702
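/* Promote 1D shadow samplers to 2D (used when the underlying driver can't
 * sample 1D shadow textures directly): the sampler dim is rewritten,
 * coord/offset/derivative sources gain a zero second component, and the
 * destination is re-narrowed so existing users keep seeing the original
 * number of components.
 */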
4703 static bool
4704 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4705 {
4706 struct zink_screen *screen = data;
4707 if (instr->type != nir_instr_type_tex)
4708 return false;
4709 nir_tex_instr *tex = nir_instr_as_tex(instr);
4710 if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4711 return false;
4712 if (tex->is_sparse && screen->need_2D_sparse) {
4713 /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4714 mesa_loge("unhandled/unsupported 1D sparse texture!");
4715 abort();
4716 }
4717 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4718 b->cursor = nir_before_instr(instr);
4719 tex->coord_components++;
4720 unsigned srcs[] = {
4721 nir_tex_src_coord,
4722 nir_tex_src_offset,
4723 nir_tex_src_ddx,
4724 nir_tex_src_ddy,
4725 };
4726 for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4727 unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4728 if (c == -1)
4729 continue;
4730 if (tex->src[c].src.ssa->num_components == tex->coord_components)
4731 continue;
4732 nir_def *def;
4733 nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4734 if (tex->src[c].src.ssa->num_components == 1)
4735 def = nir_vec2(b, tex->src[c].src.ssa, zero);
4736 else
4737 def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4738 nir_src_rewrite(&tex->src[c].src, def);
4739 }
4740 b->cursor = nir_after_instr(instr);
4741 unsigned needed_components = nir_tex_instr_dest_size(tex);
4742 unsigned num_components = tex->def.num_components;
4743 if (needed_components > num_components) {
4744 tex->def.num_components = needed_components;
4745 assert(num_components < 3);
4746 /* take either xz or just x since this is promoted to 2D from 1D */
4747 uint32_t mask = num_components == 2 ? (1|4) : 1;
4748 nir_def *dst = nir_channels(b, &tex->def, mask);
4749 nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4750 }
4751 return true;
4752 }
4753
4754 static bool
4755 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4756 {
4757 bool found = false;
4758 nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4759 const struct glsl_type *type = glsl_without_array(var->type);
4760 unsigned length = glsl_get_length(var->type);
4761 if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4762 continue;
4763 const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4764 var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4765
4766 found = true;
4767 }
4768 if (found) {
4769 nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4770 nir_fixup_deref_types(shader);
4771 }
4772 return found;
4773 }
4774
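/* Single scan over the shader: flags sparse residency usage, fills in
 * info.images_used from image derefs, notes which slots use arrayed
 * (per-vertex) i/o, widens clip/cull distance array sizes based on the
 * components actually accessed, and warns when float image atomics are
 * attempted without VK_EXT_shader_atomic_float.
 */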
4775 static void
4776 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4777 {
4778 nir_foreach_function_impl(impl, shader) {
4779 nir_foreach_block_safe(block, impl) {
4780 nir_foreach_instr_safe(instr, block) {
4781 if (instr->type == nir_instr_type_tex) {
4782 nir_tex_instr *tex = nir_instr_as_tex(instr);
4783 zs->sinfo.have_sparse |= tex->is_sparse;
4784 }
4785 if (instr->type != nir_instr_type_intrinsic)
4786 continue;
4787 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4788 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4789 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4790 intr->intrinsic == nir_intrinsic_image_deref_store ||
4791 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4792 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4793 intr->intrinsic == nir_intrinsic_image_deref_size ||
4794 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4795 intr->intrinsic == nir_intrinsic_image_deref_format ||
4796 intr->intrinsic == nir_intrinsic_image_deref_order) {
4797
4798 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4799
4800 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4801 const unsigned size =
4802 glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4803 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4804 var->data.binding + (MAX2(size, 1) - 1));
4805 }
4806 if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4807 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4808 zs->sinfo.have_sparse = true;
4809
4810 bool is_load = false;
4811 bool is_input = false;
4812 bool is_interp = false;
4813 if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) {
4814 nir_io_semantics s = nir_intrinsic_io_semantics(intr);
4815 if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) {
4816 if (is_input)
4817 zs->arrayed_inputs |= BITFIELD64_BIT(s.location);
4818 else
4819 zs->arrayed_outputs |= BITFIELD64_BIT(s.location);
4820 }
4821 /* TODO: delete this once #10826 is fixed */
4822 if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) {
4823 if (is_clipcull_dist(s.location)) {
4824 unsigned frac = nir_intrinsic_component(intr) + 1;
4825 if (s.location < VARYING_SLOT_CULL_DIST0) {
4826 if (s.location == VARYING_SLOT_CLIP_DIST1)
4827 frac += 4;
4828 shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots);
4829 } else {
4830 if (s.location == VARYING_SLOT_CULL_DIST1)
4831 frac += 4;
4832 shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots);
4833 }
4834 }
4835 }
4836 }
4837
4838 static bool warned = false;
4839 if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4840 switch (intr->intrinsic) {
4841 case nir_intrinsic_image_deref_atomic: {
4842 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4843 if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4844 util_format_is_float(var->data.image.format))
4845 fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4846 break;
4847 }
4848 default:
4849 break;
4850 }
4851 }
4852 }
4853 }
4854 }
4855 }
4856
4857 static bool
4858 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data, bool pre)
4859 {
4860 if (in->type != nir_instr_type_tex)
4861 return false;
4862 nir_tex_instr *tex = nir_instr_as_tex(in);
4863 if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4864 return false;
4865 int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4866 nir_variable *var = NULL;
4867 if (handle != -1) {
4868 if (pre)
4869 return false;
4870 var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4871 } else {
4872 var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
4873 }
4874 if (pre) {
4875 flag_shadow_tex_instr(b, tex, var, data);
4876 return false;
4877 }
4878 return !!rewrite_tex_dest(b, tex, var, data);
4879 }
4880
4881 static bool
4882 match_tex_dests_instr_pre(nir_builder *b, nir_instr *in, void *data)
4883 {
4884 return match_tex_dests_instr(b, in, data, true);
4885 }
4886
4887 static bool
4888 match_tex_dests_instr_post(nir_builder *b, nir_instr *in, void *data)
4889 {
4890 return match_tex_dests_instr(b, in, data, false);
4891 }
4892
4893 static bool
4894 match_tex_dests(nir_shader *shader, struct zink_shader *zs, bool pre_mangle)
4895 {
4896 return nir_shader_instructions_pass(shader, pre_mangle ? match_tex_dests_instr_pre : match_tex_dests_instr_post, nir_metadata_dominance, zs);
4897 }
4898
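/* Scalarize vector bitfield ops: an N-component bitfield_insert /
 * [iu]bitfield_extract is rebuilt as N scalar ops whose results are
 * re-vectorized with nir_vec().  SPIR-V's bitfield instructions take scalar
 * offset/count operands, which is presumably why the split happens here
 * rather than in nir_to_spirv.
 */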
4899 static bool
4900 split_bitfields_instr(nir_builder *b, nir_alu_instr *alu, void *data)
4901 {
4902 switch (alu->op) {
4903 case nir_op_ubitfield_extract:
4904 case nir_op_ibitfield_extract:
4905 case nir_op_bitfield_insert:
4906 break;
4907 default:
4908 return false;
4909 }
4910 unsigned num_components = alu->def.num_components;
4911 if (num_components == 1)
4912 return false;
4913 b->cursor = nir_before_instr(&alu->instr);
4914 nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4915 for (unsigned i = 0; i < num_components; i++) {
4916 if (alu->op == nir_op_bitfield_insert)
4917 dests[i] = nir_bitfield_insert(b,
4918 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4919 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4920 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4921 nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4922 else if (alu->op == nir_op_ubitfield_extract)
4923 dests[i] = nir_ubitfield_extract(b,
4924 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4925 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4926 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4927 else
4928 dests[i] = nir_ibitfield_extract(b,
4929 nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4930 nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4931 nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4932 }
4933 nir_def *dest = nir_vec(b, dests, num_components);
4934 nir_def_rewrite_uses_after(&alu->def, dest, &alu->instr);
4935 nir_instr_remove(&alu->instr);
4936 return true;
4937 }
4938
4939
4940 static bool
4941 split_bitfields(nir_shader *shader)
4942 {
4943 return nir_shader_alu_pass(shader, split_bitfields_instr,
4944 nir_metadata_dominance, NULL);
4945 }
4946
4947 static bool
4948 strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data)
4949 {
4950 if (in->type != nir_instr_type_intrinsic)
4951 return false;
4952 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in);
4953 switch (intr->intrinsic) {
4954 case nir_intrinsic_image_deref_samples:
4955 b->cursor = nir_before_instr(in);
4956 nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in);
4957 nir_instr_remove(in);
4958 break;
4959 case nir_intrinsic_image_deref_store:
4960 case nir_intrinsic_image_deref_load:
4961 case nir_intrinsic_image_deref_atomic:
4962 case nir_intrinsic_image_deref_atomic_swap:
4963 break;
4964 default:
4965 return false;
4966 }
4967 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
4968 if (dim != GLSL_SAMPLER_DIM_MS)
4969 return false;
4970
4971 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4972 nir_variable *var = nir_deref_instr_get_variable(deref);
4973 nir_deref_instr *parent = nir_deref_instr_parent(deref);
4974 if (parent) {
4975 parent->type = var->type;
4976 deref->type = glsl_without_array(var->type);
4977 } else {
4978 deref->type = var->type;
4979 }
4980 nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
4981 return true;
4982 }
4983
4984
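/* Strip multisampling from storage images: MS image variables are retyped as
 * 2D (array-ness preserved), matching intrinsics get their image_dim rewritten,
 * and image_deref_samples collapses to a constant 0.
 */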
4985 static bool
4986 strip_tex_ms(nir_shader *shader)
4987 {
4988 bool progress = false;
4989 nir_foreach_image_variable(var, shader) {
4990 const struct glsl_type *bare_type = glsl_without_array(var->type);
4991 if (glsl_get_sampler_dim(bare_type) != GLSL_SAMPLER_DIM_MS)
4992 continue;
4993 unsigned array_size = 0;
4994 if (glsl_type_is_array(var->type))
4995 array_size = glsl_array_size(var->type);
4996
4997 const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type));
4998 if (array_size)
4999 new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type));
5000 var->type = new_type;
5001 progress = true;
5002 }
5003 if (!progress)
5004 return false;
5005 return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL);
5006 }
5007
5008 static void
5009 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
5010 {
5011 nir_foreach_function_impl(impl, nir) {
5012 nir_foreach_block(block, impl) {
5013 nir_foreach_instr_safe(instr, block) {
5014 if (instr->type != nir_instr_type_deref)
5015 continue;
5016 nir_deref_instr *deref = nir_instr_as_deref(instr);
5017 nir_variable *img = nir_deref_instr_get_variable(deref);
5018 if (img != var)
5019 continue;
5020 if (glsl_type_is_array(var->type)) {
5021 if (deref->deref_type == nir_deref_type_array)
5022 deref->type = glsl_without_array(var->type);
5023 else
5024 deref->type = var->type;
5025 } else {
5026 deref->type = var->type;
5027 }
5028 }
5029 }
5030 }
5031 }
5032
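/* CL images can reach here with a VOID sampler result type; infer the real
 * base type from the first typed image intrinsic touching the variable
 * (src_type for stores, dest_type for loads/atomics) and retype the variable
 * and its derefs.  Variables seen only by image_deref_size default to uint,
 * and completely unused variables are demoted to shader_temp so they get
 * eliminated.
 */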
5033 static void
5034 type_image(nir_shader *nir, nir_variable *var)
5035 {
5036 nir_foreach_function_impl(impl, nir) {
5037 nir_foreach_block(block, impl) {
5038 nir_foreach_instr_safe(instr, block) {
5039 if (instr->type != nir_instr_type_intrinsic)
5040 continue;
5041 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5042 if (intr->intrinsic == nir_intrinsic_image_deref_load ||
5043 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
5044 intr->intrinsic == nir_intrinsic_image_deref_store ||
5045 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
5046 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
5047 intr->intrinsic == nir_intrinsic_image_deref_samples ||
5048 intr->intrinsic == nir_intrinsic_image_deref_format ||
5049 intr->intrinsic == nir_intrinsic_image_deref_order) {
5050 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5051 nir_variable *img = nir_deref_instr_get_variable(deref);
5052 if (img != var)
5053 continue;
5054
5055 nir_alu_type alu_type;
5056 if (nir_intrinsic_has_src_type(intr))
5057 alu_type = nir_intrinsic_src_type(intr);
5058 else
5059 alu_type = nir_intrinsic_dest_type(intr);
5060
5061 const struct glsl_type *type = glsl_without_array(var->type);
5062 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5063 assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
5064 continue;
5065 }
5066 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5067 if (glsl_type_is_array(var->type))
5068 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5069 var->type = img_type;
5070 rewrite_cl_derefs(nir, var);
5071 return;
5072 }
5073 }
5074 }
5075 }
5076 nir_foreach_function_impl(impl, nir) {
5077 nir_foreach_block(block, impl) {
5078 nir_foreach_instr_safe(instr, block) {
5079 if (instr->type != nir_instr_type_intrinsic)
5080 continue;
5081 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5082 if (intr->intrinsic != nir_intrinsic_image_deref_size)
5083 continue;
5084 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5085 nir_variable *img = nir_deref_instr_get_variable(deref);
5086 if (img != var)
5087 continue;
5088 nir_alu_type alu_type = nir_type_uint32;
5089 const struct glsl_type *type = glsl_without_array(var->type);
5090 if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5091 continue;
5092 }
5093 const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5094 if (glsl_type_is_array(var->type))
5095 img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5096 var->type = img_type;
5097 rewrite_cl_derefs(nir, var);
5098 return;
5099 }
5100 }
5101 }
5102 var->data.mode = nir_var_shader_temp;
5103 }
5104
5105 static bool
5106 type_sampler_vars(nir_shader *nir)
5107 {
5108 bool progress = false;
5109 nir_foreach_function_impl(impl, nir) {
5110 nir_foreach_block(block, impl) {
5111 nir_foreach_instr(instr, block) {
5112 if (instr->type != nir_instr_type_tex)
5113 continue;
5114 nir_tex_instr *tex = nir_instr_as_tex(instr);
5115 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
5116 assert(var);
5117 if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
5118 nir_tex_instr_is_query(tex))
5119 continue;
5120 const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
5121 unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
5122 if (size > 1)
5123 img_type = glsl_array_type(img_type, size, 0);
5124 var->type = img_type;
5125 progress = true;
5126 }
5127 }
5128 }
5129 return progress;
5130 }
5131
5132 static bool
5133 type_images(nir_shader *nir)
5134 {
5135 bool progress = false;
5136 progress |= type_sampler_vars(nir);
5137 nir_foreach_variable_with_modes(var, nir, nir_var_image) {
5138 type_image(nir, var);
5139 progress = true;
5140 }
5141 if (progress) {
5142 nir_fixup_deref_types(nir);
5143 nir_fixup_deref_modes(nir);
5144 }
5145 return progress;
5146 }
5147
5148 /* attempt to assign io for separate shaders */
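/* Two cases: ARB/fixed-function-style shaders (only slots below
 * VARYING_SLOT_VAR1 in use) keep location-based driver_locations since their
 * interfaces don't have to match exactly, while everything else gets slots
 * assigned incrementally per location so producer and consumer agree on the
 * numbering even when the used locations are sparse.
 */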
5149 static bool
5150 fixup_io_locations(nir_shader *nir)
5151 {
5152 nir_variable_mode modes;
5153 if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
5154 modes = nir_var_shader_in | nir_var_shader_out;
5155 else
5156 modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
5157 u_foreach_bit(mode, modes) {
5158 nir_variable_mode m = BITFIELD_BIT(mode);
5159 if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
5160 (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
5161 /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
5162 * - i/o interface blocks don't need to match
5163 * - any location can be present or not
5164 * - it just has to work
5165 *
5166 * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
5167 * since it's a builtin and yolo it with all the other legacy crap
5168 */
5169 nir_foreach_variable_with_modes(var, nir, m) {
5170 if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
5171 continue;
5172 if (var->data.location == VARYING_SLOT_VAR0)
5173 var->data.driver_location = 0;
5174 else if (var->data.patch)
5175 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5176 else
5177 var->data.driver_location = var->data.location;
5178 }
5179 continue;
5180 }
5181 /* i/o interface blocks are required to be EXACT matches between stages:
5182 * iterate over all locations and set locations incrementally
5183 */
5184 unsigned slot = 0;
5185 for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) {
5186 if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
5187 continue;
5188 bool found = false;
5189 unsigned size = 0;
5190 nir_foreach_variable_with_modes(var, nir, m) {
5191 if (var->data.location != i)
5192 continue;
5193 /* only add slots for non-component vars or first-time component vars */
5194 if (!var->data.location_frac || !size) {
5195 /* ensure variable is given enough slots */
5196 if (nir_is_arrayed_io(var, nir->info.stage))
5197 size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
5198 else
5199 size += glsl_count_vec4_slots(var->type, false, false);
5200 }
5201 if (var->data.patch)
5202 var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5203 else
5204 var->data.driver_location = slot;
5205 found = true;
5206 }
5207 slot += size;
5208 if (found) {
5209 /* ensure the consumed slots aren't double iterated */
5210 i += size - 1;
5211 } else {
5212 /* locations used between stages are not required to be contiguous */
5213 if (i >= VARYING_SLOT_VAR0)
5214 slot++;
5215 }
5216 }
5217 }
5218 return true;
5219 }
5220
5221 static uint64_t
5222 zink_flat_flags(struct nir_shader *shader)
5223 {
5224 uint64_t flat_flags = 0;
5225 nir_foreach_shader_in_variable(var, shader) {
5226 if (var->data.interpolation == INTERP_MODE_FLAT)
5227 flat_flags |= BITFIELD64_BIT(var->data.location);
5228 }
5229
5230 return flat_flags;
5231 }
5232
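/* I/O variable reconstruction: zink consumes lowered i/o, so shader_in /
 * shader_out variables are rebuilt from the load/store intrinsics.
 * rework_io_vars() walks every accessed slot, scan_io_var_slot() re-runs the
 * scan until the gathered state stops changing, and create_io_var() turns the
 * final state into a nir_variable.
 */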
5233 struct rework_io_state {
5234 /* these are search criteria */
5235 bool indirect_only;
5236 unsigned location;
5237 nir_variable_mode mode;
5238 gl_shader_stage stage;
5239 nir_shader *nir;
5240 const char *name;
5241
5242 /* these are found by scanning */
5243 bool arrayed_io;
5244 bool medium_precision;
5245 bool fb_fetch_output;
5246 bool dual_source_blend_index;
5247 uint32_t component_mask;
5248 uint32_t ignored_component_mask;
5249 unsigned array_size;
5250 unsigned bit_size;
5251 unsigned base;
5252 nir_alu_type type;
5253 /* must be last */
5254 char *newname;
5255 };
5256
5257 /* match an existing variable against the rework state */
5258 static nir_variable *
5259 find_rework_var(nir_shader *nir, struct rework_io_state *ris)
5260 {
5261 nir_foreach_variable_with_modes(var, nir, ris->mode) {
5262 const struct glsl_type *type = var->type;
5263 if (nir_is_arrayed_io(var, nir->info.stage))
5264 type = glsl_get_array_element(type);
5265 if (var->data.fb_fetch_output != ris->fb_fetch_output)
5266 continue;
5267 if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
5268 continue;
5269 unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5270 if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
5271 continue;
5272 unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5273 assert(!glsl_type_contains_64bit(type));
5274 uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
5275 if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
5276 return var;
5277 }
5278 return NULL;
5279 }
5280
5281 static void
5282 update_io_var_name(struct rework_io_state *ris, const char *name)
5283 {
5284 if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
5285 return;
5286 if (!name)
5287 return;
5288 if (ris->name && !strcmp(ris->name, name))
5289 return;
5290 if (ris->newname && !strcmp(ris->newname, name))
5291 return;
5292 if (ris->newname) {
5293 ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
5294 } else if (ris->name) {
5295 ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
5296 } else {
5297 ris->newname = ralloc_strdup(ris->nir, name);
5298 }
5299 }
5300
5301 /* check/update tracking state for variable info */
5302 static void
5303 update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
5304 {
5305 bool is_load = false;
5306 bool is_input = false;
5307 bool is_interp = false;
5308 filter_io_instr(intr, &is_load, &is_input, &is_interp);
5309 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5310 unsigned frac = nir_intrinsic_component(intr);
5311 /* the mask of components for the instruction */
5312 uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
5313
5314 /* always check for existing variables first */
5315 struct rework_io_state test = {
5316 .location = ris->location,
5317 .mode = ris->mode,
5318 .stage = ris->stage,
5319 .arrayed_io = io_instr_is_arrayed(intr),
5320 .medium_precision = sem.medium_precision,
5321 .fb_fetch_output = sem.fb_fetch_output,
5322 .dual_source_blend_index = sem.dual_source_blend_index,
5323 .component_mask = cmask,
5324 .array_size = sem.num_slots > 1 ? sem.num_slots : 0,
5325 };
5326 if (find_rework_var(ris->nir, &test))
5327 return;
5328
5329 /* filter ignored components to scan later:
5330 * - ignore no-overlapping-components case
5331 * - always match fbfetch and dual src blend
5332 */
5333 if (ris->component_mask &&
5334 (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
5335 ris->ignored_component_mask |= cmask;
5336 return;
5337 }
5338
5339 assert(!ris->indirect_only || sem.num_slots > 1);
5340 if (sem.num_slots > 1)
5341 ris->array_size = MAX2(ris->array_size, sem.num_slots);
5342
5343 assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
5344 ris->arrayed_io = io_instr_is_arrayed(intr);
5345
5346 ris->component_mask |= cmask;
5347
5348 unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5349 assert(!ris->bit_size || ris->bit_size == bit_size);
5350 ris->bit_size = bit_size;
5351
5352 nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5353 if (ris->type) {
5354 /* in the case of clashing types, this heuristic guarantees some semblance of a match */
5355 if (ris->type & nir_type_float || type & nir_type_float) {
5356 ris->type = nir_type_float | bit_size;
5357 } else if (ris->type & nir_type_int || type & nir_type_int) {
5358 ris->type = nir_type_int | bit_size;
5359 } else if (ris->type & nir_type_uint || type & nir_type_uint) {
5360 ris->type = nir_type_uint | bit_size;
5361 } else {
5362 assert(bit_size == 1);
5363 ris->type = nir_type_bool;
5364 }
5365 } else {
5366 ris->type = type;
5367 }
5368
5369 update_io_var_name(ris, intr->name);
5370
5371 ris->medium_precision |= sem.medium_precision;
5372 ris->fb_fetch_output |= sem.fb_fetch_output;
5373 ris->dual_source_blend_index |= sem.dual_source_blend_index;
5374 if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5375 ris->base = nir_intrinsic_base(intr);
5376 }
5377
5378 /* instruction-level scanning for variable data */
5379 static bool
5380 scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5381 {
5382 struct rework_io_state *ris = data;
5383 bool is_load = false;
5384 bool is_input = false;
5385 bool is_interp = false;
5386 /* mode-based filtering */
5387 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5388 return false;
5389 if (ris->mode == nir_var_shader_in) {
5390 if (!is_input)
5391 return false;
5392 } else {
5393 if (is_input)
5394 return false;
5395 }
5396 /* location-based filtering */
5397 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5398 if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
5399 return false;
5400
5401 /* only scan indirect i/o when indirect_only is set */
5402 nir_src *src_offset = nir_get_io_offset_src(intr);
5403 if (!nir_src_is_const(*src_offset)) {
5404 if (!ris->indirect_only)
5405 return false;
5406 update_io_var_state(intr, ris);
5407 return false;
5408 }
5409
5410 /* don't scan direct i/o when indirect_only is set */
5411 if (ris->indirect_only)
5412 return false;
5413
5414 update_io_var_state(intr, ris);
5415 return false;
5416 }
5417
5418 /* scan a given i/o slot for state info */
5419 static struct rework_io_state
5420 scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
5421 {
5422 struct rework_io_state ris = {
5423 .location = location,
5424 .mode = mode,
5425 .stage = nir->info.stage,
5426 .nir = nir,
5427 };
5428
5429 struct rework_io_state test;
5430 do {
5431 update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
5432 test = ris;
5433 /* always run indirect scan first to detect potential overlaps */
5434 if (scan_indirects) {
5435 ris.indirect_only = true;
5436 nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5437 }
5438 ris.indirect_only = false;
5439 nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5440 /* keep scanning until no changes found */
5441 } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
5442 return ris;
5443 }
5444
5445 /* create a variable using explicit/scan info */
5446 static void
5447 create_io_var(nir_shader *nir, struct rework_io_state *ris)
5448 {
5449 char name[1024];
5450 assert(ris->component_mask);
5451 if (ris->newname || ris->name) {
5452 snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
5453 /* always use builtin name where possible */
5454 } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
5455 snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
5456 } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
5457 snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
5458 } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
5459 snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
5460 } else {
5461 int c = ffs(ris->component_mask) - 1;
5462 if (c)
5463 snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
5464 else
5465 snprintf(name, sizeof(name), "slot_%u", ris->location);
5466 }
5467 /* calculate vec/array type */
5468 int frac = ffs(ris->component_mask) - 1;
5469 int num_components = util_last_bit(ris->component_mask) - frac;
5470 assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
5471 const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
5472 if (ris->array_size)
5473 vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
5474 if (ris->arrayed_io) {
5475 /* tess size may be unknown with generated tcs */
5476 unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
5477 nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */;
5478 vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
5479 }
5480 nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
5481 var->data.location_frac = frac;
5482 var->data.location = ris->location;
5483 /* gallium vertex inputs use intrinsic 'base' indexing */
5484 if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5485 var->data.driver_location = ris->base;
5486 var->data.patch = ris->location >= VARYING_SLOT_PATCH0 ||
5487 ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5488 (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER));
5489 /* set flat by default: add_derefs will fill this in later after more shader passes */
5490 if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
5491 var->data.interpolation = INTERP_MODE_FLAT;
5492 var->data.fb_fetch_output = ris->fb_fetch_output;
5493 var->data.index = ris->dual_source_blend_index;
5494 var->data.precision = ris->medium_precision;
5495 /* only clip/cull dist and tess levels are compact */
5496 if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
5497 var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
5498 }
5499
5500 /* loop the i/o mask and generate variables for specified locations */
5501 static void
5502 loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
5503 {
5504 ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
5505 u_foreach_bit64(slot, mask) {
5506 if (patch)
5507 slot += VARYING_SLOT_PATCH0;
5508
5509 /* this should've been handled explicitly */
5510 assert(is_vertex_input || !is_clipcull_dist(slot));
5511
5512 unsigned remaining = 0;
5513 do {
5514 /* scan the slot for usage */
5515 struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect);
5516 /* one of these must be true or things have gone very wrong */
5517 assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
5518 /* release builds only */
5519 if (!ris.component_mask)
5520 break;
5521
5522 /* whatever reaches this point is either enough info to create a variable or an existing variable */
5523 if (!find_rework_var(nir, &ris))
5524 create_io_var(nir, &ris);
5525 /* scanning may detect multiple potential variables per location at component offsets: process again */
5526 remaining = ris.ignored_component_mask;
5527 } while (remaining);
5528 }
5529 }
5530
5531 /* for a given mode, generate variables */
5532 static void
5533 rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
5534 {
5535 assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5536 assert(util_bitcount(mode) == 1);
5537 bool found = false;
5538 /* if no i/o, skip */
5539 if (mode == nir_var_shader_out)
5540 found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
5541 else
5542 found = nir->info.inputs_read || nir->info.patch_inputs_read;
5543 if (!found)
5544 return;
5545
5546 /* use local copies to enable incremental processing */
5547 uint64_t inputs_read = nir->info.inputs_read;
5548 uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
5549 uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
5550 uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly;
5551
5552 /* fragment outputs are special: handle separately */
5553 if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5554 assert(!outputs_accessed_indirectly);
5555 u_foreach_bit64(slot, outputs_accessed) {
5556 struct rework_io_state ris = {
5557 .location = slot,
5558 .mode = mode,
5559 .stage = nir->info.stage,
5560 };
5561 /* explicitly handle builtins */
5562 switch (slot) {
5563 case FRAG_RESULT_DEPTH:
5564 case FRAG_RESULT_STENCIL:
5565 case FRAG_RESULT_SAMPLE_MASK:
5566 ris.bit_size = 32;
5567 ris.component_mask = 0x1;
5568 ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
5569 create_io_var(nir, &ris);
5570 outputs_accessed &= ~BITFIELD64_BIT(slot);
5571 break;
5572 default:
5573 break;
5574 }
5575 }
5576 /* the rest of the outputs can be generated normally */
5577 loop_io_var_mask(nir, mode, false, false, outputs_accessed);
5578 return;
5579 }
5580
5581 /* vertex inputs are special: handle separately */
5582 if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5583 assert(!inputs_read_indirectly);
5584 u_foreach_bit64(slot, inputs_read) {
5585 /* explicitly handle builtins */
5586 if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
5587 continue;
5588
5589 uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
5590 struct rework_io_state ris = {
5591 .location = slot,
5592 .mode = mode,
5593 .stage = nir->info.stage,
5594 .bit_size = 32,
5595 .component_mask = component_mask,
5596 .type = nir_type_float32,
5597 .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
5598 };
5599 create_io_var(nir, &ris);
5600 inputs_read &= ~BITFIELD64_BIT(slot);
5601 }
5602 /* the rest of the inputs can be generated normally */
5603 loop_io_var_mask(nir, mode, false, false, inputs_read);
5604 return;
5605 }
5606
5607 /* these are the masks to process based on the mode: nothing "special" as above */
5608 uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
5609 uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
5610 u_foreach_bit64(slot, mask) {
5611 struct rework_io_state ris = {
5612 .location = slot,
5613 .mode = mode,
5614 .stage = nir->info.stage,
5615 .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
5616 };
5617 /* explicitly handle builtins */
5618 unsigned max_components = 0;
5619 switch (slot) {
5620 case VARYING_SLOT_FOGC:
5621 /* use intr components */
5622 break;
5623 case VARYING_SLOT_POS:
5624 case VARYING_SLOT_CLIP_VERTEX:
5625 case VARYING_SLOT_PNTC:
5626 case VARYING_SLOT_BOUNDING_BOX0:
5627 case VARYING_SLOT_BOUNDING_BOX1:
5628 max_components = 4;
5629 ris.type = nir_type_float32;
5630 break;
5631 case VARYING_SLOT_CLIP_DIST0:
5632 max_components = nir->info.clip_distance_array_size;
5633 assert(max_components);
5634 ris.type = nir_type_float32;
5635 break;
5636 case VARYING_SLOT_CULL_DIST0:
5637 max_components = nir->info.cull_distance_array_size;
5638 assert(max_components);
5639 ris.type = nir_type_float32;
5640 break;
5641 case VARYING_SLOT_CLIP_DIST1:
5642 case VARYING_SLOT_CULL_DIST1:
5643 mask &= ~BITFIELD64_BIT(slot);
5644 indirect_mask &= ~BITFIELD64_BIT(slot);
5645 continue;
5646 case VARYING_SLOT_TESS_LEVEL_OUTER:
5647 max_components = 4;
5648 ris.type = nir_type_float32;
5649 break;
5650 case VARYING_SLOT_TESS_LEVEL_INNER:
5651 max_components = 2;
5652 ris.type = nir_type_float32;
5653 break;
5654 case VARYING_SLOT_PRIMITIVE_ID:
5655 case VARYING_SLOT_LAYER:
5656 case VARYING_SLOT_VIEWPORT:
5657 case VARYING_SLOT_FACE:
5658 case VARYING_SLOT_VIEW_INDEX:
5659 case VARYING_SLOT_VIEWPORT_MASK:
5660 ris.type = nir_type_int32;
5661 max_components = 1;
5662 break;
5663 case VARYING_SLOT_PSIZ:
5664 max_components = 1;
5665 ris.type = nir_type_float32;
5666 break;
5667 default:
5668 break;
5669 }
5670 if (!max_components)
5671 continue;
5672 switch (slot) {
5673 case VARYING_SLOT_CLIP_DIST0:
5674 case VARYING_SLOT_CLIP_DIST1:
5675 case VARYING_SLOT_CULL_DIST0:
5676 case VARYING_SLOT_CULL_DIST1:
5677 case VARYING_SLOT_TESS_LEVEL_OUTER:
5678 case VARYING_SLOT_TESS_LEVEL_INNER:
5679 /* compact arrays */
5680 ris.component_mask = 0x1;
5681 ris.array_size = max_components;
5682 break;
5683 default:
5684 ris.component_mask = BITFIELD_MASK(max_components);
5685 break;
5686 }
5687 ris.bit_size = 32;
5688 create_io_var(nir, &ris);
5689 mask &= ~BITFIELD64_BIT(slot);
5690 /* eliminate clip/cull distance scanning early */
5691 indirect_mask &= ~BITFIELD64_BIT(slot);
5692 }
5693
5694 /* patch i/o */
5695 if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
5696 (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
5697 uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
5698 uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly;
5699 uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
5700
5701 loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
5702 loop_io_var_mask(nir, mode, false, true, patch_mask);
5703 }
5704
5705 /* regular i/o */
5706 loop_io_var_mask(nir, mode, true, false, indirect_mask);
5707 loop_io_var_mask(nir, mode, false, false, mask);
5708 }
5709
5710 static int
5711 zink_type_size(const struct glsl_type *type, bool bindless)
5712 {
5713 return glsl_count_attribute_slots(type, false);
5714 }
5715
5716 static nir_mem_access_size_align
5717 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5718 uint8_t bit_size, uint32_t align,
5719 uint32_t align_offset, bool offset_is_const,
5720 const void *cb_data)
5721 {
5722 align = nir_combined_align(align, align_offset);
5723
5724 assert(util_is_power_of_two_nonzero(align));
5725
5726 /* simply drop the bit_size for unaligned load/stores */
5727 if (align < (bit_size / 8)) {
5728 return (nir_mem_access_size_align){
5729 .num_components = MIN2(bytes / align, 4),
5730 .bit_size = align * 8,
5731 .align = align,
5732 };
5733 } else {
5734 return (nir_mem_access_size_align){
5735 .num_components = MIN2(bytes / (bit_size / 8), 4),
5736 .bit_size = bit_size,
5737 .align = bit_size / 8,
5738 };
5739 }
5740 }
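/*
 * Illustrative sketch (worked numbers, not driver code) of how the callback
 * above splits an unaligned access: assume a 16-byte load with
 * bit_size == 32 but only 2-byte alignment. Since align (2) is smaller than
 * bit_size / 8 (4), the access is re-emitted as MIN2(16 / 2, 4) == 4
 * components of 16 bits each, i.e. a vec4 of 16-bit loads that respects the
 * known alignment. A sufficiently aligned access keeps its bit_size and is
 * simply capped at 4 components.
 */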
5741
5742 static nir_mem_access_size_align
5743 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5744 uint8_t bit_size, uint32_t align,
5745 uint32_t align_offset, bool offset_is_const,
5746 const void *cb_data)
5747 {
5748 bit_size = *(const uint8_t *)cb_data;
5749 align = nir_combined_align(align, align_offset);
5750
5751 assert(util_is_power_of_two_nonzero(align));
5752
5753 return (nir_mem_access_size_align){
5754 .num_components = MIN2(bytes / (bit_size / 8), 4),
5755 .bit_size = bit_size,
5756 .align = bit_size / 8,
5757 };
5758 }
5759
5760 static bool
5761 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5762 {
5763 uint8_t *bit_size = data;
5764 switch (instr->intrinsic) {
5765 case nir_intrinsic_load_scratch:
5766 *bit_size = MIN2(*bit_size, instr->def.bit_size);
5767 return false;
5768 case nir_intrinsic_store_scratch:
5769 *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5770 return false;
5771 default:
5772 return false;
5773 }
5774 }
5775
5776 static bool
5777 alias_scratch_memory(nir_shader *nir)
5778 {
5779 uint8_t bit_size = 64;
5780
5781 nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5782 nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5783 .modes = nir_var_function_temp,
5784 .may_lower_unaligned_stores_to_atomics = true,
5785 .callback = mem_access_scratch_size_align_cb,
5786 .cb_data = &bit_size,
5787 };
5788 return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5789 }
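/*
 * Illustrative sketch (hypothetical shader, not driver output): if a kernel
 * does both 64-bit and 32-bit scratch access, the scan above settles on
 * bit_size == 32 and nir_lower_mem_access_bit_sizes then rewrites the
 * 64-bit accesses in terms of 32-bit ones, so the whole scratch area can be
 * treated as an array of a single element size, roughly:
 *
 *   64-bit store_scratch  ->  unpack to 2x 32-bit components + store_scratch
 *   64-bit load_scratch   ->  32-bit load_scratch x2 + pack_64_2x32
 */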
5790
5791 static uint8_t
5792 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5793 {
5794 return 4;
5795 }
5796
5797 static unsigned
5798 zink_lower_bit_size_cb(const nir_instr *instr, void *data)
5799 {
5800 switch (instr->type) {
5801 case nir_instr_type_alu: {
5802 nir_alu_instr *alu = nir_instr_as_alu(instr);
5803 switch (alu->op) {
5804 case nir_op_bit_count:
5805 case nir_op_find_lsb:
5806 case nir_op_ifind_msb:
5807 case nir_op_ufind_msb:
5808 return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32;
5809 default:
5810 return 0;
5811 }
5812 }
5813 default:
5814 return 0;
5815 }
5816 }
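/*
 * Illustrative note (not driver code): nir_lower_bit_size interprets the
 * return value above as the bit size to lower to. Returning 32 for a
 * non-32-bit bit_count/find_lsb/ifind_msb/ufind_msb makes the pass widen
 * the source with the appropriate u2u32/i2i32 conversion and run the op at
 * 32 bits, which is what the SPIR-V translation expects, e.g. roughly:
 *
 *   ufind_msb(16-bit x)  ->  ufind_msb(u2u32(x))
 */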
5817
5818 static bool
5819 fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5820 {
5821 bool is_load = false;
5822 bool is_input = false;
5823 bool is_interp = false;
5824 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input)
5825 return false;
5826
5827 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5828 if (sem.location < VERT_ATTRIB_GENERIC0)
5829 return false;
5830 sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr);
5831 nir_intrinsic_set_io_semantics(intr, sem);
5832 return true;
5833 }
5834
5835 static bool
5836 fix_vertex_input_locations(nir_shader *nir)
5837 {
5838 if (nir->info.stage != MESA_SHADER_VERTEX)
5839 return false;
5840
5841 return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
5842 }
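/*
 * Hypothetical example (not taken from a real app): with lowered i/o, a
 * vertex shader that declared inputs at GENERIC3 and GENERIC7 may reach this
 * point with bases 0 and 1. The pass above renumbers the io semantics so the
 * location follows the base:
 *
 *   load_input base=0, location=VERT_ATTRIB_GENERIC3 -> VERT_ATTRIB_GENERIC0
 *   load_input base=1, location=VERT_ATTRIB_GENERIC7 -> VERT_ATTRIB_GENERIC1
 *
 * so the shader's generic attribute locations match the driver-assigned bases.
 */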
5843
5844 struct trivial_revectorize_state {
5845 bool has_xfb;
5846 uint32_t component_mask;
5847 nir_intrinsic_instr *base;
5848 nir_intrinsic_instr *next_emit_vertex;
5849 nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
5850 struct set *deletions;
5851 };
5852
5853 /* always skip xfb; scalarized xfb is preferred */
5854 static bool
5855 intr_has_xfb(nir_intrinsic_instr *intr)
5856 {
5857 if (!nir_intrinsic_has_io_xfb(intr))
5858 return false;
5859 for (unsigned i = 0; i < 2; i++) {
5860 if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
5861 return true;
5862 }
5863 }
5864 return false;
5865 }
5866
5867 /* helper to avoid vectorizing i/o for different vertices */
5868 static nir_intrinsic_instr *
5869 find_next_emit_vertex(nir_intrinsic_instr *intr)
5870 {
5871 bool found = false;
5872 nir_foreach_instr_safe(instr, intr->instr.block) {
5873 if (instr->type == nir_instr_type_intrinsic) {
5874 nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5875 if (!found && test_intr != intr)
5876 continue;
5877 if (!found) {
5878 assert(intr == test_intr);
5879 found = true;
5880 continue;
5881 }
5882 if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5883 return test_intr;
5884 }
5885 }
5886 return NULL;
5887 }
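/*
 * Illustrative sketch (hypothetical GS block, not driver output): two stores
 * to the same location only belong to the same vertex if no emit_vertex sits
 * between them, so the helper above acts as a "which vertex am I in" key
 * while scanning:
 *
 *   store_output(POS, ...)   // next emit_vertex: A -> may merge
 *   store_output(POS, ...)   // next emit_vertex: A -> may merge
 *   emit_vertex              // A
 *   store_output(POS, ...)   // next emit_vertex: B -> different vertex
 */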
5888
5889 /* scan for vectorizable instrs on a given location */
5890 static bool
5891 trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
5892 {
5893 nir_intrinsic_instr *base = state->base;
5894
5895 if (intr == base)
5896 return false;
5897
5898 if (intr->intrinsic != base->intrinsic)
5899 return false;
5900
5901 if (_mesa_set_search(state->deletions, intr))
5902 return false;
5903
5904 bool is_load = false;
5905 bool is_input = false;
5906 bool is_interp = false;
5907 filter_io_instr(intr, &is_load, &is_input, &is_interp);
5908
5909 nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
5910 nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
5911 nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
5912 nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5913 int c = nir_intrinsic_component(intr);
5914 /* already detected */
5915 if (state->component_mask & BITFIELD_BIT(c))
5916 return false;
5917 /* not a match */
5918 if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
5919 return false;
5920 /* only vectorize when all srcs match */
5921 for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
5922 if (!nir_srcs_equal(intr->src[i], base->src[i]))
5923 return false;
5924 }
5925 /* never match xfb */
5926 state->has_xfb |= intr_has_xfb(intr);
5927 if (state->has_xfb)
5928 return false;
5929 if (nir->info.stage == MESA_SHADER_GEOMETRY) {
5930 /* only match same vertex */
5931 if (state->next_emit_vertex != find_next_emit_vertex(intr))
5932 return false;
5933 }
5934 uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
5935 state->component_mask |= mask;
5936 u_foreach_bit(component, mask)
5937 state->merge[component] = intr;
5938
5939 return true;
5940 }
5941
5942 static bool
5943 trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
5944 {
5945 bool is_load = false;
5946 bool is_input = false;
5947 bool is_interp = false;
5948 if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5949 return false;
5950 if (intr->num_components != 1)
5951 return false;
5952 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5953 if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
5954 /* always ignore compact arrays */
5955 switch (sem.location) {
5956 case VARYING_SLOT_CLIP_DIST0:
5957 case VARYING_SLOT_CLIP_DIST1:
5958 case VARYING_SLOT_CULL_DIST0:
5959 case VARYING_SLOT_CULL_DIST1:
5960 case VARYING_SLOT_TESS_LEVEL_INNER:
5961 case VARYING_SLOT_TESS_LEVEL_OUTER:
5962 return false;
5963 default: break;
5964 }
5965 }
5966 /* always ignore to-be-deleted instrs */
5967 if (_mesa_set_search(data, intr))
5968 return false;
5969
5970 /* never vectorize xfb */
5971 if (intr_has_xfb(intr))
5972 return false;
5973
5974 int ic = nir_intrinsic_component(intr);
5975 uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
5976 /* already vectorized */
5977 if (util_bitcount(mask) == 4)
5978 return false;
5979 struct trivial_revectorize_state state = {
5980 .component_mask = mask,
5981 .base = intr,
5982 /* avoid clobbering i/o for different vertices */
5983 .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
5984 .deletions = data,
5985 };
5986 u_foreach_bit(bit, mask)
5987 state.merge[bit] = intr;
5988 bool progress = false;
5989 nir_foreach_instr(instr, intr->instr.block) {
5990 if (instr->type != nir_instr_type_intrinsic)
5991 continue;
5992 nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5993 /* no matching across vertex emission */
5994 if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5995 break;
5996 progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
5997 }
5998 if (!progress || state.has_xfb)
5999 return false;
6000
6001 /* verify nothing crazy happened */
6002 assert(state.component_mask);
6003 for (unsigned i = 0; i < 4; i++) {
6004 assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
6005 }
6006
6007 unsigned first_component = ffs(state.component_mask) - 1;
6008 unsigned num_components = util_bitcount(state.component_mask);
6009 unsigned num_contiguous = 0;
6010 uint32_t contiguous_mask = 0;
6011 for (unsigned i = 0; i < num_components; i++) {
6012 unsigned c = i + first_component;
6013 /* calc mask of contiguous components to vectorize */
6014 if (state.component_mask & BITFIELD_BIT(c)) {
6015 num_contiguous++;
6016 contiguous_mask |= BITFIELD_BIT(c);
6017 }
6018 /* on the first gap or the last component, vectorize */
6019 if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
6020 if (num_contiguous > 1) {
6021 /* reindex to enable easy src/dest index comparison */
6022 nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
6023 /* determine the first/last instr to use for the base (vectorized) load/store */
6024 unsigned first_c = ffs(contiguous_mask) - 1;
6025 nir_intrinsic_instr *base = NULL;
6026 unsigned test_idx = is_load ? UINT32_MAX : 0;
6027 for (unsigned j = 0; j < num_contiguous; j++) {
6028 unsigned merge_c = j + first_c;
6029 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6030 /* avoid breaking ssa ordering by using:
6031 * - first instr for vectorized load
6032 * - last instr for vectorized store
6033 * this guarantees all srcs have been seen
6034 */
6035 if ((is_load && merge_intr->def.index < test_idx) ||
6036 (!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
6037 test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
6038 base = merge_intr;
6039 }
6040 }
6041 assert(base);
6042 /* update instr components */
6043 nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
6044 unsigned orig_components = base->num_components;
6045 base->num_components = num_contiguous;
6046 /* do rewrites after loads and before stores */
6047 b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
6048 if (is_load) {
6049 base->def.num_components = num_contiguous;
6050 /* iterate the contiguous loaded components and rewrite merged dests */
6051 for (unsigned j = 0; j < num_contiguous; j++) {
6052 unsigned merge_c = j + first_c;
6053 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6054 /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
6055 unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
6056 nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
6057 nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? swiz->parent_instr : &merge_intr->instr);
6058 j += use_components - 1;
6059 }
6060 } else {
6061 nir_def *comp[NIR_MAX_VEC_COMPONENTS];
6062 /* generate swizzled vec of store components and rewrite store src */
6063 for (unsigned j = 0; j < num_contiguous; j++) {
6064 unsigned merge_c = j + first_c;
6065 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6066 /* detect if the merged instr stored multiple components and extract them for rewrite */
6067 unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
6068 for (unsigned k = 0; k < use_components; k++)
6069 comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
6070 j += use_components - 1;
6071 }
6072 nir_def *val = nir_vec(b, comp, num_contiguous);
6073 nir_src_rewrite(&base->src[0], val);
6074 nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
6075 }
6076 /* deleting instructions during a foreach explodes the compiler, so delete later */
6077 for (unsigned j = 0; j < num_contiguous; j++) {
6078 unsigned merge_c = j + first_c;
6079 nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6080 if (merge_intr != base)
6081 _mesa_set_add(data, &merge_intr->instr);
6082 }
6083 }
6084 contiguous_mask = 0;
6085 num_contiguous = 0;
6086 }
6087 }
6088
6089 return true;
6090 }
6091
6092 /* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
6093 static bool
6094 trivial_revectorize(nir_shader *nir)
6095 {
6096 struct set deletions;
6097
6098 if (nir->info.stage > MESA_SHADER_FRAGMENT)
6099 return false;
6100
6101 _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6102 bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions);
6103 /* now it's safe to delete */
6104 set_foreach_remove(&deletions, entry) {
6105 nir_instr *instr = (void*)entry->key;
6106 nir_instr_remove(instr);
6107 }
6108 ralloc_free(deletions.table);
6109 return progress;
6110 }
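/*
 * Illustrative before/after (hypothetical IR, not driver output) of what
 * trivial_revectorize does to scalarized stores of one varying:
 *
 *   store_output(a, loc=VAR0, component=0, wrmask=0x1)
 *   store_output(b, loc=VAR0, component=1, wrmask=0x1)
 *   store_output(c, loc=VAR0, component=2, wrmask=0x1)
 *     ->
 *   store_output(vec3(a, b, c), loc=VAR0, component=0, wrmask=0x7)
 *
 * xfb outputs, compact arrays, and stores straddling an emit_vertex are
 * deliberately left scalar, as handled by the scan above.
 */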
6111
6112 static bool
6113 flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data)
6114 {
6115 if (instr->type != nir_instr_type_deref)
6116 return false;
6117
6118 nir_deref_instr *deref = nir_instr_as_deref(instr);
6119 if (deref->deref_type != nir_deref_type_array)
6120 return false;
6121 nir_deref_instr *parent = nir_deref_instr_parent(deref);
6122 if (!parent || parent->deref_type != nir_deref_type_array)
6123 return false;
6124 nir_variable *var = nir_deref_instr_get_variable(deref);
6125 const struct glsl_type *type = glsl_without_array(var->type);
6126 if (type == var->type || (!glsl_type_is_sampler(type) && !glsl_type_is_image(type)))
6127 return false;
6128
6129 nir_deref_instr *parent_parent = nir_deref_instr_parent(parent);
6130 int parent_size = glsl_array_size(parent->type);
6131 b->cursor = nir_after_instr(instr);
6132 nir_deref_instr *new_deref = nir_build_deref_array(b, parent_parent, nir_iadd(b, nir_imul_imm(b, parent->arr.index.ssa, parent_size), deref->arr.index.ssa));
6133 nir_def_rewrite_uses_after(&deref->def, &new_deref->def, &new_deref->instr);
6134 _mesa_set_add(data, instr);
6135 _mesa_set_add(data, &parent->instr);
6136 return true;
6137 }
6138
6139 static bool
6140 flatten_image_arrays(nir_shader *nir)
6141 {
6142 bool progress = false;
6143 nir_foreach_variable_with_modes(var, nir, nir_var_uniform | nir_var_image) {
6144 const struct glsl_type *type = glsl_without_array(var->type);
6145 if (!glsl_type_is_sampler(type) && !glsl_type_is_image(type))
6146 continue;
6147 if (type == var->type)
6148 continue;
6149 var->type = glsl_array_type(type, glsl_get_aoa_size(var->type), sizeof(void*));
6150 progress = true;
6151 }
6152 struct set *deletions = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6153 progress |= nir_shader_instructions_pass(nir, flatten_image_arrays_intr, nir_metadata_dominance, deletions);
6154 set_foreach_remove(deletions, he) {
6155 nir_instr *instr = (void*)he->key;
6156 nir_instr_remove_v(instr);
6157 }
6158 _mesa_set_destroy(deletions, NULL);
6159 if (progress)
6160 nir_fixup_deref_types(nir);
6161 return progress;
6162 }
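/*
 * Illustrative sketch (hypothetical shader, not driver output): an
 * array-of-arrays of samplers is collapsed into a single array, and the
 * two-level deref chain is folded into one index:
 *
 *   uniform sampler2D s[4][3];   ->  sampler2D s[12]
 *   deref s[i][j]                ->  deref s[i * 3 + j]
 *
 * glsl_get_aoa_size() supplies the flattened element count (12 here) and the
 * inner array length (3) is the multiplier, matching the imul/iadd above.
 */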
6163
6164 static bool
6165 bound_image_arrays_instr(struct nir_builder *b, nir_instr *instr, void *data)
6166 {
6167 if (instr->type != nir_instr_type_deref)
6168 return false;
6169
6170 nir_deref_instr *deref = nir_instr_as_deref(instr);
6171 if (deref->deref_type != nir_deref_type_array)
6172 return false;
6173
6174 if (!nir_src_is_const(deref->arr.index))
6175 return false;
6176 nir_deref_instr *parent = nir_deref_instr_parent(deref);
6177 int parent_size = glsl_array_size(parent->type);
6178 unsigned idx = nir_src_as_uint(deref->arr.index);
6179 if (idx >= parent_size) {
6180 b->cursor = nir_before_instr(instr);
6181 nir_src_rewrite(&deref->arr.index, nir_imm_zero(b, 1, 32));
6182 return true;
6183 }
6184 return false;
6185 }
6186
6187 static bool
6188 bound_image_arrays(nir_shader *nir)
6189 {
6190 return nir_shader_instructions_pass(nir, bound_image_arrays_instr, nir_metadata_dominance, NULL);
6191 }
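/*
 * Illustrative note (hypothetical shader, not driver output): a constant
 * out-of-bounds image/sampler array index is invalid in GL but must not turn
 * into an out-of-bounds descriptor access in Vulkan, so the pass above
 * rebinds such a deref to element 0:
 *
 *   uniform sampler2D s[4];
 *   ... texture(s[7], uv) ...   ->   texture(s[0], uv)
 */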
6192
6193 struct zink_shader *
6194 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
6195 {
6196 struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6197
6198 zs->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
6199 nir->info.outputs_written & VARYING_BIT_EDGE;
6200
6201 zs->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
6202 zs->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
6203 if (screen->info.have_KHR_shader_float_controls) {
6204 if (screen->info.props12.shaderDenormFlushToZeroFloat16)
6205 zs->sinfo.float_controls.flush_denorms |= 0x1;
6206 if (screen->info.props12.shaderDenormFlushToZeroFloat32)
6207 zs->sinfo.float_controls.flush_denorms |= 0x2;
6208 if (screen->info.props12.shaderDenormFlushToZeroFloat64)
6209 zs->sinfo.float_controls.flush_denorms |= 0x4;
6210
6211 if (screen->info.props12.shaderDenormPreserveFloat16)
6212 zs->sinfo.float_controls.preserve_denorms |= 0x1;
6213 if (screen->info.props12.shaderDenormPreserveFloat32)
6214 zs->sinfo.float_controls.preserve_denorms |= 0x2;
6215 if (screen->info.props12.shaderDenormPreserveFloat64)
6216 zs->sinfo.float_controls.preserve_denorms |= 0x4;
6217
6218 zs->sinfo.float_controls.denorms_all_independence =
6219 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
6220
6221 zs->sinfo.float_controls.denorms_32_bit_independence =
6222 zs->sinfo.float_controls.denorms_all_independence ||
6223 screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
6224 }
6225 zs->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6226
6227 util_queue_fence_init(&zs->precompile.fence);
6228 util_dynarray_init(&zs->pipeline_libs, zs);
6229 zs->hash = _mesa_hash_pointer(zs);
6230
6231 zs->programs = _mesa_pointer_set_create(NULL);
6232 simple_mtx_init(&zs->lock, mtx_plain);
6233 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6234 zs->info.name = ralloc_strdup(zs, nir->info.name);
6235
6236 zs->can_inline = true;
6237 zs->nir = nir;
6238
6239 if (nir->info.stage != MESA_SHADER_KERNEL)
6240 match_tex_dests(nir, zs, true);
6241
6242 return zs;
6243 }
6244
6245 void
6246 zink_shader_init(struct zink_screen *screen, struct zink_shader *zs)
6247 {
6248 bool have_psiz = false;
6249 nir_shader *nir = zs->nir;
6250
6251 if (nir->info.stage == MESA_SHADER_KERNEL) {
6252 nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
6253 .modes = nir_var_all ^ nir_var_function_temp,
6254 .may_lower_unaligned_stores_to_atomics = true,
6255 .callback = mem_access_size_align_cb,
6256 .cb_data = screen,
6257 };
6258 NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
6259 NIR_PASS_V(nir, nir_lower_bit_size, zink_lower_bit_size_cb, NULL);
6260 NIR_PASS_V(nir, alias_scratch_memory);
6261 NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
6262 NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
6263 }
6264
6265 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
6266 optimize_nir(nir, NULL, true);
6267 NIR_PASS_V(nir, bound_image_arrays);
6268 NIR_PASS_V(nir, flatten_image_arrays);
6269 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
6270 if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
6271 NIR_PASS_V(nir, lower_bindless_io);
6272 break;
6273 }
6274 }
6275 if (nir->info.stage < MESA_SHADER_FRAGMENT)
6276 nir_gather_xfb_info_from_intrinsics(nir);
6277 NIR_PASS_V(nir, fix_vertex_input_locations);
6278 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6279 scan_nir(screen, nir, zs);
6280 NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL);
6281 NIR_PASS_V(nir, trivial_revectorize);
6282 if (nir->info.io_lowered) {
6283 rework_io_vars(nir, nir_var_shader_in, zs);
6284 rework_io_vars(nir, nir_var_shader_out, zs);
6285 nir_sort_variables_by_location(nir, nir_var_shader_in);
6286 nir_sort_variables_by_location(nir, nir_var_shader_out);
6287 }
6288
6289 if (nir->info.stage < MESA_SHADER_COMPUTE)
6290 create_gfx_pushconst(nir);
6291
6292 if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
6293 nir->info.stage == MESA_SHADER_TESS_EVAL)
6294 NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
6295
6296 if (nir->info.stage < MESA_SHADER_FRAGMENT)
6297 have_psiz = check_psiz(nir);
6298 if (nir->info.stage == MESA_SHADER_FRAGMENT)
6299 zs->flat_flags = zink_flat_flags(nir);
6300
6301 if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
6302 NIR_PASS_V(nir, fixup_io_locations);
6303
6304 NIR_PASS_V(nir, lower_basevertex);
6305 NIR_PASS_V(nir, lower_baseinstance);
6306 NIR_PASS_V(nir, split_bitfields);
6307 if (!screen->info.feats.features.shaderStorageImageMultisample)
6308 NIR_PASS_V(nir, strip_tex_ms);
6309 NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
6310
6311 if (screen->need_2D_zs)
6312 NIR_PASS_V(nir, lower_1d_shadow, screen);
6313
6314 {
6315 nir_lower_subgroups_options subgroup_options = {0};
6316 subgroup_options.lower_to_scalar = true;
6317 subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
6318 subgroup_options.ballot_bit_size = 32;
6319 subgroup_options.ballot_components = 4;
6320 subgroup_options.lower_subgroup_masks = true;
6321 if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
6322 subgroup_options.subgroup_size = 1;
6323 subgroup_options.lower_vote_trivial = true;
6324 }
6325 subgroup_options.lower_inverse_ballot = true;
6326 NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
6327 }
6328
6329 optimize_nir(nir, NULL, true);
6330 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6331 NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
6332 nir_lower_demote_if_to_cf |
6333 nir_lower_terminate_if_to_cf));
6334
6335 bool needs_size = analyze_io(zs, nir);
6336 NIR_PASS_V(nir, unbreak_bos, zs, needs_size);
6337 /* defer these passes to compile time if there could be inlined uniforms */
6338 if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
6339 NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
6340 NIR_PASS_V(nir, rewrite_bo_access, screen);
6341 NIR_PASS_V(nir, remove_bo_access, zs);
6342 }
6343
6344 struct zink_bindless_info bindless = {0};
6345 bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6346 nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
6347 var->data.is_xfb = false;
6348
6349 optimize_nir(nir, NULL, true);
6350 prune_io(nir);
6351
6352 if (nir->info.stage == MESA_SHADER_KERNEL) {
6353 NIR_PASS_V(nir, type_images);
6354 }
6355
6356 unsigned ubo_binding_mask = 0;
6357 unsigned ssbo_binding_mask = 0;
6358 foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
6359 if (_nir_shader_variable_has_mode(var, nir_var_uniform |
6360 nir_var_image |
6361 nir_var_mem_ubo |
6362 nir_var_mem_ssbo)) {
6363 enum zink_descriptor_type ztype;
6364 const struct glsl_type *type = glsl_without_array(var->type);
6365 if (var->data.mode == nir_var_mem_ubo) {
6366 ztype = ZINK_DESCRIPTOR_TYPE_UBO;
6367 /* buffer 0 is a push descriptor */
6368 var->data.descriptor_set = !!var->data.driver_location;
6369 var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
6370 zink_binding(nir->info.stage,
6371 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
6372 var->data.driver_location,
6373 screen->compact_descriptors);
6374 assert(var->data.driver_location || var->data.binding < 10);
6375 VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6376 int binding = var->data.binding;
6377
6378 if (!var->data.driver_location) {
6379 zs->has_uniforms = true;
6380 } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
6381 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6382 zs->bindings[ztype][zs->num_bindings[ztype]].binding = binding;
6383 zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6384 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6385 assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6386 zs->num_bindings[ztype]++;
6387 ubo_binding_mask |= BITFIELD_BIT(binding);
6388 }
6389 } else if (var->data.mode == nir_var_mem_ssbo) {
6390 ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
6391 var->data.descriptor_set = screen->desc_set_id[ztype];
6392 var->data.binding = zink_binding(clamp_stage(&nir->info),
6393 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
6394 var->data.driver_location,
6395 screen->compact_descriptors);
6396 if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
6397 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6398 zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6399 zs->bindings[ztype][zs->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
6400 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6401 assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6402 zs->num_bindings[ztype]++;
6403 ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
6404 }
6405 } else {
6406 assert(var->data.mode == nir_var_uniform ||
6407 var->data.mode == nir_var_image);
6408 if (var->data.bindless) {
6409 zs->bindless = true;
6410 handle_bindless_var(nir, var, type, &bindless);
6411 } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
6412 VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : glsl_type_is_bare_sampler(type) ? VK_DESCRIPTOR_TYPE_SAMPLER : zink_sampler_type(type);
6413 if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
6414 vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
6415 ztype = zink_desc_type_from_vktype(vktype);
6416 var->data.driver_location = var->data.binding;
6417 var->data.descriptor_set = screen->desc_set_id[ztype];
6418 var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
6419 zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6420 zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6421 zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6422 if (glsl_type_is_array(var->type))
6423 zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
6424 else
6425 zs->bindings[ztype][zs->num_bindings[ztype]].size = 1;
6426 zs->num_bindings[ztype]++;
6427 } else if (var->data.mode == nir_var_uniform) {
6428 /* this is a dead uniform */
6429 var->data.mode = 0;
6430 exec_node_remove(&var->node);
6431 }
6432 }
6433 }
6434 }
6435 bool bindless_lowered = false;
6436 NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
6437 zs->bindless |= bindless_lowered;
6438
6439 if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
6440 NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
6441 if (nir->info.stage != MESA_SHADER_KERNEL)
6442 NIR_PASS_V(nir, match_tex_dests, zs, false);
6443
6444 if (!nir->info.internal)
6445 nir_foreach_shader_out_variable(var, nir)
6446 var->data.explicit_xfb_buffer = 0;
6447 if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
6448 update_so_info(zs, nir, nir->info.outputs_written, have_psiz);
6449 zink_shader_serialize_blob(nir, &zs->blob);
6450 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6451 }
6452
6453 char *
6454 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
6455 {
6456 struct zink_screen *screen = zink_screen(pscreen);
6457 nir_shader *nir = nirptr;
6458
6459 nir_lower_tex_options tex_opts = {
6460 .lower_invalid_implicit_lod = true,
6461 };
6462 /*
6463 Sampled Image must be an object whose type is OpTypeSampledImage.
6464 The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
6465 or Rect, and the Arrayed and MS operands must be 0.
6466 - SPIR-V, OpImageSampleProj* opcodes
6467 */
6468 tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
6469 BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
6470 tex_opts.lower_txp_array = true;
6471 if (!screen->info.feats.features.shaderImageGatherExtended)
6472 tex_opts.lower_tg4_offsets = true;
6473 NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
6474 optimize_nir(nir, NULL, false);
6475 if (nir->info.stage == MESA_SHADER_VERTEX)
6476 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6477 if (screen->driconf.inline_uniforms)
6478 nir_find_inlinable_uniforms(nir);
6479
6480 return NULL;
6481 }
6482
6483 void
6484 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6485 {
6486 _mesa_set_destroy(shader->programs, NULL);
6487 util_queue_fence_wait(&shader->precompile.fence);
6488 util_queue_fence_destroy(&shader->precompile.fence);
6489 zink_descriptor_shader_deinit(screen, shader);
6490 if (screen->info.have_EXT_shader_object) {
6491 VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
6492 } else {
6493 if (shader->precompile.obj.mod)
6494 VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
6495 if (shader->precompile.gpl)
6496 VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
6497 }
6498 blob_finish(&shader->blob);
6499 ralloc_free(shader->spirv);
6500 free(shader->precompile.bindings);
6501 ralloc_free(shader);
6502 }
6503
6504 static bool
6505 gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader)
6506 {
6507 /* this shader may still be precompiling, so access here must be locked and singular */
6508 simple_mtx_lock(&shader->lock);
6509 struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL);
6510 struct zink_gfx_program *prog = (void*)(entry ? entry->key : NULL);
6511 if (entry)
6512 _mesa_set_remove(shader->programs, entry);
6513 simple_mtx_unlock(&shader->lock);
6514 if (!prog)
6515 return false;
6516 gl_shader_stage stage = shader->info.stage;
6517 assert(stage < ZINK_GFX_SHADER_COUNT);
6518 util_queue_fence_wait(&prog->base.cache_fence);
6519 unsigned stages_present = prog->stages_present;
6520 if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
6521 prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
6522 stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
6523 unsigned idx = zink_program_cache_stages(stages_present);
6524 if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
6525 (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
6526 struct hash_table *ht = &prog->base.ctx->program_cache[idx];
6527 simple_mtx_lock(&prog->base.ctx->program_lock[idx]);
6528 struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
6529 assert(he && he->data == prog);
6530 _mesa_hash_table_remove(ht, he);
6531 prog->base.removed = true;
6532 simple_mtx_unlock(&prog->base.ctx->program_lock[idx]);
6533
6534 for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
6535 for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
6536 hash_table_foreach(&prog->pipelines[r][i], table_entry) {
6537 struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
6538
6539 util_queue_fence_wait(&pc_entry->fence);
6540 }
6541 }
6542 }
6543 }
6544 if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
6545 prog->shaders[stage] = NULL;
6546 prog->stages_remaining &= ~BITFIELD_BIT(stage);
6547 }
6548 /* only remove generated tcs during parent tes destruction */
6549 if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
6550 prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
6551 if (stage != MESA_SHADER_FRAGMENT &&
6552 prog->shaders[MESA_SHADER_GEOMETRY] &&
6553 prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
6554 shader) {
6555 prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
6556 }
6557 zink_gfx_program_reference(screen, &prog, NULL);
6558 return true;
6559 }
6560
6561 void
6562 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6563 {
6564 assert(shader->info.stage != MESA_SHADER_COMPUTE);
6565 util_queue_fence_wait(&shader->precompile.fence);
6566
6567 /* if the shader is still precompiling, the program set must be pruned under lock */
6568 while (gfx_shader_prune(screen, shader));
6569
6570 while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
6571 struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
6572 if (!libs->removed) {
6573 libs->removed = true;
6574 unsigned idx = zink_program_cache_stages(libs->stages_present);
6575 simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
6576 _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
6577 simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
6578 }
6579 zink_gfx_lib_cache_unref(screen, libs);
6580 }
6581 if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
6582 shader->non_fs.generated_tcs) {
6583 /* automatically destroy generated tcs shaders when tes is destroyed */
6584 zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
6585 shader->non_fs.generated_tcs = NULL;
6586 }
6587 if (shader->info.stage != MESA_SHADER_FRAGMENT) {
6588 for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
6589 for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
6590 if (shader->non_fs.generated_gs[i][j]) {
6591 /* automatically destroy generated gs shaders when owner is destroyed */
6592 zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
6593 shader->non_fs.generated_gs[i][j] = NULL;
6594 }
6595 }
6596 }
6597 }
6598 zink_shader_free(screen, shader);
6599 }
6600
6601
6602 struct zink_shader_object
6603 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
6604 {
6605 assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
6606 /* shortcut all the nir passes since we just have to change this one word */
6607 zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
6608 return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
6609 }
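/*
 * Illustrative note (a sketch of the mechanism, not new driver API): the
 * word patched above is the literal operand of the
 * "OpExecutionMode %main OutputVertices N" instruction in the previously
 * emitted SPIR-V, whose index is recorded during SPIR-V emission as
 * tcs_vertices_out_word. Overwriting just that literal re-targets the TCS to
 * a new patch size without rerunning any NIR or SPIR-V passes, e.g.:
 *
 *   OpExecutionMode %main OutputVertices 3  ->  ... OutputVertices 4
 */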
6610
6611 /* creating a passthrough tcs shader that's roughly:
6612
6613 #version 150
6614 #extension GL_ARB_tessellation_shader : require
6615
6616 in vec4 some_var[gl_MaxPatchVertices];
6617 out vec4 some_var_out;
6618
6619 layout(push_constant) uniform tcsPushConstants {
6620 layout(offset = 0) float TessLevelInner[2];
6621 layout(offset = 8) float TessLevelOuter[4];
6622 } u_tcsPushConstants;
6623 layout(vertices = $vertices_per_patch) out;
6624 void main()
6625 {
6626 gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
6627 gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
6628 some_var_out = some_var[gl_InvocationID];
6629 }
6630
6631 */
6632 void
6633 zink_shader_tcs_init(struct zink_screen *screen, struct zink_shader *zs, nir_shader *tes, nir_shader **nir_ret)
6634 {
6635 nir_shader *nir = zs->nir;
6636
6637 nir_builder b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir)));
6638
6639 nir_def *invocation_id = nir_load_invocation_id(&b);
6640
6641 nir_foreach_shader_in_variable(var, tes) {
6642 if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
6643 continue;
6644 const struct glsl_type *in_type = var->type;
6645 const struct glsl_type *out_type = var->type;
6646 char buf[1024];
6647 snprintf(buf, sizeof(buf), "%s_out", var->name);
6648 if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
6649 const struct glsl_type *type = var->type;
6650 in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
6651 out_type = glsl_array_type(type, nir->info.tess.tcs_vertices_out, 0);
6652 }
6653
6654 nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
6655 nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
6656 out->data.location = in->data.location = var->data.location;
6657 out->data.location_frac = in->data.location_frac = var->data.location_frac;
6658
6659 /* gl_in[] receives values from equivalent built-in output
6660 variables written by the vertex shader (section 2.14.7). Each array
6661 element of gl_in[] is a structure holding values for a specific vertex of
6662 the input patch. The length of gl_in[] is equal to the
6663 implementation-dependent maximum patch size (gl_MaxPatchVertices).
6664 - ARB_tessellation_shader
6665 */
6666 /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
6667 nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
6668 nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
6669 copy_vars(&b, out_value, in_value);
6670 }
6671 nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
6672 gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
6673 gl_TessLevelInner->data.patch = 1;
6674 nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
6675 gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
6676 gl_TessLevelOuter->data.patch = 1;
6677
6678 create_gfx_pushconst(nir);
6679
6680 nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
6681 nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
6682 nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
6683 nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
6684
6685 for (unsigned i = 0; i < 2; i++) {
6686 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
6687 nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
6688 }
6689 for (unsigned i = 0; i < 4; i++) {
6690 nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
6691 nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
6692 }
6693
6694 nir_validate_shader(nir, "created");
6695
6696 optimize_nir(nir, NULL, true);
6697 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6698 NIR_PASS_V(nir, nir_convert_from_ssa, true);
6699
6700 *nir_ret = nir;
6701 zink_shader_serialize_blob(nir, &zs->blob);
6702 }
6703
6704 struct zink_shader *
6705 zink_shader_tcs_create(struct zink_screen *screen, unsigned vertices_per_patch)
6706 {
6707 struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6708 util_queue_fence_init(&zs->precompile.fence);
6709 zs->hash = _mesa_hash_pointer(zs);
6710 zs->programs = _mesa_pointer_set_create(NULL);
6711 simple_mtx_init(&zs->lock, mtx_plain);
6712
6713 nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
6714 nir_function *fn = nir_function_create(nir, "main");
6715 fn->is_entrypoint = true;
6716 nir_function_impl_create(fn);
6717 zs->nir = nir;
6718
6719 nir->info.tess.tcs_vertices_out = vertices_per_patch;
6720 memcpy(&zs->info, &nir->info, sizeof(nir->info));
6721 zs->non_fs.is_generated = true;
6722 return zs;
6723 }
6724
6725 bool
6726 zink_shader_has_cubes(nir_shader *nir)
6727 {
6728 nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
6729 const struct glsl_type *type = glsl_without_array(var->type);
6730 if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
6731 return true;
6732 }
6733 return false;
6734 }
6735
6736 nir_shader *
6737 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
6738 {
6739 struct blob_reader blob_reader;
6740 blob_reader_init(&blob_reader, blob->data, blob->size);
6741 return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
6742 }
6743
6744 nir_shader *
6745 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
6746 {
6747 return zink_shader_blob_deserialize(screen, &zs->blob);
6748 }
6749
6750 void
6751 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
6752 {
6753 blob_init(blob);
6754 #ifndef NDEBUG
6755 bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
6756 #else
6757 bool strip = false;
6758 #endif
6759 nir_serialize(blob, nir, strip);
6760 }
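/*
 * Illustrative usage (a sketch, not new driver API): the serialize and
 * deserialize helpers above are a matched pair, so a shader stored in
 * zs->blob can be rehydrated for printing or recompilation:
 *
 *   struct blob blob;
 *   zink_shader_serialize_blob(nir, &blob);           // writes nir into blob
 *   nir_shader *clone =
 *      zink_shader_blob_deserialize(screen, &blob);   // fresh copy, caller frees
 *   ralloc_free(clone);
 *   blob_finish(&blob);
 */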
6761
6762 void
6763 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
6764 {
6765 nir_shader *nir = zink_shader_deserialize(screen, zs);
6766 nir_print_shader(nir, fp);
6767 ralloc_free(nir);
6768 }
6769