1 /*
2  * Copyright 2018 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nir_opcodes.h"
25 #include "zink_context.h"
26 #include "zink_compiler.h"
27 #include "zink_descriptors.h"
28 #include "zink_program.h"
29 #include "zink_screen.h"
30 #include "nir_to_spirv/nir_to_spirv.h"
31 
32 #include "pipe/p_state.h"
33 
34 #include "nir.h"
35 #include "nir_xfb_info.h"
36 #include "nir/nir_draw_helpers.h"
37 #include "compiler/nir/nir_builder.h"
38 #include "compiler/nir/nir_serialize.h"
39 #include "compiler/nir/nir_builtin_builder.h"
40 
41 #include "nir/tgsi_to_nir.h"
42 #include "tgsi/tgsi_dump.h"
43 
44 #include "util/u_memory.h"
45 
46 #include "compiler/spirv/nir_spirv.h"
47 #include "compiler/spirv/spirv_info.h"
48 #include "vk_util.h"
49 
50 bool
51 zink_lower_cubemap_to_array(nir_shader *s, uint32_t nonseamless_cube_mask);
52 
53 
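/* Recursively copy the value behind one deref into another of the same bare
 * type: structs and arrays/matrices are copied member by member, everything
 * else with a plain load + store.
 */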
54 static void
55 copy_vars(nir_builder *b, nir_deref_instr *dst, nir_deref_instr *src)
56 {
57    assert(glsl_get_bare_type(dst->type) == glsl_get_bare_type(src->type));
58    if (glsl_type_is_struct_or_ifc(dst->type)) {
59       for (unsigned i = 0; i < glsl_get_length(dst->type); ++i) {
60          copy_vars(b, nir_build_deref_struct(b, dst, i), nir_build_deref_struct(b, src, i));
61       }
62    } else if (glsl_type_is_array_or_matrix(dst->type)) {
63       unsigned count = glsl_type_is_array(dst->type) ? glsl_array_size(dst->type) : glsl_get_matrix_columns(dst->type);
64       for (unsigned i = 0; i < count; i++) {
65          copy_vars(b, nir_build_deref_array_imm(b, dst, i), nir_build_deref_array_imm(b, src, i));
66       }
67    } else {
68       nir_def *load = nir_load_deref(b, src);
69       nir_store_deref(b, dst, load, BITFIELD_MASK(load->num_components));
70    }
71 }
72 
73 static bool
74 is_clipcull_dist(int location)
75 {
76    switch (location) {
77    case VARYING_SLOT_CLIP_DIST0:
78    case VARYING_SLOT_CLIP_DIST1:
79    case VARYING_SLOT_CULL_DIST0:
80    case VARYING_SLOT_CULL_DIST1:
81       return true;
82    default: break;
83    }
84    return false;
85 }
86 
87 #define SIZEOF_FIELD(type, field) sizeof(((type *)0)->field)
88 
89 static void
90 create_gfx_pushconst(nir_shader *nir)
91 {
92 #define PUSHCONST_MEMBER(member_idx, field)                                                                     \
93 fields[member_idx].type =                                                                                       \
94    glsl_array_type(glsl_uint_type(), SIZEOF_FIELD(struct zink_gfx_push_constant, field) / sizeof(uint32_t), 0); \
95 fields[member_idx].name = ralloc_asprintf(nir, #field);                                                         \
96 fields[member_idx].offset = offsetof(struct zink_gfx_push_constant, field);
97 
98    nir_variable *pushconst;
99    /* create compatible layout for the ntv push constant loader */
100    struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, ZINK_GFX_PUSHCONST_MAX);
101    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED, draw_mode_is_indexed);
102    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DRAW_ID, draw_id);
103    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED, framebuffer_is_layered);
104    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL, default_inner_level);
105    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL, default_outer_level);
106    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN, line_stipple_pattern);
107    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_VIEWPORT_SCALE, viewport_scale);
108    PUSHCONST_MEMBER(ZINK_GFX_PUSHCONST_LINE_WIDTH, line_width);
109 
110    pushconst = nir_variable_create(nir, nir_var_mem_push_const,
111                                    glsl_struct_type(fields, ZINK_GFX_PUSHCONST_MAX, "struct", false),
112                                    "gfx_pushconst");
113    pushconst->data.location = INT_MAX; //doesn't really matter
114 
115 #undef PUSHCONST_MEMBER
116 }
117 
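/* GL requires gl_BaseVertex to read as 0 for non-indexed draws; wrap every
 * load_base_vertex in a select on the draw_mode_is_indexed push constant so
 * it returns 0 when the current draw is not indexed.
 */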
118 static bool
119 lower_basevertex_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
120 {
121    if (instr->intrinsic != nir_intrinsic_load_base_vertex)
122       return false;
123 
124    b->cursor = nir_after_instr(&instr->instr);
125    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
126    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_MODE_IS_INDEXED));
127    load->num_components = 1;
128    nir_def_init(&load->instr, &load->def, 1, 32);
129    nir_builder_instr_insert(b, &load->instr);
130 
131    nir_def *composite = nir_build_alu(b, nir_op_bcsel,
132                                           nir_build_alu(b, nir_op_ieq, &load->def, nir_imm_int(b, 1), NULL, NULL),
133                                           &instr->def,
134                                           nir_imm_int(b, 0),
135                                           NULL);
136 
137    nir_def_rewrite_uses_after(&instr->def, composite,
138                                   composite->parent_instr);
139    return true;
140 }
141 
142 static bool
143 lower_basevertex(nir_shader *shader)
144 {
145    if (shader->info.stage != MESA_SHADER_VERTEX)
146       return false;
147 
148    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
149       return false;
150 
151    return nir_shader_intrinsics_pass(shader, lower_basevertex_instr,
152                                      nir_metadata_dominance, NULL);
153 }
154 
155 
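/* Replace gl_DrawID with the draw_id value passed in through the gfx push
 * constant block.
 */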
156 static bool
157 lower_drawid_instr(nir_builder *b, nir_intrinsic_instr *instr, void *data)
158 {
159    if (instr->intrinsic != nir_intrinsic_load_draw_id)
160       return false;
161 
162    b->cursor = nir_before_instr(&instr->instr);
163    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant_zink);
164    load->src[0] = nir_src_for_ssa(nir_imm_int(b, ZINK_GFX_PUSHCONST_DRAW_ID));
165    load->num_components = 1;
166    nir_def_init(&load->instr, &load->def, 1, 32);
167    nir_builder_instr_insert(b, &load->instr);
168 
169    nir_def_rewrite_uses(&instr->def, &load->def);
170 
171    return true;
172 }
173 
174 static bool
175 lower_drawid(nir_shader *shader)
176 {
177    if (shader->info.stage != MESA_SHADER_VERTEX)
178       return false;
179 
180    if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
181       return false;
182 
183    return nir_shader_intrinsics_pass(shader, lower_drawid_instr,
184                                      nir_metadata_dominance, NULL);
185 }
186 
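/* Wide-point emulation for geometry shaders: every point emitted on stream 0
 * is expanded into a 4-vertex triangle strip centered on gl_Position, offset
 * by half of gl_PointSize in each axis (converted to clip space using the
 * viewport-scale push constant and gl_Position.w).
 */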
187 struct lower_gl_point_state {
188    nir_variable *gl_pos_out;
189    nir_variable *gl_point_size;
190 };
191 
192 static bool
193 lower_gl_point_gs_instr(nir_builder *b, nir_instr *instr, void *data)
194 {
195    struct lower_gl_point_state *state = data;
196    nir_def *vp_scale, *pos;
197 
198    if (instr->type != nir_instr_type_intrinsic)
199       return false;
200 
201    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
202    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
203        intrin->intrinsic != nir_intrinsic_emit_vertex)
204       return false;
205 
206    if (nir_intrinsic_stream_id(intrin) != 0)
207       return false;
208 
209    if (intrin->intrinsic == nir_intrinsic_end_primitive_with_counter ||
210          intrin->intrinsic == nir_intrinsic_end_primitive) {
211       nir_instr_remove(&intrin->instr);
212       return true;
213    }
214 
215    b->cursor = nir_before_instr(instr);
216 
217    // viewport-map endpoints
218    nir_def *vp_const_pos = nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE);
219    vp_scale = nir_load_push_constant_zink(b, 2, 32, vp_const_pos);
220 
221    // Load point info values
222    nir_def *point_size = nir_load_var(b, state->gl_point_size);
223    nir_def *point_pos = nir_load_var(b, state->gl_pos_out);
224 
225    // w_delta = gl_point_size / width_viewport_size_scale * gl_Position.w
226    nir_def *w_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 0));
227    w_delta = nir_fmul(b, w_delta, nir_channel(b, point_pos, 3));
228    // half_w_delta = w_delta / 2
229    nir_def *half_w_delta = nir_fmul_imm(b, w_delta, 0.5);
230 
231    // h_delta = gl_point_size / height_viewport_size_scale * gl_Position.w
232    nir_def *h_delta = nir_fdiv(b, point_size, nir_channel(b, vp_scale, 1));
233    h_delta = nir_fmul(b, h_delta, nir_channel(b, point_pos, 3));
234    // half_h_delta = h_delta / 2
235    nir_def *half_h_delta = nir_fmul_imm(b, h_delta, 0.5);
236 
237    nir_def *point_dir[4][2] = {
238       { nir_imm_float(b, -1), nir_imm_float(b, -1) },
239       { nir_imm_float(b, -1), nir_imm_float(b, 1) },
240       { nir_imm_float(b, 1), nir_imm_float(b, -1) },
241       { nir_imm_float(b, 1), nir_imm_float(b, 1) }
242    };
243 
244    nir_def *point_pos_x = nir_channel(b, point_pos, 0);
245    nir_def *point_pos_y = nir_channel(b, point_pos, 1);
246 
247    for (size_t i = 0; i < 4; i++) {
248       pos = nir_vec4(b,
249                      nir_ffma(b, half_w_delta, point_dir[i][0], point_pos_x),
250                      nir_ffma(b, half_h_delta, point_dir[i][1], point_pos_y),
251                      nir_channel(b, point_pos, 2),
252                      nir_channel(b, point_pos, 3));
253 
254       nir_store_var(b, state->gl_pos_out, pos, 0xf);
255 
256       nir_emit_vertex(b);
257    }
258 
259    nir_end_primitive(b);
260 
261    nir_instr_remove(&intrin->instr);
262 
263    return true;
264 }
265 
266 static bool
267 lower_gl_point_gs(nir_shader *shader)
268 {
269    struct lower_gl_point_state state;
270 
271    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
272    shader->info.gs.vertices_out *= 4;
273 
274    // Get the gl_Position and gl_PointSize output variables
275    state.gl_pos_out =
276       nir_find_variable_with_location(shader, nir_var_shader_out,
277                                       VARYING_SLOT_POS);
278    state.gl_point_size =
279       nir_find_variable_with_location(shader, nir_var_shader_out,
280                                       VARYING_SLOT_PSIZ);
281 
282    // if gl_Position or gl_PointSize aren't written, we have nothing to do
283    if (!state.gl_pos_out || !state.gl_point_size)
284       return false;
285 
286    return nir_shader_instructions_pass(shader, lower_gl_point_gs_instr,
287                                        nir_metadata_dominance, &state);
288 }
289 
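/* Provoking-vertex emulation for geometry shaders: all output stores are
 * redirected into per-varying ring buffers, emit_vertex only bumps a counter,
 * and end_primitive replays the buffered vertices with their order rotated so
 * that the desired provoking vertex is emitted first.
 */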
290 struct lower_pv_mode_state {
291    nir_variable *varyings[VARYING_SLOT_MAX][4];
292    nir_variable *pos_counter;
293    nir_variable *out_pos_counter;
294    nir_variable *ring_offset;
295    unsigned ring_size;
296    unsigned primitive_vert_count;
297    unsigned prim;
298 };
299 
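/* Translate a logical vertex index into a slot in the output ring buffers,
 * applying the current ring offset and wrapping at ring_size.
 */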
300 static nir_def*
301 lower_pv_mode_gs_ring_index(nir_builder *b,
302                             struct lower_pv_mode_state *state,
303                             nir_def *index)
304 {
305    nir_def *ring_offset = nir_load_var(b, state->ring_offset);
306    return nir_imod_imm(b, nir_iadd(b, index, ring_offset),
307                           state->ring_size);
308 }
309 
310 /* Given the final deref of a chain of derefs, this function will walk up the chain
311  * until it finds a var deref.
312  *
313  * It will then recreate an identical chain that ends with the provided deref.
314  */
315 static nir_deref_instr*
316 replicate_derefs(nir_builder *b, nir_deref_instr *old, nir_deref_instr *new)
317 {
318    nir_deref_instr *parent = nir_deref_instr_parent(old);
319    if (!parent)
320       return new;
321    switch(old->deref_type) {
322    case nir_deref_type_var:
323       return new;
324    case nir_deref_type_array:
325       return nir_build_deref_array(b, replicate_derefs(b, parent, new), old->arr.index.ssa);
326    case nir_deref_type_struct:
327       return nir_build_deref_struct(b, replicate_derefs(b, parent, new), old->strct.index);
328    case nir_deref_type_array_wildcard:
329    case nir_deref_type_ptr_as_array:
330    case nir_deref_type_cast:
331       unreachable("unexpected deref type");
332    }
333    unreachable("impossible deref type");
334 }
335 
336 static bool
337 lower_pv_mode_gs_store(nir_builder *b,
338                        nir_intrinsic_instr *intrin,
339                        struct lower_pv_mode_state *state)
340 {
341    b->cursor = nir_before_instr(&intrin->instr);
342    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
343    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
344       nir_variable *var = nir_deref_instr_get_variable(deref);
345 
346       gl_varying_slot location = var->data.location;
347       unsigned location_frac = var->data.location_frac;
348       assert(state->varyings[location][location_frac]);
349       nir_def *pos_counter = nir_load_var(b, state->pos_counter);
350       nir_def *index = lower_pv_mode_gs_ring_index(b, state, pos_counter);
351       nir_deref_instr *varying_deref = nir_build_deref_var(b, state->varyings[location][location_frac]);
352       nir_deref_instr *ring_deref = nir_build_deref_array(b, varying_deref, index);
353       // recreate the chain of derefs that led to the store.
354       nir_deref_instr *new_top_deref = replicate_derefs(b, deref, ring_deref);
355       nir_store_deref(b, new_top_deref, intrin->src[1].ssa, nir_intrinsic_write_mask(intrin));
356       nir_instr_remove(&intrin->instr);
357       return true;
358    }
359 
360    return false;
361 }
362 
363 static void
364 lower_pv_mode_emit_rotated_prim(nir_builder *b,
365                                 struct lower_pv_mode_state *state,
366                                 nir_def *current_vertex)
367 {
368    nir_def *two = nir_imm_int(b, 2);
369    nir_def *three = nir_imm_int(b, 3);
370    bool is_triangle = state->primitive_vert_count == 3;
371    /* This shader will always see the last three vertices emitted by the user gs.
372     * The following table is used to rotate primitives within a strip generated
373     * by the user gs such that the last vertex becomes the first.
374     *
375     * [lines, tris][even/odd index][vertex mod 3]
376     */
377    static const unsigned vert_maps[2][2][3] = {
378       {{1, 0, 0}, {1, 0, 0}},
379       {{2, 0, 1}, {2, 1, 0}}
380    };
381    /* When the primitive supplied to the gs comes from a strip, the last provoking vertex
382     * is either the last or the second, depending on whether the triangle is at an odd
383     * or even position within the strip.
384     *
385     * odd or even primitive within draw
386     */
387    nir_def *odd_prim = nir_imod(b, nir_load_primitive_id(b), two);
388    for (unsigned i = 0; i < state->primitive_vert_count; i++) {
389       /* odd or even triangle within strip emitted by user GS
390        * this is handled using the table
391        */
392       nir_def *odd_user_prim = nir_imod(b, current_vertex, two);
393       unsigned offset_even = vert_maps[is_triangle][0][i];
394       unsigned offset_odd = vert_maps[is_triangle][1][i];
395       nir_def *offset_even_value = nir_imm_int(b, offset_even);
396       nir_def *offset_odd_value = nir_imm_int(b, offset_odd);
397       nir_def *rotated_i = nir_bcsel(b, nir_b2b1(b, odd_user_prim),
398                                             offset_odd_value, offset_even_value);
399       /* Here we account for how triangles are provided to the gs from a strip.
400        * For even primitives we rotate by 3, meaning we do nothing.
401        * For odd primitives we rotate by 2, combined with the previous rotation this
402        * means the second vertex becomes the last.
403        */
404       if (state->prim == ZINK_PVE_PRIMITIVE_TRISTRIP)
405         rotated_i = nir_imod(b, nir_iadd(b, rotated_i,
406                                             nir_isub(b, three,
407                                                         odd_prim)),
408                                             three);
409       /* Triangles that come from fans are provided to the gs the same way as
410        * odd triangles from a strip so always rotate by 2.
411        */
412       else if (state->prim == ZINK_PVE_PRIMITIVE_FAN)
413         rotated_i = nir_imod(b, nir_iadd_imm(b, rotated_i, 2),
414                                 three);
415       rotated_i = nir_iadd(b, rotated_i, current_vertex);
416       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
417          gl_varying_slot location = var->data.location;
418          unsigned location_frac = var->data.location_frac;
419          if (state->varyings[location][location_frac]) {
420             nir_def *index = lower_pv_mode_gs_ring_index(b, state, rotated_i);
421             nir_deref_instr *value = nir_build_deref_array(b, nir_build_deref_var(b, state->varyings[location][location_frac]), index);
422             copy_vars(b, nir_build_deref_var(b, var), value);
423          }
424       }
425       nir_emit_vertex(b);
426    }
427 }
428 
429 static bool
430 lower_pv_mode_gs_emit_vertex(nir_builder *b,
431                              nir_intrinsic_instr *intrin,
432                              struct lower_pv_mode_state *state)
433 {
434    b->cursor = nir_before_instr(&intrin->instr);
435 
436    // increment pos_counter
437    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
438    nir_store_var(b, state->pos_counter, nir_iadd_imm(b, pos_counter, 1), 1);
439 
440    nir_instr_remove(&intrin->instr);
441    return true;
442 }
443 
444 static bool
445 lower_pv_mode_gs_end_primitive(nir_builder *b,
446                                nir_intrinsic_instr *intrin,
447                                struct lower_pv_mode_state *state)
448 {
449    b->cursor = nir_before_instr(&intrin->instr);
450 
451    nir_def *pos_counter = nir_load_var(b, state->pos_counter);
452    nir_push_loop(b);
453    {
454       nir_def *out_pos_counter = nir_load_var(b, state->out_pos_counter);
455       nir_break_if(b, nir_ilt(b, nir_isub(b, pos_counter, out_pos_counter),
456                                  nir_imm_int(b, state->primitive_vert_count)));
457 
458       lower_pv_mode_emit_rotated_prim(b, state, out_pos_counter);
459       nir_end_primitive(b);
460 
461       nir_store_var(b, state->out_pos_counter, nir_iadd_imm(b, out_pos_counter, 1), 1);
462    }
463    nir_pop_loop(b, NULL);
464    /* Set the ring offset such that when position 0 is
465     * read we get the last value written
466     */
467    nir_store_var(b, state->ring_offset, pos_counter, 1);
468    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
469    nir_store_var(b, state->out_pos_counter, nir_imm_int(b, 0), 1);
470 
471    nir_instr_remove(&intrin->instr);
472    return true;
473 }
474 
475 static bool
476 lower_pv_mode_gs_instr(nir_builder *b, nir_instr *instr, void *data)
477 {
478    if (instr->type != nir_instr_type_intrinsic)
479       return false;
480 
481    struct lower_pv_mode_state *state = data;
482    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
483 
484    switch (intrin->intrinsic) {
485    case nir_intrinsic_store_deref:
486       return lower_pv_mode_gs_store(b, intrin, state);
487    case nir_intrinsic_copy_deref:
488       unreachable("should be lowered");
489    case nir_intrinsic_emit_vertex_with_counter:
490    case nir_intrinsic_emit_vertex:
491       return lower_pv_mode_gs_emit_vertex(b, intrin, state);
492    case nir_intrinsic_end_primitive:
493    case nir_intrinsic_end_primitive_with_counter:
494       return lower_pv_mode_gs_end_primitive(b, intrin, state);
495    default:
496       return false;
497    }
498 }
499 
500 static bool
501 lower_pv_mode_gs(nir_shader *shader, unsigned prim)
502 {
503    nir_builder b;
504    struct lower_pv_mode_state state;
505    memset(state.varyings, 0, sizeof(state.varyings));
506 
507    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
508    b = nir_builder_at(nir_before_impl(entry));
509 
510    state.primitive_vert_count =
511       mesa_vertices_per_prim(shader->info.gs.output_primitive);
512    state.ring_size = shader->info.gs.vertices_out;
513 
514    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
515       gl_varying_slot location = var->data.location;
516       unsigned location_frac = var->data.location_frac;
517 
518       char name[100];
519       snprintf(name, sizeof(name), "__tmp_primverts_%d_%d", location, location_frac);
520       state.varyings[location][location_frac] =
521          nir_local_variable_create(entry,
522                                    glsl_array_type(var->type,
523                                                    state.ring_size,
524                                                    false),
525                                    name);
526    }
527 
528    state.pos_counter = nir_local_variable_create(entry,
529                                                  glsl_uint_type(),
530                                                  "__pos_counter");
531 
532    state.out_pos_counter = nir_local_variable_create(entry,
533                                                      glsl_uint_type(),
534                                                      "__out_pos_counter");
535 
536    state.ring_offset = nir_local_variable_create(entry,
537                                                  glsl_uint_type(),
538                                                  "__ring_offset");
539 
540    state.prim = prim;
541 
542    // initialize pos_counter, out_pos_counter and ring_offset
543    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
544    nir_store_var(&b, state.out_pos_counter, nir_imm_int(&b, 0), 1);
545    nir_store_var(&b, state.ring_offset, nir_imm_int(&b, 0), 1);
546 
547    shader->info.gs.vertices_out = (shader->info.gs.vertices_out -
548                                    (state.primitive_vert_count - 1)) *
549                                   state.primitive_vert_count;
550    return nir_shader_instructions_pass(shader, lower_pv_mode_gs_instr,
551                                        nir_metadata_dominance, &state);
552 }
553 
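/* Line-stipple emulation, GS side: accumulate the screen-space length of the
 * line strip into a noperspective "__stipple" varying; the matching fragment
 * pass below turns that running distance into per-sample coverage.
 */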
554 struct lower_line_stipple_state {
555    nir_variable *pos_out;
556    nir_variable *stipple_out;
557    nir_variable *prev_pos;
558    nir_variable *pos_counter;
559    nir_variable *stipple_counter;
560    bool line_rectangular;
561 };
562 
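/* Perspective-divide a clip-space position and apply the viewport scale,
 * giving window-space x/y.
 */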
563 static nir_def *
564 viewport_map(nir_builder *b, nir_def *vert,
565              nir_def *scale)
566 {
567    nir_def *w_recip = nir_frcp(b, nir_channel(b, vert, 3));
568    nir_def *ndc_point = nir_fmul(b, nir_trim_vector(b, vert, 2),
569                                         w_recip);
570    return nir_fmul(b, ndc_point, scale);
571 }
572 
573 static bool
574 lower_line_stipple_gs_instr(nir_builder *b, nir_instr *instr, void *data)
575 {
576    struct lower_line_stipple_state *state = data;
577    if (instr->type != nir_instr_type_intrinsic)
578       return false;
579 
580    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
581    if (intrin->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
582        intrin->intrinsic != nir_intrinsic_emit_vertex)
583       return false;
584 
585    b->cursor = nir_before_instr(instr);
586 
587    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
588    // viewport-map endpoints
589    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
590                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
591    nir_def *prev = nir_load_var(b, state->prev_pos);
592    nir_def *curr = nir_load_var(b, state->pos_out);
593    prev = viewport_map(b, prev, vp_scale);
594    curr = viewport_map(b, curr, vp_scale);
595 
596    // calculate length of line
597    nir_def *len;
598    if (state->line_rectangular)
599       len = nir_fast_distance(b, prev, curr);
600    else {
601       nir_def *diff = nir_fabs(b, nir_fsub(b, prev, curr));
602       len = nir_fmax(b, nir_channel(b, diff, 0), nir_channel(b, diff, 1));
603    }
604    // update stipple_counter
605    nir_store_var(b, state->stipple_counter,
606                     nir_fadd(b, nir_load_var(b, state->stipple_counter),
607                                 len), 1);
608    nir_pop_if(b, NULL);
609    // emit stipple out
610    nir_copy_var(b, state->stipple_out, state->stipple_counter);
611    nir_copy_var(b, state->prev_pos, state->pos_out);
612 
613    // update prev_pos and pos_counter for next vertex
614    b->cursor = nir_after_instr(instr);
615    nir_store_var(b, state->pos_counter,
616                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
617                                     1), 1);
618 
619    return true;
620 }
621 
622 static bool
623 lower_line_stipple_gs(nir_shader *shader, bool line_rectangular)
624 {
625    nir_builder b;
626    struct lower_line_stipple_state state;
627 
628    state.pos_out =
629       nir_find_variable_with_location(shader, nir_var_shader_out,
630                                       VARYING_SLOT_POS);
631 
632    // if position isn't written, we have nothing to do
633    if (!state.pos_out)
634       return false;
635 
636    state.stipple_out = nir_variable_create(shader, nir_var_shader_out,
637                                            glsl_float_type(),
638                                            "__stipple");
639    state.stipple_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
640    state.stipple_out->data.driver_location = shader->num_outputs++;
641    state.stipple_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
642    shader->info.outputs_written |= BITFIELD64_BIT(state.stipple_out->data.location);
643 
644    // create temp variables
645    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
646                                         glsl_vec4_type(),
647                                         "__prev_pos");
648    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
649                                            glsl_uint_type(),
650                                            "__pos_counter");
651    state.stipple_counter = nir_variable_create(shader, nir_var_shader_temp,
652                                                glsl_float_type(),
653                                                "__stipple_counter");
654 
655    state.line_rectangular = line_rectangular;
656    // initialize pos_counter and stipple_counter
657    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
658    b = nir_builder_at(nir_before_impl(entry));
659    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
660    nir_store_var(&b, state.stipple_counter, nir_imm_float(&b, 0), 1);
661 
662    return nir_shader_instructions_pass(shader, lower_line_stipple_gs_instr,
663                                        nir_metadata_dominance, &state);
664 }
665 
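/* Line-stipple emulation, FS side: for each covered sample, interpolate the
 * "__stipple" distance at that sample, index the 16-bit pattern from the
 * push constants (scaled by the stipple factor), and clear the sample from
 * the output sample mask when the pattern bit is 0.
 */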
666 static bool
667 lower_line_stipple_fs(nir_shader *shader)
668 {
669    nir_builder b;
670    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
671    b = nir_builder_at(nir_after_impl(entry));
672 
673    // create stipple counter
674    nir_variable *stipple = nir_variable_create(shader, nir_var_shader_in,
675                                                glsl_float_type(),
676                                                "__stipple");
677    stipple->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
678    stipple->data.driver_location = shader->num_inputs++;
679    stipple->data.location = MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
680    shader->info.inputs_read |= BITFIELD64_BIT(stipple->data.location);
681 
682    nir_variable *sample_mask_out =
683       nir_find_variable_with_location(shader, nir_var_shader_out,
684                                       FRAG_RESULT_SAMPLE_MASK);
685    if (!sample_mask_out) {
686       sample_mask_out = nir_variable_create(shader, nir_var_shader_out,
687                                         glsl_uint_type(), "sample_mask");
688       sample_mask_out->data.driver_location = shader->num_outputs++;
689       sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
690    }
691 
692    nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
693                                                       nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
694    nir_def *factor = nir_i2f32(&b, nir_ishr_imm(&b, pattern, 16));
695    pattern = nir_iand_imm(&b, pattern, 0xffff);
696 
697    nir_def *sample_mask_in = nir_load_sample_mask_in(&b);
698    nir_variable *v = nir_local_variable_create(entry, glsl_uint_type(), NULL);
699    nir_variable *sample_mask = nir_local_variable_create(entry, glsl_uint_type(), NULL);
700    nir_store_var(&b, v, sample_mask_in, 1);
701    nir_store_var(&b, sample_mask, sample_mask_in, 1);
702    nir_push_loop(&b);
703    {
704       nir_def *value = nir_load_var(&b, v);
705       nir_def *index = nir_ufind_msb(&b, value);
706       nir_def *index_mask = nir_ishl(&b, nir_imm_int(&b, 1), index);
707       nir_def *new_value = nir_ixor(&b, value, index_mask);
708       nir_store_var(&b, v, new_value,  1);
709       nir_push_if(&b, nir_ieq_imm(&b, value, 0));
710       nir_jump(&b, nir_jump_break);
711       nir_pop_if(&b, NULL);
712 
713       nir_def *stipple_pos =
714          nir_interp_deref_at_sample(&b, 1, 32,
715             &nir_build_deref_var(&b, stipple)->def, index);
716       stipple_pos = nir_fmod(&b, nir_fdiv(&b, stipple_pos, factor),
717                                  nir_imm_float(&b, 16.0));
718       stipple_pos = nir_f2i32(&b, stipple_pos);
719       nir_def *bit =
720          nir_iand_imm(&b, nir_ishr(&b, pattern, stipple_pos), 1);
721       nir_push_if(&b, nir_ieq_imm(&b, bit, 0));
722       {
723          nir_def *sample_mask_value = nir_load_var(&b, sample_mask);
724          sample_mask_value = nir_ixor(&b, sample_mask_value, index_mask);
725          nir_store_var(&b, sample_mask, sample_mask_value, 1);
726       }
727       nir_pop_if(&b, NULL);
728    }
729    nir_pop_loop(&b, NULL);
730    nir_store_var(&b, sample_mask_out, nir_load_var(&b, sample_mask), 1);
731 
732    return true;
733 }
734 
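/* Smooth (antialiased) line emulation, GS side: each line segment is expanded
 * into a triangle strip with end caps, sized from the line-width push
 * constant, and a noperspective "__line_coord" varying is written for the
 * fragment-side lowering to compute coverage from.
 */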
735 struct lower_line_smooth_state {
736    nir_variable *pos_out;
737    nir_variable *line_coord_out;
738    nir_variable *prev_pos;
739    nir_variable *pos_counter;
740    nir_variable *prev_varyings[VARYING_SLOT_MAX][4],
741                 *varyings[VARYING_SLOT_MAX][4]; // location_frac
742 };
743 
744 static bool
745 lower_line_smooth_gs_store(nir_builder *b,
746                            nir_intrinsic_instr *intrin,
747                            struct lower_line_smooth_state *state)
748 {
749    b->cursor = nir_before_instr(&intrin->instr);
750    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
751    if (nir_deref_mode_is(deref, nir_var_shader_out)) {
752       nir_variable *var = nir_deref_instr_get_variable(deref);
753 
754       // we take care of position elsewhere
755       gl_varying_slot location = var->data.location;
756       unsigned location_frac = var->data.location_frac;
757       if (location != VARYING_SLOT_POS) {
758          assert(state->varyings[location]);
759          nir_store_var(b, state->varyings[location][location_frac],
760                        intrin->src[1].ssa,
761                        nir_intrinsic_write_mask(intrin));
762          nir_instr_remove(&intrin->instr);
763          return true;
764       }
765    }
766 
767    return false;
768 }
769 
770 static bool
771 lower_line_smooth_gs_emit_vertex(nir_builder *b,
772                                  nir_intrinsic_instr *intrin,
773                                  struct lower_line_smooth_state *state)
774 {
775    b->cursor = nir_before_instr(&intrin->instr);
776 
777    nir_push_if(b, nir_ine_imm(b, nir_load_var(b, state->pos_counter), 0));
778    nir_def *vp_scale = nir_load_push_constant_zink(b, 2, 32,
779                                                        nir_imm_int(b, ZINK_GFX_PUSHCONST_VIEWPORT_SCALE));
780    nir_def *prev = nir_load_var(b, state->prev_pos);
781    nir_def *curr = nir_load_var(b, state->pos_out);
782    nir_def *prev_vp = viewport_map(b, prev, vp_scale);
783    nir_def *curr_vp = viewport_map(b, curr, vp_scale);
784 
785    nir_def *width = nir_load_push_constant_zink(b, 1, 32,
786                                                     nir_imm_int(b, ZINK_GFX_PUSHCONST_LINE_WIDTH));
787    nir_def *half_width = nir_fadd_imm(b, nir_fmul_imm(b, width, 0.5), 0.5);
788 
789    const unsigned yx[2] = { 1, 0 };
790    nir_def *vec = nir_fsub(b, curr_vp, prev_vp);
791    nir_def *len = nir_fast_length(b, vec);
792    nir_def *dir = nir_normalize(b, vec);
793    nir_def *half_length = nir_fmul_imm(b, len, 0.5);
794    half_length = nir_fadd_imm(b, half_length, 0.5);
795 
796    nir_def *vp_scale_rcp = nir_frcp(b, vp_scale);
797    nir_def *tangent =
798       nir_fmul(b,
799                nir_fmul(b,
800                         nir_swizzle(b, dir, yx, 2),
801                         nir_imm_vec2(b, 1.0, -1.0)),
802                vp_scale_rcp);
803    tangent = nir_fmul(b, tangent, half_width);
804    tangent = nir_pad_vector_imm_int(b, tangent, 0, 4);
805    dir = nir_fmul_imm(b, nir_fmul(b, dir, vp_scale_rcp), 0.5);
806 
807    nir_def *line_offets[8] = {
808       nir_fadd(b, tangent, nir_fneg(b, dir)),
809       nir_fadd(b, nir_fneg(b, tangent), nir_fneg(b, dir)),
810       tangent,
811       nir_fneg(b, tangent),
812       tangent,
813       nir_fneg(b, tangent),
814       nir_fadd(b, tangent, dir),
815       nir_fadd(b, nir_fneg(b, tangent), dir),
816    };
817    nir_def *line_coord =
818       nir_vec4(b, half_width, half_width, half_length, half_length);
819    nir_def *line_coords[8] = {
820       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,  -1,  1)),
821       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,  -1,  1)),
822       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
823       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
824       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   0,  1)),
825       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   0,  1)),
826       nir_fmul(b, line_coord, nir_imm_vec4(b, -1,  1,   1,  1)),
827       nir_fmul(b, line_coord, nir_imm_vec4(b,  1,  1,   1,  1)),
828    };
829 
830    /* emit first end-cap, and start line */
831    for (int i = 0; i < 4; ++i) {
832       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
833          gl_varying_slot location = var->data.location;
834          unsigned location_frac = var->data.location_frac;
835          if (state->prev_varyings[location][location_frac])
836             nir_copy_var(b, var, state->prev_varyings[location][location_frac]);
837       }
838       nir_store_var(b, state->pos_out,
839                     nir_fadd(b, prev, nir_fmul(b, line_offets[i],
840                              nir_channel(b, prev, 3))), 0xf);
841       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
842       nir_emit_vertex(b);
843    }
844 
845    /* finish line and emit last end-cap */
846    for (int i = 4; i < 8; ++i) {
847       nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
848          gl_varying_slot location = var->data.location;
849          unsigned location_frac = var->data.location_frac;
850          if (state->varyings[location][location_frac])
851             nir_copy_var(b, var, state->varyings[location][location_frac]);
852       }
853       nir_store_var(b, state->pos_out,
854                     nir_fadd(b, curr, nir_fmul(b, line_offets[i],
855                              nir_channel(b, curr, 3))), 0xf);
856       nir_store_var(b, state->line_coord_out, line_coords[i], 0xf);
857       nir_emit_vertex(b);
858    }
859    nir_end_primitive(b);
860 
861    nir_pop_if(b, NULL);
862 
863    nir_copy_var(b, state->prev_pos, state->pos_out);
864    nir_foreach_variable_with_modes(var, b->shader, nir_var_shader_out) {
865       gl_varying_slot location = var->data.location;
866       unsigned location_frac = var->data.location_frac;
867       if (state->varyings[location][location_frac])
868          nir_copy_var(b, state->prev_varyings[location][location_frac], state->varyings[location][location_frac]);
869    }
870 
871    // update prev_pos and pos_counter for next vertex
872    b->cursor = nir_after_instr(&intrin->instr);
873    nir_store_var(b, state->pos_counter,
874                     nir_iadd_imm(b, nir_load_var(b, state->pos_counter),
875                                     1), 1);
876 
877    nir_instr_remove(&intrin->instr);
878    return true;
879 }
880 
881 static bool
882 lower_line_smooth_gs_end_primitive(nir_builder *b,
883                                    nir_intrinsic_instr *intrin,
884                                    struct lower_line_smooth_state *state)
885 {
886    b->cursor = nir_before_instr(&intrin->instr);
887 
888    // reset line counter
889    nir_store_var(b, state->pos_counter, nir_imm_int(b, 0), 1);
890 
891    nir_instr_remove(&intrin->instr);
892    return true;
893 }
894 
895 static bool
896 lower_line_smooth_gs_instr(nir_builder *b, nir_instr *instr, void *data)
897 {
898    if (instr->type != nir_instr_type_intrinsic)
899       return false;
900 
901    struct lower_line_smooth_state *state = data;
902    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
903 
904    switch (intrin->intrinsic) {
905    case nir_intrinsic_store_deref:
906       return lower_line_smooth_gs_store(b, intrin, state);
907    case nir_intrinsic_copy_deref:
908       unreachable("should be lowered");
909    case nir_intrinsic_emit_vertex_with_counter:
910    case nir_intrinsic_emit_vertex:
911       return lower_line_smooth_gs_emit_vertex(b, intrin, state);
912    case nir_intrinsic_end_primitive:
913    case nir_intrinsic_end_primitive_with_counter:
914       return lower_line_smooth_gs_end_primitive(b, intrin, state);
915    default:
916       return false;
917    }
918 }
919 
920 static bool
921 lower_line_smooth_gs(nir_shader *shader)
922 {
923    nir_builder b;
924    struct lower_line_smooth_state state;
925 
926    memset(state.varyings, 0, sizeof(state.varyings));
927    memset(state.prev_varyings, 0, sizeof(state.prev_varyings));
928    nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {
929       gl_varying_slot location = var->data.location;
930       unsigned location_frac = var->data.location_frac;
931       if (location == VARYING_SLOT_POS)
932          continue;
933 
934       char name[100];
935       snprintf(name, sizeof(name), "__tmp_%d_%d", location, location_frac);
936       state.varyings[location][location_frac] =
937          nir_variable_create(shader, nir_var_shader_temp,
938                               var->type, name);
939 
940       snprintf(name, sizeof(name), "__tmp_prev_%d_%d", location, location_frac);
941       state.prev_varyings[location][location_frac] =
942          nir_variable_create(shader, nir_var_shader_temp,
943                               var->type, name);
944    }
945 
946    state.pos_out =
947       nir_find_variable_with_location(shader, nir_var_shader_out,
948                                       VARYING_SLOT_POS);
949 
950    // if position isn't written, we have nothing to do
951    if (!state.pos_out)
952       return false;
953 
954    unsigned location = 0;
955    nir_foreach_shader_in_variable(var, shader) {
956      if (var->data.driver_location >= location)
957          location = var->data.driver_location + 1;
958    }
959 
960    state.line_coord_out =
961       nir_variable_create(shader, nir_var_shader_out, glsl_vec4_type(),
962                           "__line_coord");
963    state.line_coord_out->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
964    state.line_coord_out->data.driver_location = location;
965    state.line_coord_out->data.location = MAX2(util_last_bit64(shader->info.outputs_written), VARYING_SLOT_VAR0);
966    shader->info.outputs_written |= BITFIELD64_BIT(state.line_coord_out->data.location);
967    shader->num_outputs++;
968 
969    // create temp variables
970    state.prev_pos = nir_variable_create(shader, nir_var_shader_temp,
971                                         glsl_vec4_type(),
972                                         "__prev_pos");
973    state.pos_counter = nir_variable_create(shader, nir_var_shader_temp,
974                                            glsl_uint_type(),
975                                            "__pos_counter");
976 
977    // initialize pos_counter
978    nir_function_impl *entry = nir_shader_get_entrypoint(shader);
979    b = nir_builder_at(nir_before_impl(entry));
980    nir_store_var(&b, state.pos_counter, nir_imm_int(&b, 0), 1);
981 
982    shader->info.gs.vertices_out = 8 * shader->info.gs.vertices_out;
983    shader->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
984 
985    return nir_shader_instructions_pass(shader, lower_line_smooth_gs_instr,
986                                        nir_metadata_dominance, &state);
987 }
988 
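/* Smooth-line emulation, FS side: defer to nir_lower_aaline_fs, optionally
 * passing it the stipple counter and pattern when stippling is enabled too.
 */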
989 static bool
990 lower_line_smooth_fs(nir_shader *shader, bool lower_stipple)
991 {
992    int dummy;
993    nir_builder b;
994 
995    nir_variable *stipple_counter = NULL, *stipple_pattern = NULL;
996    if (lower_stipple) {
997       stipple_counter = nir_variable_create(shader, nir_var_shader_in,
998                                             glsl_float_type(),
999                                             "__stipple");
1000       stipple_counter->data.interpolation = INTERP_MODE_NOPERSPECTIVE;
1001       stipple_counter->data.driver_location = shader->num_inputs++;
1002       stipple_counter->data.location =
1003          MAX2(util_last_bit64(shader->info.inputs_read), VARYING_SLOT_VAR0);
1004       shader->info.inputs_read |= BITFIELD64_BIT(stipple_counter->data.location);
1005 
1006       stipple_pattern = nir_variable_create(shader, nir_var_shader_temp,
1007                                             glsl_uint_type(),
1008                                             "stipple_pattern");
1009 
1010       // initialize stipple_pattern
1011       nir_function_impl *entry = nir_shader_get_entrypoint(shader);
1012       b = nir_builder_at(nir_before_impl(entry));
1013       nir_def *pattern = nir_load_push_constant_zink(&b, 1, 32,
1014                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_LINE_STIPPLE_PATTERN));
1015       nir_store_var(&b, stipple_pattern, pattern, 1);
1016    }
1017 
1018    nir_lower_aaline_fs(shader, &dummy, stipple_counter, stipple_pattern);
1019    return true;
1020 }
1021 
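/* Dual-source blending: move the second color output from FRAG_RESULT_DATA1
 * onto DATA0 with index 1, matching how dual-source outputs are expressed in
 * Vulkan/SPIR-V.
 */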
1022 static bool
1023 lower_dual_blend(nir_shader *shader)
1024 {
1025    bool progress = false;
1026    nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
1027    if (var) {
1028       var->data.location = FRAG_RESULT_DATA0;
1029       var->data.index = 1;
1030       progress = true;
1031    }
1032    nir_shader_preserve_all_metadata(shader);
1033    return progress;
1034 }
1035 
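/* Rewrite pack_64_2x32/unpack_64_2x32 in terms of the *_split opcodes, which
 * work on explicit 32-bit halves.
 */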
1036 static bool
1037 lower_64bit_pack_instr(nir_builder *b, nir_instr *instr, void *data)
1038 {
1039    if (instr->type != nir_instr_type_alu)
1040       return false;
1041    nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
1042    if (alu_instr->op != nir_op_pack_64_2x32 &&
1043        alu_instr->op != nir_op_unpack_64_2x32)
1044       return false;
1045    b->cursor = nir_before_instr(&alu_instr->instr);
1046    nir_def *src = nir_ssa_for_alu_src(b, alu_instr, 0);
1047    nir_def *dest;
1048    switch (alu_instr->op) {
1049    case nir_op_pack_64_2x32:
1050       dest = nir_pack_64_2x32_split(b, nir_channel(b, src, 0), nir_channel(b, src, 1));
1051       break;
1052    case nir_op_unpack_64_2x32:
1053       dest = nir_vec2(b, nir_unpack_64_2x32_split_x(b, src), nir_unpack_64_2x32_split_y(b, src));
1054       break;
1055    default:
1056       unreachable("Impossible opcode");
1057    }
1058    nir_def_replace(&alu_instr->def, dest);
1059    return true;
1060 }
1061 
1062 static bool
1063 lower_64bit_pack(nir_shader *shader)
1064 {
1065    return nir_shader_instructions_pass(shader, lower_64bit_pack_instr,
1066                                        nir_metadata_control_flow, NULL);
1067 }
1068 
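/* Build a passthrough geometry shader that consumes 4-vertex lines_adjacency
 * input (how quads are fed to this GS) and emits two triangles, choosing the
 * vertex order based on load_provoking_last and copying every varying from
 * the previous stage.
 */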
1069 nir_shader *
1070 zink_create_quads_emulation_gs(const nir_shader_compiler_options *options,
1071                                const nir_shader *prev_stage)
1072 {
1073    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY,
1074                                                   options,
1075                                                   "filled quad gs");
1076 
1077    nir_shader *nir = b.shader;
1078    nir->info.gs.input_primitive = MESA_PRIM_LINES_ADJACENCY;
1079    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
1080    nir->info.gs.vertices_in = 4;
1081    nir->info.gs.vertices_out = 6;
1082    nir->info.gs.invocations = 1;
1083    nir->info.gs.active_stream_mask = 1;
1084 
1085    nir->info.has_transform_feedback_varyings = prev_stage->info.has_transform_feedback_varyings;
1086    memcpy(nir->info.xfb_stride, prev_stage->info.xfb_stride, sizeof(prev_stage->info.xfb_stride));
1087    if (prev_stage->xfb_info) {
1088       size_t size = nir_xfb_info_size(prev_stage->xfb_info->output_count);
1089       nir->xfb_info = ralloc_memdup(nir, prev_stage->xfb_info, size);
1090    }
1091 
1092    nir_variable *in_vars[VARYING_SLOT_MAX];
1093    nir_variable *out_vars[VARYING_SLOT_MAX];
1094    unsigned num_vars = 0;
1095 
1096    /* Create input/output variables. */
1097    nir_foreach_shader_out_variable(var, prev_stage) {
1098       assert(!var->data.patch);
1099 
1100       /* input vars can't be created for those */
1101       if (var->data.location == VARYING_SLOT_LAYER ||
1102           var->data.location == VARYING_SLOT_VIEW_INDEX ||
1103           /* psiz not needed for quads */
1104           var->data.location == VARYING_SLOT_PSIZ)
1105          continue;
1106 
1107       char name[100];
1108       if (var->name)
1109          snprintf(name, sizeof(name), "in_%s", var->name);
1110       else
1111          snprintf(name, sizeof(name), "in_%d", var->data.driver_location);
1112 
1113       nir_variable *in = nir_variable_clone(var, nir);
1114       ralloc_free(in->name);
1115       in->name = ralloc_strdup(in, name);
1116       in->type = glsl_array_type(var->type, 4, false);
1117       in->data.mode = nir_var_shader_in;
1118       nir_shader_add_variable(nir, in);
1119 
1120       if (var->name)
1121          snprintf(name, sizeof(name), "out_%s", var->name);
1122       else
1123          snprintf(name, sizeof(name), "out_%d", var->data.driver_location);
1124 
1125       nir_variable *out = nir_variable_clone(var, nir);
1126       ralloc_free(out->name);
1127       out->name = ralloc_strdup(out, name);
1128       out->data.mode = nir_var_shader_out;
1129       nir_shader_add_variable(nir, out);
1130 
1131       in_vars[num_vars] = in;
1132       out_vars[num_vars++] = out;
1133    }
1134 
1135    int mapping_first[] = {0, 1, 2, 0, 2, 3};
1136    int mapping_last[] = {0, 1, 3, 1, 2, 3};
1137    nir_def *last_pv_vert_def = nir_load_provoking_last(&b);
1138    last_pv_vert_def = nir_ine_imm(&b, last_pv_vert_def, 0);
1139    for (unsigned i = 0; i < 6; ++i) {
1140       /* swap indices 2 and 3 */
1141       nir_def *idx = nir_bcsel(&b, last_pv_vert_def,
1142                                    nir_imm_int(&b, mapping_last[i]),
1143                                    nir_imm_int(&b, mapping_first[i]));
1144       /* Copy inputs to outputs. */
1145       for (unsigned j = 0; j < num_vars; ++j) {
1146          if (in_vars[j]->data.location == VARYING_SLOT_EDGE) {
1147             continue;
1148          }
1149          nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in_vars[j]), idx);
1150          copy_vars(&b, nir_build_deref_var(&b, out_vars[j]), in_value);
1151       }
1152       nir_emit_vertex(&b, 0);
1153       if (i == 2)
1154         nir_end_primitive(&b, 0);
1155    }
1156 
1157    nir_end_primitive(&b, 0);
1158    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1159    nir_validate_shader(nir, "in zink_create_quads_emulation_gs");
1160    return nir;
1161 }
1162 
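/* Turn load_flat_mask / load_provoking_last into constant-offset UBO loads so
 * nir_inline_uniforms can later fold them to immediates; 64-bit values are
 * split into two 32-bit loads since the inlining pass only handles 32-bit
 * channels.
 */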
1163 static bool
1164 lower_system_values_to_inlined_uniforms_instr(nir_builder *b,
1165                                               nir_intrinsic_instr *intrin,
1166                                               void *data)
1167 {
1168    int inlined_uniform_offset;
1169    switch (intrin->intrinsic) {
1170    case nir_intrinsic_load_flat_mask:
1171       inlined_uniform_offset = ZINK_INLINE_VAL_FLAT_MASK * sizeof(uint32_t);
1172       break;
1173    case nir_intrinsic_load_provoking_last:
1174       inlined_uniform_offset = ZINK_INLINE_VAL_PV_LAST_VERT * sizeof(uint32_t);
1175       break;
1176    default:
1177       return false;
1178    }
1179 
1180    b->cursor = nir_before_instr(&intrin->instr);
1181    assert(intrin->def.bit_size == 32 || intrin->def.bit_size == 64);
1182    /* nir_inline_uniforms can't handle bit_size != 32 (it will silently ignore
1183     * anything with a different bit_size) so we need to split the load. */
1184    int num_dwords = intrin->def.bit_size / 32;
1185    nir_def *dwords[2] = {NULL};
1186    for (unsigned i = 0; i < num_dwords; i++)
1187       dwords[i] = nir_load_ubo(b, 1, 32, nir_imm_int(b, 0),
1188                                    nir_imm_int(b, inlined_uniform_offset + i * 4),
1189                                    .align_mul = intrin->def.bit_size / 8,
1190                                    .align_offset = 0,
1191                                    .range_base = 0, .range = ~0);
1192    nir_def *new_dest_def;
1193    if (intrin->def.bit_size == 32)
1194       new_dest_def = dwords[0];
1195    else
1196       new_dest_def = nir_pack_64_2x32_split(b, dwords[0], dwords[1]);
1197    nir_def_replace(&intrin->def, new_dest_def);
1198    return true;
1199 }
1200 
1201 bool
1202 zink_lower_system_values_to_inlined_uniforms(nir_shader *nir)
1203 {
1204    return nir_shader_intrinsics_pass(nir,
1205                                        lower_system_values_to_inlined_uniforms_instr,
1206                                        nir_metadata_dominance, NULL);
1207 }
1208 
1209 /* from radeonsi */
1210 static unsigned
1211 amd_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
1212 {
1213    /* TODO: maybe implement shader profiles to disable, cf. 39804ebf1766d38004259085e1fec4ed8db86f1c */
1214 
1215    switch (consumer->info.stage) {
1216    case MESA_SHADER_TESS_CTRL: /* VS->TCS */
1217       /* Non-amplifying shaders can always have their varying expressions
1218        * moved into later shaders.
1219        */
1220       return UINT_MAX;
1221 
1222    case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */
1223       return consumer->info.gs.vertices_in == 1 ? UINT_MAX :
1224              consumer->info.gs.vertices_in == 2 ? 20 : 14;
1225 
1226    case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */
1227    case MESA_SHADER_FRAGMENT:
1228       /* Up to 3 uniforms and 5 ALUs. */
1229       return 14;
1230 
1231    default:
1232       unreachable("unexpected shader stage");
1233    }
1234 }
1235 
1236 /* from radeonsi */
1237 static unsigned
1238 amd_varying_estimate_instr_cost(nir_instr *instr)
1239 {
1240    unsigned dst_bit_size, src_bit_size, num_dst_dwords;
1241    nir_op alu_op;
1242 
1243    /* This is a very loose approximation based on gfx10. */
1244    switch (instr->type) {
1245    case nir_instr_type_alu:
1246       dst_bit_size = nir_instr_as_alu(instr)->def.bit_size;
1247       src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size;
1248       alu_op = nir_instr_as_alu(instr)->op;
1249       num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
1250 
1251       switch (alu_op) {
1252       case nir_op_mov:
1253       case nir_op_vec2:
1254       case nir_op_vec3:
1255       case nir_op_vec4:
1256       case nir_op_vec5:
1257       case nir_op_vec8:
1258       case nir_op_vec16:
1259       case nir_op_fabs:
1260       case nir_op_fneg:
1261       case nir_op_fsat:
1262          return 0;
1263 
1264       case nir_op_imul:
1265       case nir_op_umul_low:
1266          return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords;
1267 
1268       case nir_op_imul_high:
1269       case nir_op_umul_high:
1270       case nir_op_imul_2x32_64:
1271       case nir_op_umul_2x32_64:
1272          return 4;
1273 
1274       case nir_op_fexp2:
1275       case nir_op_flog2:
1276       case nir_op_frcp:
1277       case nir_op_frsq:
1278       case nir_op_fsqrt:
1279       case nir_op_fsin:
1280       case nir_op_fcos:
1281       case nir_op_fsin_amd:
1282       case nir_op_fcos_amd:
1283          return 4; /* FP16 & FP32. */
1284 
1285       case nir_op_fpow:
1286          return 4 + 1 + 4; /* log2 + mul + exp2 */
1287 
1288       case nir_op_fsign:
1289          return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */
1290 
1291       case nir_op_idiv:
1292       case nir_op_udiv:
1293       case nir_op_imod:
1294       case nir_op_umod:
1295       case nir_op_irem:
1296          return dst_bit_size == 64 ? 80 : 40;
1297 
1298       case nir_op_fdiv:
1299          return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */
1300 
1301       case nir_op_fmod:
1302       case nir_op_frem:
1303          return dst_bit_size == 64 ? 80 : 8;
1304 
1305       default:
1306          /* Double opcodes. Comparisons always have full performance. */
1307          if ((dst_bit_size == 64 &&
1308               nir_op_infos[alu_op].output_type & nir_type_float) ||
1309              (dst_bit_size >= 8 && src_bit_size == 64 &&
1310               nir_op_infos[alu_op].input_types[0] & nir_type_float))
1311             return 16;
1312 
1313          return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32);
1314       }
1315 
1316    case nir_instr_type_intrinsic:
1317       dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size;
1318       num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32);
1319 
1320       switch (nir_instr_as_intrinsic(instr)->intrinsic) {
1321       case nir_intrinsic_load_deref:
1322          /* Uniform or UBO load.
1323           * Set a low cost to balance the number of scalar loads and ALUs.
1324           */
1325          return 3 * num_dst_dwords;
1326 
1327       default:
1328          unreachable("unexpected intrinsic");
1329       }
1330 
1331    default:
1332       unreachable("unexpected instr type");
1333    }
1334 }
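/* Illustrative note (not part of the original source): these two callbacks
 * feed the varying optimization enabled below via nir_io_glsl_opt_varyings.
 * Using the table above, moving e.g. "uniform * input" into a fragment
 * shader is costed as one 32-bit load_deref (3 * 1 dwords = 3) plus one
 * 32-bit fmul (1), i.e. 4 total, which fits the fragment-shader budget of 14
 * returned by amd_varying_expression_max_cost(), so the expression may be
 * moved across the stage boundary.
 */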
1335 
1336 void
zink_screen_init_compiler(struct zink_screen * screen)1337 zink_screen_init_compiler(struct zink_screen *screen)
1338 {
1339    static const struct nir_shader_compiler_options
1340    default_options = {
1341       .io_options = nir_io_glsl_lower_derefs,
1342       .lower_ffma16 = true,
1343       .lower_ffma32 = true,
1344       .lower_ffma64 = true,
1345       .lower_scmp = true,
1346       .lower_fdph = true,
1347       .lower_flrp32 = true,
1348       .lower_fsat = true,
1349       .lower_hadd = true,
1350       .lower_iadd_sat = true,
1351       .lower_fisnormal = true,
1352       .lower_extract_byte = true,
1353       .lower_extract_word = true,
1354       .lower_insert_byte = true,
1355       .lower_insert_word = true,
1356 
1357       /* We can only support 32-bit ldexp, but NIR doesn't have a flag
1358        * distinguishing 64-bit ldexp support (radeonsi *does* support 64-bit
1359        * ldexp, so we don't just always lower it in NIR).  Given that ldexp is
1360        * effectively unused (no instances in shader-db), it's not worth the
1361        * effort to do so.
1362        */
1363       .lower_ldexp = true,
1364 
1365       .lower_mul_high = true,
1366       .lower_to_scalar = true,
1367       .lower_uadd_carry = true,
1368       .compact_arrays = true,
1369       .lower_usub_borrow = true,
1370       .lower_uadd_sat = true,
1371       .lower_usub_sat = true,
1372       .lower_vector_cmp = true,
1373       .lower_int64_options =
1374          nir_lower_bit_count64 |
1375          nir_lower_find_lsb64 |
1376          nir_lower_ufind_msb64,
1377       .lower_doubles_options = nir_lower_dround_even,
1378       .lower_uniforms_to_ubo = true,
1379       .has_fsub = true,
1380       .has_isub = true,
1381       .lower_mul_2x32_64 = true,
1382       .support_16bit_alu = true, /* not quite what it sounds like */
1383       .support_indirect_inputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
1384       .support_indirect_outputs = BITFIELD_MASK(MESA_SHADER_COMPUTE),
1385       .max_unroll_iterations = 0,
1386       .use_interpolated_input_intrinsics = true,
1387       .has_ddx_intrinsics = true,
1388    };
1389 
1390    screen->nir_options = default_options;
1391 
1392    if (!screen->info.feats.features.shaderInt64)
1393       screen->nir_options.lower_int64_options = ~0;
1394 
1395    if (!screen->info.feats.features.shaderFloat64) {
1396       screen->nir_options.lower_doubles_options = ~0;
1397       screen->nir_options.lower_flrp64 = true;
1398       screen->nir_options.lower_ffma64 = true;
1399       /* soft fp64 function inlining will blow up loop bodies and effectively
1400        * stop Vulkan drivers from unrolling the loops.
1401        */
1402       screen->nir_options.max_unroll_iterations_fp64 = 32;
1403    }
1404 
1405    if (screen->driver_compiler_workarounds.io_opt) {
1406       screen->nir_options.io_options |= nir_io_glsl_opt_varyings;
1407 
1408       switch (zink_driverid(screen)) {
1409       case VK_DRIVER_ID_MESA_RADV:
1410       case VK_DRIVER_ID_AMD_OPEN_SOURCE:
1411       case VK_DRIVER_ID_AMD_PROPRIETARY:
1412          screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1413          screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
1414          break;
1415       default:
1416          mesa_logw("zink: instruction costs not implemented for this driver!");
1417          screen->nir_options.varying_expression_max_cost = amd_varying_expression_max_cost;
1418          screen->nir_options.varying_estimate_instr_cost = amd_varying_estimate_instr_cost;
1419       }
1420    }
1421 
1422    /*
1423        The OpFRem and OpFMod instructions use cheap approximations of remainder,
1424        and the error can be large due to the discontinuity in trunc() and floor().
1425        This can produce mathematically unexpected results in some cases, such as
1426        FMod(x,x) computing x rather than 0, and can also cause the result to have
1427        a different sign than the infinitely precise result.
1428 
1429        -Table 84. Precision of core SPIR-V Instructions
1430        * for drivers that are known to have imprecise fmod for doubles, lower dmod
1431     */
1432    if (zink_driverid(screen) == VK_DRIVER_ID_MESA_RADV ||
1433        zink_driverid(screen) == VK_DRIVER_ID_AMD_OPEN_SOURCE ||
1434        zink_driverid(screen) == VK_DRIVER_ID_AMD_PROPRIETARY)
1435       screen->nir_options.lower_doubles_options = nir_lower_dmod;
1436 
1437    if (screen->info.have_EXT_shader_demote_to_helper_invocation)
1438       screen->nir_options.discard_is_demote = true;
1439 }
1440 
1441 const void *
zink_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,gl_shader_stage shader)1442 zink_get_compiler_options(struct pipe_screen *pscreen,
1443                           enum pipe_shader_ir ir,
1444                           gl_shader_stage shader)
1445 {
1446    assert(ir == PIPE_SHADER_IR_NIR);
1447    return &zink_screen(pscreen)->nir_options;
1448 }
1449 
1450 struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen * screen,const struct tgsi_token * tokens)1451 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
1452 {
1453    if (zink_debug & ZINK_DEBUG_TGSI) {
1454       fprintf(stderr, "TGSI shader:\n---8<---\n");
1455       tgsi_dump_to_file(tokens, 0, stderr);
1456       fprintf(stderr, "---8<---\n\n");
1457    }
1458 
1459    return tgsi_to_nir(tokens, screen, false);
1460 }
1461 
1462 
1463 static bool
def_is_64bit(nir_def * def,void * state)1464 def_is_64bit(nir_def *def, void *state)
1465 {
1466    bool *lower = (bool *)state;
1467    if (def && (def->bit_size == 64)) {
1468       *lower = true;
1469       return false;
1470    }
1471    return true;
1472 }
1473 
1474 static bool
src_is_64bit(nir_src * src,void * state)1475 src_is_64bit(nir_src *src, void *state)
1476 {
1477    bool *lower = (bool *)state;
1478    if (src && (nir_src_bit_size(*src) == 64)) {
1479       *lower = true;
1480       return false;
1481    }
1482    return true;
1483 }
1484 
1485 static bool
filter_64_bit_instr(const nir_instr * const_instr,UNUSED const void * data)1486 filter_64_bit_instr(const nir_instr *const_instr, UNUSED const void *data)
1487 {
1488    bool lower = false;
1489    /* lower_alu_to_scalar requires nir_instr to be const, but nir_foreach_*
1490     * doesn't have const variants, so do the ugly const_cast here. */
1491    nir_instr *instr = (nir_instr *)const_instr;
1492 
1493    nir_foreach_def(instr, def_is_64bit, &lower);
1494    if (lower)
1495       return true;
1496    nir_foreach_src(instr, src_is_64bit, &lower);
1497    return lower;
1498 }
1499 
1500 static bool
filter_pack_instr(const nir_instr * const_instr,UNUSED const void * data)1501 filter_pack_instr(const nir_instr *const_instr, UNUSED const void *data)
1502 {
1503    nir_instr *instr = (nir_instr *)const_instr;
1504    nir_alu_instr *alu = nir_instr_as_alu(instr);
1505    switch (alu->op) {
1506    case nir_op_pack_64_2x32_split:
1507    case nir_op_pack_32_2x16_split:
1508    case nir_op_unpack_32_2x16_split_x:
1509    case nir_op_unpack_32_2x16_split_y:
1510    case nir_op_unpack_64_2x32_split_x:
1511    case nir_op_unpack_64_2x32_split_y:
1512       return true;
1513    default:
1514       break;
1515    }
1516    return false;
1517 }
1518 
1519 
1520 struct bo_vars {
1521    nir_variable *uniforms[5];
1522    nir_variable *ubo[5];
1523    nir_variable *ssbo[5];
1524    uint32_t first_ubo;
1525    uint32_t first_ssbo;
1526 };
1527 
1528 static struct bo_vars
get_bo_vars(struct zink_shader * zs,nir_shader * shader)1529 get_bo_vars(struct zink_shader *zs, nir_shader *shader)
1530 {
1531    struct bo_vars bo;
1532    memset(&bo, 0, sizeof(bo));
1533    if (zs->ubos_used)
1534       bo.first_ubo = ffs(zs->ubos_used & ~BITFIELD_BIT(0)) - 2;
1535    assert(bo.first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
1536    if (zs->ssbos_used)
1537       bo.first_ssbo = ffs(zs->ssbos_used) - 1;
1538    assert(bo.first_ssbo < PIPE_MAX_SHADER_BUFFERS);
1539    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
1540       unsigned idx = glsl_get_explicit_stride(glsl_get_struct_field(glsl_without_array(var->type), 0)) >> 1;
1541       if (var->data.mode == nir_var_mem_ssbo) {
1542          assert(!bo.ssbo[idx]);
1543          bo.ssbo[idx] = var;
1544       } else {
1545          if (var->data.driver_location) {
1546             assert(!bo.ubo[idx]);
1547             bo.ubo[idx] = var;
1548          } else {
1549             assert(!bo.uniforms[idx]);
1550             bo.uniforms[idx] = var;
1551          }
1552       }
1553    }
1554    return bo;
1555 }
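/* Illustrative note (not part of the original source): the uniforms/ubo/ssbo
 * arrays above are indexed by bit size / 16, which matches the explicit
 * stride of the variable's "base" array field divided by 2:
 *   8-bit  -> stride 1 -> idx 0
 *   16-bit -> stride 2 -> idx 1
 *   32-bit -> stride 4 -> idx 2
 *   64-bit -> stride 8 -> idx 4   (idx 3 is unused)
 * which is why the accessors below use e.g. bo->ssbo[intr->def.bit_size >> 4].
 */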
1556 
1557 static bool
bound_bo_access_instr(nir_builder * b,nir_instr * instr,void * data)1558 bound_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
1559 {
1560    struct bo_vars *bo = data;
1561    if (instr->type != nir_instr_type_intrinsic)
1562       return false;
1563    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1564    nir_variable *var = NULL;
1565    nir_def *offset = NULL;
1566    bool is_load = true;
1567    b->cursor = nir_before_instr(instr);
1568 
1569    switch (intr->intrinsic) {
1570    case nir_intrinsic_store_ssbo:
1571       var = bo->ssbo[intr->def.bit_size >> 4];
1572       offset = intr->src[2].ssa;
1573       is_load = false;
1574       break;
1575    case nir_intrinsic_load_ssbo:
1576       var = bo->ssbo[intr->def.bit_size >> 4];
1577       offset = intr->src[1].ssa;
1578       break;
1579    case nir_intrinsic_load_ubo:
1580       if (nir_src_is_const(intr->src[0]) && nir_src_as_const_value(intr->src[0])->u32 == 0)
1581          var = bo->uniforms[intr->def.bit_size >> 4];
1582       else
1583          var = bo->ubo[intr->def.bit_size >> 4];
1584       offset = intr->src[1].ssa;
1585       break;
1586    default:
1587       return false;
1588    }
1589    nir_src offset_src = nir_src_for_ssa(offset);
1590    if (!nir_src_is_const(offset_src))
1591       return false;
1592 
1593    unsigned offset_bytes = nir_src_as_const_value(offset_src)->u32;
1594    const struct glsl_type *strct_type = glsl_get_array_element(var->type);
1595    unsigned size = glsl_array_size(glsl_get_struct_field(strct_type, 0));
1596    bool has_unsized = glsl_array_size(glsl_get_struct_field(strct_type, glsl_get_length(strct_type) - 1)) == 0;
1597    if (has_unsized || offset_bytes + intr->num_components - 1 < size)
1598       return false;
1599 
1600    unsigned rewrites = 0;
1601    nir_def *result[2];
1602    for (unsigned i = 0; i < intr->num_components; i++) {
1603       if (offset_bytes + i >= size) {
1604          rewrites++;
1605          if (is_load)
1606             result[i] = nir_imm_zero(b, 1, intr->def.bit_size);
1607       }
1608    }
1609    assert(rewrites == intr->num_components);
1610    if (is_load) {
1611       nir_def *load = nir_vec(b, result, intr->num_components);
1612       nir_def_rewrite_uses(&intr->def, load);
1613    }
1614    nir_instr_remove(instr);
1615    return true;
1616 }
1617 
1618 static bool
bound_bo_access(nir_shader * shader,struct zink_shader * zs)1619 bound_bo_access(nir_shader *shader, struct zink_shader *zs)
1620 {
1621    struct bo_vars bo = get_bo_vars(zs, shader);
1622    return nir_shader_instructions_pass(shader, bound_bo_access_instr, nir_metadata_dominance, &bo);
1623 }
1624 
1625 static void
optimize_nir(struct nir_shader * s,struct zink_shader * zs,bool can_shrink)1626 optimize_nir(struct nir_shader *s, struct zink_shader *zs, bool can_shrink)
1627 {
1628    bool progress;
1629    do {
1630       progress = false;
1631       if (s->options->lower_int64_options)
1632          NIR_PASS_V(s, nir_lower_int64);
1633       if (s->options->lower_doubles_options & nir_lower_fp64_full_software)
1634          NIR_PASS_V(s, lower_64bit_pack);
1635       NIR_PASS_V(s, nir_lower_vars_to_ssa);
1636       NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_pack_instr, NULL);
1637       NIR_PASS(progress, s, nir_opt_copy_prop_vars);
1638       NIR_PASS(progress, s, nir_copy_prop);
1639       NIR_PASS(progress, s, nir_opt_remove_phis);
1640       if (s->options->lower_int64_options) {
1641          NIR_PASS(progress, s, nir_lower_64bit_phis);
1642          NIR_PASS(progress, s, nir_lower_alu_to_scalar, filter_64_bit_instr, NULL);
1643       }
1644       NIR_PASS(progress, s, nir_opt_dce);
1645       NIR_PASS(progress, s, nir_opt_dead_cf);
1646       NIR_PASS(progress, s, nir_lower_phis_to_scalar, false);
1647       NIR_PASS(progress, s, nir_opt_cse);
1648       NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
1649       NIR_PASS(progress, s, nir_opt_algebraic);
1650       NIR_PASS(progress, s, nir_opt_constant_folding);
1651       NIR_PASS(progress, s, nir_opt_undef);
1652       NIR_PASS(progress, s, zink_nir_lower_b2b);
1653       if (zs)
1654          NIR_PASS(progress, s, bound_bo_access, zs);
1655       if (can_shrink)
1656          NIR_PASS(progress, s, nir_opt_shrink_vectors, false);
1657    } while (progress);
1658 
1659    do {
1660       progress = false;
1661       NIR_PASS(progress, s, nir_opt_algebraic_late);
1662       if (progress) {
1663          NIR_PASS_V(s, nir_copy_prop);
1664          NIR_PASS_V(s, nir_opt_dce);
1665          NIR_PASS_V(s, nir_opt_cse);
1666       }
1667    } while (progress);
1668 }
1669 
1670 /* - copy the lowered fbfetch variable
1671  * - set the new one up as an input attachment for descriptor 0.6
1672  * - load it as an image
1673  * - overwrite the previous load
1674  */
1675 static bool
lower_fbfetch_instr(nir_builder * b,nir_instr * instr,void * data)1676 lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
1677 {
1678    bool ms = data != NULL;
1679    if (instr->type != nir_instr_type_intrinsic)
1680       return false;
1681    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1682    if (intr->intrinsic != nir_intrinsic_load_deref)
1683       return false;
1684    nir_variable *var = nir_intrinsic_get_var(intr, 0);
1685    if (!var->data.fb_fetch_output)
1686       return false;
1687    b->cursor = nir_after_instr(instr);
1688    nir_variable *fbfetch = nir_variable_clone(var, b->shader);
1689    /* If Dim is SubpassData, ... Image Format must be Unknown
1690     * - SPIRV OpTypeImage specification
1691     */
1692    fbfetch->data.image.format = 0;
1693    fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
1694    fbfetch->data.mode = nir_var_uniform;
1695    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1696    fbfetch->data.binding = ZINK_FBFETCH_BINDING;
1697    fbfetch->data.sample = ms;
1698    enum glsl_sampler_dim dim = ms ? GLSL_SAMPLER_DIM_SUBPASS_MS : GLSL_SAMPLER_DIM_SUBPASS;
1699    fbfetch->type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1700    nir_shader_add_variable(b->shader, fbfetch);
1701    nir_def *deref = &nir_build_deref_var(b, fbfetch)->def;
1702    nir_def *sample = ms ? nir_load_sample_id(b) : nir_undef(b, 1, 32);
1703    nir_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), sample, nir_imm_int(b, 0));
1704    nir_def_rewrite_uses(&intr->def, load);
1705    return true;
1706 }
1707 
1708 static bool
lower_fbfetch(nir_shader * shader,nir_variable ** fbfetch,bool ms)1709 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch, bool ms)
1710 {
1711    nir_foreach_shader_out_variable(var, shader) {
1712       if (var->data.fb_fetch_output) {
1713          *fbfetch = var;
1714          break;
1715       }
1716    }
1717    assert(*fbfetch);
1718    if (!*fbfetch)
1719       return false;
1720    return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, (void*)ms);
1721 }
1722 
1723 /*
1724  * Add a check for out of bounds LOD for every texel fetch op
1725  * It boils down to:
1726  * - if (lod < query_levels(tex))
1727  * -    res = txf(tex)
1728  * - else
1729  * -    res = (0, 0, 0, 1)
1730  */
1731 static bool
lower_txf_lod_robustness_instr(nir_builder * b,nir_instr * in,void * data)1732 lower_txf_lod_robustness_instr(nir_builder *b, nir_instr *in, void *data)
1733 {
1734    if (in->type != nir_instr_type_tex)
1735       return false;
1736    nir_tex_instr *txf = nir_instr_as_tex(in);
1737    if (txf->op != nir_texop_txf)
1738       return false;
1739 
1740    b->cursor = nir_before_instr(in);
1741    int lod_idx = nir_tex_instr_src_index(txf, nir_tex_src_lod);
1742    assert(lod_idx >= 0);
1743    nir_src lod_src = txf->src[lod_idx].src;
1744    if (nir_src_is_const(lod_src) && nir_src_as_const_value(lod_src)->u32 == 0)
1745       return false;
1746 
1747    nir_def *lod = lod_src.ssa;
1748 
1749    int offset_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_offset);
1750    int handle_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_handle);
1751    int deref_idx = nir_tex_instr_src_index(txf, nir_tex_src_texture_deref);
1752    nir_tex_instr *levels = nir_tex_instr_create(b->shader,
1753                                                 1 + !!(offset_idx >= 0) + !!(handle_idx >= 0));
1754    unsigned src_idx = 0;
1755    levels->op = nir_texop_query_levels;
1756    levels->dest_type = nir_type_int | lod->bit_size;
1757    if (deref_idx >= 0) {
1758       levels->src[src_idx].src_type = nir_tex_src_texture_deref;
1759       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[deref_idx].src.ssa);
1760    }
1761    if (offset_idx >= 0) {
1762       levels->src[src_idx].src_type = nir_tex_src_texture_offset;
1763       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[offset_idx].src.ssa);
1764    }
1765    if (handle_idx >= 0) {
1766       levels->src[src_idx].src_type = nir_tex_src_texture_handle;
1767       levels->src[src_idx++].src = nir_src_for_ssa(txf->src[handle_idx].src.ssa);
1768    }
1769    nir_def_init(&levels->instr, &levels->def,
1770                 nir_tex_instr_dest_size(levels), 32);
1771    nir_builder_instr_insert(b, &levels->instr);
1772 
1773    nir_if *lod_oob_if = nir_push_if(b, nir_ilt(b, lod, &levels->def));
1774    nir_tex_instr *new_txf = nir_instr_as_tex(nir_instr_clone(b->shader, in));
1775    nir_builder_instr_insert(b, &new_txf->instr);
1776 
1777    nir_if *lod_oob_else = nir_push_else(b, lod_oob_if);
1778    nir_const_value oob_values[4] = {0};
1779    unsigned bit_size = nir_alu_type_get_type_size(txf->dest_type);
1780    oob_values[3] = (txf->dest_type & nir_type_float) ?
1781                    nir_const_value_for_float(1.0, bit_size) : nir_const_value_for_uint(1, bit_size);
1782    nir_def *oob_val = nir_build_imm(b, nir_tex_instr_dest_size(txf), bit_size, oob_values);
1783 
1784    nir_pop_if(b, lod_oob_else);
1785    nir_def *robust_txf = nir_if_phi(b, &new_txf->def, oob_val);
1786 
1787    nir_def_rewrite_uses(&txf->def, robust_txf);
1788    nir_instr_remove_v(in);
1789    return true;
1790 }
1791 
1792 /* This pass is used to workaround the lack of out of bounds LOD robustness
1793  * for texel fetch ops in VK_EXT_image_robustness.
1794  */
1795 static bool
lower_txf_lod_robustness(nir_shader * shader)1796 lower_txf_lod_robustness(nir_shader *shader)
1797 {
1798    return nir_shader_instructions_pass(shader, lower_txf_lod_robustness_instr, nir_metadata_none, NULL);
1799 }
1800 
1801 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
1802 static bool
check_psiz(struct nir_shader * s)1803 check_psiz(struct nir_shader *s)
1804 {
1805    bool have_psiz = false;
1806    nir_foreach_shader_out_variable(var, s) {
1807       if (var->data.location == VARYING_SLOT_PSIZ) {
1808          /* genuine PSIZ outputs will have this set */
1809          have_psiz |= !!var->data.explicit_location;
1810       }
1811    }
1812    return have_psiz;
1813 }
1814 
1815 static nir_variable *
find_var_with_location_frac(nir_shader * nir,unsigned location,unsigned location_frac,bool have_psiz,nir_variable_mode mode)1816 find_var_with_location_frac(nir_shader *nir, unsigned location, unsigned location_frac, bool have_psiz, nir_variable_mode mode)
1817 {
1818    assert((int)location >= 0);
1819 
1820    nir_foreach_variable_with_modes(var, nir, mode) {
1821       if (var->data.location == location && (location != VARYING_SLOT_PSIZ || !have_psiz || var->data.explicit_location)) {
1822          unsigned num_components = glsl_get_vector_elements(var->type);
1823          if (glsl_type_is_64bit(glsl_without_array(var->type)))
1824             num_components *= 2;
1825          if (is_clipcull_dist(var->data.location))
1826             num_components = glsl_get_aoa_size(var->type);
1827          if (var->data.location_frac <= location_frac &&
1828                var->data.location_frac + num_components > location_frac)
1829             return var;
1830       }
1831    }
1832    return NULL;
1833 }
1834 
1835 static bool
is_inlined(const bool * inlined,const nir_xfb_output_info * output)1836 is_inlined(const bool *inlined, const nir_xfb_output_info *output)
1837 {
1838    unsigned num_components = util_bitcount(output->component_mask);
1839    for (unsigned i = 0; i < num_components; i++)
1840       if (!inlined[output->component_offset + i])
1841          return false;
1842    return true;
1843 }
1844 
1845 static void
update_psiz_location(nir_shader * nir,nir_variable * psiz)1846 update_psiz_location(nir_shader *nir, nir_variable *psiz)
1847 {
1848    uint32_t last_output = util_last_bit64(nir->info.outputs_written);
1849    if (last_output < VARYING_SLOT_VAR0)
1850       last_output = VARYING_SLOT_VAR0;
1851    else
1852       last_output++;
1853    /* this should get fixed up by slot remapping */
1854    psiz->data.location = last_output;
1855 }
1856 
1857 static const struct glsl_type *
clamp_slot_type(const struct glsl_type * type,unsigned slot)1858 clamp_slot_type(const struct glsl_type *type, unsigned slot)
1859 {
1860    /* could be dvec/dmat/mat: each member is the same */
1861    const struct glsl_type *plain = glsl_without_array_or_matrix(type);
1862    /* determine size of each member type */
1863    unsigned slot_count = glsl_count_vec4_slots(plain, false, false);
1864    /* normalize slot idx to current type's size */
1865    slot %= slot_count;
1866    unsigned slot_components = glsl_get_components(plain);
1867    if (glsl_base_type_is_64bit(glsl_get_base_type(plain)))
1868       slot_components *= 2;
1869    /* create a vec4 mask of the selected slot's components out of all the components */
1870    uint32_t mask = BITFIELD_MASK(slot_components) & BITFIELD_RANGE(slot * 4, 4);
1871    /* return a vecN of the selected components */
1872    slot_components = util_bitcount(mask);
1873    return glsl_vec_type(slot_components);
1874 }
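/* Illustrative example (not part of the original source): for a dvec3,
 * glsl_get_components() is 3 and the 64-bit doubling gives 6 32-bit
 * components spanning two vec4 slots:
 *   slot 0: BITFIELD_MASK(6) & BITFIELD_RANGE(0, 4) -> 4 bits set -> vec4
 *   slot 1: BITFIELD_MASK(6) & BITFIELD_RANGE(4, 4) -> 2 bits set -> vec2
 */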
1875 
1876 static const struct glsl_type *
unroll_struct_type(const struct glsl_type * slot_type,unsigned * slot_idx)1877 unroll_struct_type(const struct glsl_type *slot_type, unsigned *slot_idx)
1878 {
1879    const struct glsl_type *type = slot_type;
1880    unsigned slot_count = 0;
1881    unsigned cur_slot = 0;
1882    /* iterate over all the members in the struct, stopping once the slot idx is reached */
1883    for (unsigned i = 0; i < glsl_get_length(slot_type) && cur_slot <= *slot_idx; i++, cur_slot += slot_count) {
1884       /* use array type for slot counting but return array member type for unroll */
1885       const struct glsl_type *arraytype = glsl_get_struct_field(slot_type, i);
1886       type = glsl_without_array(arraytype);
1887       slot_count = glsl_count_vec4_slots(arraytype, false, false);
1888    }
1889    *slot_idx -= (cur_slot - slot_count);
1890    if (!glsl_type_is_struct_or_ifc(type))
1891       /* this is a fully unrolled struct: find the number of vec components to output */
1892       type = clamp_slot_type(type, *slot_idx);
1893    return type;
1894 }
1895 
1896 static unsigned
get_slot_components(nir_variable * var,unsigned slot,unsigned so_slot)1897 get_slot_components(nir_variable *var, unsigned slot, unsigned so_slot)
1898 {
1899    assert(var && slot < var->data.location + glsl_count_vec4_slots(var->type, false, false));
1900    const struct glsl_type *orig_type = var->type;
1901    const struct glsl_type *type = glsl_without_array(var->type);
1902    unsigned slot_idx = slot - so_slot;
1903    if (type != orig_type)
1904       slot_idx %= glsl_count_vec4_slots(type, false, false);
1905    /* need to find the vec4 that's being exported by this slot */
1906    while (glsl_type_is_struct_or_ifc(type))
1907       type = unroll_struct_type(type, &slot_idx);
1908 
1909    /* arrays here are already fully unrolled from their structs, so slot handling is implicit */
1910    unsigned num_components = glsl_get_components(glsl_without_array(type));
1911    /* special handling: clip/cull distance are arrays with vector semantics */
1912    if (is_clipcull_dist(var->data.location)) {
1913       num_components = glsl_array_size(type);
1914       if (slot_idx)
1915          /* this is the second vec4 */
1916          num_components %= 4;
1917       else
1918          /* this is the first vec4 */
1919          num_components = MIN2(num_components, 4);
1920    }
1921    assert(num_components);
1922    /* gallium handles xfb in terms of 32bit units */
1923    if (glsl_base_type_is_64bit(glsl_get_base_type(glsl_without_array(type))))
1924       num_components *= 2;
1925    return num_components;
1926 }
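/* Illustrative example (not part of the original source): a float
 * gl_ClipDistance[6] variable at VARYING_SLOT_CLIP_DIST0 reports
 *   slot_idx 0 -> MIN2(6, 4) = 4 components (first vec4)
 *   slot_idx 1 -> 6 % 4      = 2 components (second vec4)
 * and a 64-bit type would additionally double the count, since gallium
 * handles xfb in 32-bit units.
 */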
1927 
1928 static unsigned
get_var_slot_count(nir_shader * nir,nir_variable * var)1929 get_var_slot_count(nir_shader *nir, nir_variable *var)
1930 {
1931    assert(var->data.mode == nir_var_shader_in || var->data.mode == nir_var_shader_out);
1932    const struct glsl_type *type = var->type;
1933    if (nir_is_arrayed_io(var, nir->info.stage))
1934       type = glsl_get_array_element(type);
1935    unsigned slot_count = 0;
1936    if ((nir->info.stage == MESA_SHADER_VERTEX && var->data.mode == nir_var_shader_in && var->data.location >= VERT_ATTRIB_GENERIC0) ||
1937        var->data.location >= VARYING_SLOT_VAR0)
1938       slot_count = glsl_count_vec4_slots(type, false, false);
1939    else if (glsl_type_is_array(type))
1940       slot_count = DIV_ROUND_UP(glsl_get_aoa_size(type), 4);
1941    else
1942       slot_count = 1;
1943    return slot_count;
1944 }
1945 
1946 
1947 static const nir_xfb_output_info *
find_packed_output(const nir_xfb_info * xfb_info,unsigned slot)1948 find_packed_output(const nir_xfb_info *xfb_info, unsigned slot)
1949 {
1950    for (unsigned i = 0; i < xfb_info->output_count; i++) {
1951       const nir_xfb_output_info *packed_output = &xfb_info->outputs[i];
1952       if (packed_output->location == slot)
1953          return packed_output;
1954    }
1955    return NULL;
1956 }
1957 
1958 static void
update_so_info(struct zink_shader * zs,nir_shader * nir,uint64_t outputs_written,bool have_psiz)1959 update_so_info(struct zink_shader *zs, nir_shader *nir, uint64_t outputs_written, bool have_psiz)
1960 {
1961    bool inlined[VARYING_SLOT_MAX][4] = {0};
1962    uint64_t packed = 0;
1963    uint8_t packed_components[VARYING_SLOT_MAX] = {0};
1964    uint8_t packed_streams[VARYING_SLOT_MAX] = {0};
1965    uint8_t packed_buffers[VARYING_SLOT_MAX] = {0};
1966    uint16_t packed_offsets[VARYING_SLOT_MAX][4] = {0};
1967    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
1968       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
1969       unsigned xfb_components = util_bitcount(output->component_mask);
1970       /* always set stride to be used during draw */
1971       zs->sinfo.stride[output->buffer] = nir->xfb_info->buffers[output->buffer].stride;
1972       for (unsigned c = 0; !is_inlined(inlined[output->location], output) && c < xfb_components; c++) {
1973          unsigned slot = output->location;
1974          if (inlined[slot][output->component_offset + c])
1975             continue;
1976          nir_variable *var = NULL;
1977          while (!var && slot < VARYING_SLOT_TESS_MAX)
1978             var = find_var_with_location_frac(nir, slot--, output->component_offset + c, have_psiz, nir_var_shader_out);
1979          slot = output->location;
1980          unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
1981          if (!var || var->data.location > slot || var->data.location + slot_count <= slot) {
1982             /* if no variable is found for the xfb output, no output exists */
1983             inlined[slot][c + output->component_offset] = true;
1984             continue;
1985          }
1986          if (var->data.explicit_xfb_buffer) {
1987             /* handle dvec3 where gallium splits streamout over 2 registers */
1988             for (unsigned j = 0; j < xfb_components; j++)
1989                inlined[slot][c + output->component_offset + j] = true;
1990          }
1991          if (is_inlined(inlined[slot], output))
1992             continue;
1993          assert(!glsl_type_is_array(var->type) || is_clipcull_dist(var->data.location));
1994          assert(!glsl_type_is_struct_or_ifc(var->type));
1995          unsigned num_components = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : glsl_get_vector_elements(var->type);
1996          if (glsl_type_is_64bit(glsl_without_array(var->type)))
1997             num_components *= 2;
1998          /* if this is the entire variable, try to blast it out during the initial declaration
1999          * structs must be handled later to ensure accurate analysis
2000          */
2001          if ((num_components == xfb_components ||
2002                num_components < xfb_components ||
2003                (num_components > xfb_components && xfb_components == 4))) {
2004             var->data.explicit_xfb_buffer = 1;
2005             var->data.xfb.buffer = output->buffer;
2006             var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2007             var->data.offset = (output->offset + c * sizeof(uint32_t));
2008             var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2009             for (unsigned j = 0; j < MIN2(num_components, xfb_components); j++)
2010                inlined[slot][c + output->component_offset + j] = true;
2011          } else {
2012             /* otherwise store some metadata for later */
2013             packed |= BITFIELD64_BIT(slot);
2014             packed_components[slot] += xfb_components;
2015             packed_streams[slot] |= BITFIELD_BIT(nir->xfb_info->buffer_to_stream[output->buffer]);
2016             packed_buffers[slot] |= BITFIELD_BIT(output->buffer);
2017             for (unsigned j = 0; j < xfb_components; j++)
2018                packed_offsets[output->location][j + output->component_offset + c] = output->offset + j * sizeof(uint32_t);
2019          }
2020       }
2021    }
2022 
2023    /* if this was flagged as a packed output before, and if all the components are
2024     * being output with the same stream on the same buffer with increasing offsets, this entire variable
2025     * can be consolidated into a single output to conserve locations
2026     */
2027    for (unsigned i = 0; i < nir->xfb_info->output_count; i++) {
2028       const nir_xfb_output_info *output = &nir->xfb_info->outputs[i];
2029       unsigned slot = output->location;
2030       if (is_inlined(inlined[slot], output))
2031          continue;
2032       nir_variable *var = NULL;
2033       while (!var)
2034          var = find_var_with_location_frac(nir, slot--, output->component_offset, have_psiz, nir_var_shader_out);
2035       slot = output->location;
2036       unsigned slot_count = var ? get_var_slot_count(nir, var) : 0;
2037       if (!var || var->data.location > slot || var->data.location + slot_count <= slot)
2038          continue;
2039       /* this is a lowered 64bit variable that can't be exported due to packing */
2040       if (var->data.is_xfb)
2041          goto out;
2042 
2043       unsigned num_slots = is_clipcull_dist(var->data.location) ?
2044                            glsl_array_size(var->type) / 4 :
2045                            glsl_count_vec4_slots(var->type, false, false);
2046       /* for each variable, iterate over all the variable's slots and inline the outputs */
2047       for (unsigned j = 0; j < num_slots; j++) {
2048          slot = var->data.location + j;
2049          const nir_xfb_output_info *packed_output = find_packed_output(nir->xfb_info, slot);
2050          if (!packed_output)
2051             goto out;
2052 
2053          /* if this slot wasn't packed or isn't in the same stream/buffer, skip consolidation */
2054          if (!(packed & BITFIELD64_BIT(slot)) ||
2055                util_bitcount(packed_streams[slot]) != 1 ||
2056                util_bitcount(packed_buffers[slot]) != 1)
2057             goto out;
2058 
2059          /* if all the components the variable exports to this slot aren't captured, skip consolidation */
2060          unsigned num_components = get_slot_components(var, slot, var->data.location);
2061          if (num_components != packed_components[slot])
2062             goto out;
2063 
2064          /* in order to pack the xfb output, all the offsets must be sequentially incrementing */
2065          uint32_t prev_offset = packed_offsets[packed_output->location][0];
2066          for (unsigned k = 1; k < num_components; k++) {
2067             /* if the offsets are not incrementing as expected, skip consolidation */
2068             if (packed_offsets[packed_output->location][k] != prev_offset + sizeof(uint32_t))
2069                goto out;
2070             prev_offset = packed_offsets[packed_output->location][k + packed_output->component_offset];
2071          }
2072       }
2073       /* this output can be consolidated: blast out all the data inlined */
2074       var->data.explicit_xfb_buffer = 1;
2075       var->data.xfb.buffer = output->buffer;
2076       var->data.xfb.stride = zs->sinfo.stride[output->buffer];
2077       var->data.offset = output->offset;
2078       var->data.stream = nir->xfb_info->buffer_to_stream[output->buffer];
2079       /* mark all slot components inlined to skip subsequent loop iterations */
2080       for (unsigned j = 0; j < num_slots; j++) {
2081          slot = var->data.location + j;
2082          for (unsigned k = 0; k < packed_components[slot]; k++)
2083             inlined[slot][k] = true;
2084          packed &= ~BITFIELD64_BIT(slot);
2085       }
2086       continue;
2087 out:
2088       unreachable("xfb should be inlined by now!");
2089    }
2090 }
2091 
2092 struct decompose_state {
2093   nir_variable **split;
2094   bool needs_w;
2095 };
2096 
2097 static bool
lower_attrib(nir_builder * b,nir_instr * instr,void * data)2098 lower_attrib(nir_builder *b, nir_instr *instr, void *data)
2099 {
2100    struct decompose_state *state = data;
2101    nir_variable **split = state->split;
2102    if (instr->type != nir_instr_type_intrinsic)
2103       return false;
2104    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2105    if (intr->intrinsic != nir_intrinsic_load_deref)
2106       return false;
2107    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2108    nir_variable *var = nir_deref_instr_get_variable(deref);
2109    if (var != split[0])
2110       return false;
2111    unsigned num_components = glsl_get_vector_elements(split[0]->type);
2112    b->cursor = nir_after_instr(instr);
2113    nir_def *loads[4];
2114    for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
2115       loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
2116    if (state->needs_w) {
2117       /* oob load w component to get correct value for int/float */
2118       loads[3] = nir_channel(b, loads[0], 3);
2119       loads[0] = nir_channel(b, loads[0], 0);
2120    }
2121    nir_def *new_load = nir_vec(b, loads, num_components);
2122    nir_def_rewrite_uses(&intr->def, new_load);
2123    nir_instr_remove_v(instr);
2124    return true;
2125 }
2126 
2127 static bool
decompose_attribs(nir_shader * nir,uint32_t decomposed_attrs,uint32_t decomposed_attrs_without_w)2128 decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
2129 {
2130    uint32_t bits = 0;
2131    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
2132       bits |= BITFIELD_BIT(var->data.driver_location);
2133    bits = ~bits;
2134    u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
2135       nir_variable *split[5];
2136       struct decompose_state state;
2137       state.split = split;
2138       nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
2139       assert(var);
2140       split[0] = var;
2141       bits |= BITFIELD_BIT(var->data.driver_location);
2142       const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
2143       unsigned num_components = glsl_get_vector_elements(var->type);
2144       state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
2145       for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
2146          split[i+1] = nir_variable_clone(var, nir);
2147          split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
2148          if (decomposed_attrs_without_w & BITFIELD_BIT(location))
2149             split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
2150          else
2151             split[i+1]->type = new_type;
2152          split[i+1]->data.driver_location = ffs(bits) - 1;
2153          bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
2154          nir_shader_add_variable(nir, split[i+1]);
2155       }
2156       var->data.mode = nir_var_shader_temp;
2157       nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
2158    }
2159    nir_fixup_deref_modes(nir);
2160    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2161    optimize_nir(nir, NULL, true);
2162    return true;
2163 }
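/* Illustrative note (not part of the original source): for a vec4 vertex
 * attribute in decomposed_attrs_without_w, the pass above keeps split[0] as
 * the original (now shader_temp) variable, adds split[1] with the full vec4
 * type plus split[2]/split[3] as scalars at free driver locations, and
 * lower_attrib() then rebuilds the vec4 from the three split loads, taking
 * the .w channel from split[1]'s vec4 load so the format-provided default w
 * keeps the correct int/float encoding.
 */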
2164 
2165 static bool
rewrite_bo_access_instr(nir_builder * b,nir_instr * instr,void * data)2166 rewrite_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2167 {
2168    struct zink_screen *screen = data;
2169    const bool has_int64 = screen->info.feats.features.shaderInt64;
2170    if (instr->type != nir_instr_type_intrinsic)
2171       return false;
2172    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2173    b->cursor = nir_before_instr(instr);
2174    switch (intr->intrinsic) {
2175    case nir_intrinsic_ssbo_atomic:
2176    case nir_intrinsic_ssbo_atomic_swap: {
2177       /* convert offset to uintN_t[idx] */
2178       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, intr->def.bit_size / 8);
2179       nir_src_rewrite(&intr->src[1], offset);
2180       return true;
2181    }
2182    case nir_intrinsic_load_ssbo:
2183    case nir_intrinsic_load_ubo: {
2184       /* ubo0 can have unaligned 64bit loads, particularly for bindless texture ids */
2185       bool force_2x32 = intr->intrinsic == nir_intrinsic_load_ubo &&
2186                         nir_src_is_const(intr->src[0]) &&
2187                         nir_src_as_uint(intr->src[0]) == 0 &&
2188                         intr->def.bit_size == 64 &&
2189                         nir_intrinsic_align_offset(intr) % 8 != 0;
2190       force_2x32 |= intr->def.bit_size == 64 && !has_int64;
2191       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2192       nir_src_rewrite(&intr->src[1], offset);
2193       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2194       if (force_2x32) {
2195          /* this is always scalarized */
2196          assert(intr->def.num_components == 1);
2197          /* rewrite as 2x32 */
2198          nir_def *load[2];
2199          for (unsigned i = 0; i < 2; i++) {
2200             if (intr->intrinsic == nir_intrinsic_load_ssbo)
2201                load[i] = nir_load_ssbo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2202             else
2203                load[i] = nir_load_ubo(b, 1, 32, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0, .range = 4);
2204             nir_intrinsic_set_access(nir_instr_as_intrinsic(load[i]->parent_instr), nir_intrinsic_access(intr));
2205          }
2206          /* cast back to 64bit */
2207          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2208          nir_def_rewrite_uses(&intr->def, casted);
2209          nir_instr_remove(instr);
2210       }
2211       return true;
2212    }
2213    case nir_intrinsic_load_scratch:
2214    case nir_intrinsic_load_shared: {
2215       b->cursor = nir_before_instr(instr);
2216       bool force_2x32 = intr->def.bit_size == 64 && !has_int64;
2217       nir_def *offset = nir_udiv_imm(b, intr->src[0].ssa, (force_2x32 ? 32 : intr->def.bit_size) / 8);
2218       nir_src_rewrite(&intr->src[0], offset);
2219       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2220       if (force_2x32) {
2221          /* this is always scalarized */
2222          assert(intr->def.num_components == 1);
2223          /* rewrite as 2x32 */
2224          nir_def *load[2];
2225          for (unsigned i = 0; i < 2; i++)
2226             load[i] = nir_load_shared(b, 1, 32, nir_iadd_imm(b, intr->src[0].ssa, i), .align_mul = 4, .align_offset = 0);
2227          /* cast back to 64bit */
2228          nir_def *casted = nir_pack_64_2x32_split(b, load[0], load[1]);
2229          nir_def_rewrite_uses(&intr->def, casted);
2230          nir_instr_remove(instr);
2231          return true;
2232       }
2233       break;
2234    }
2235    case nir_intrinsic_store_ssbo: {
2236       b->cursor = nir_before_instr(instr);
2237       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2238       nir_def *offset = nir_udiv_imm(b, intr->src[2].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2239       nir_src_rewrite(&intr->src[2], offset);
2240       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2241       if (force_2x32) {
2242          /* this is always scalarized */
2243          assert(intr->src[0].ssa->num_components == 1);
2244          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2245          for (unsigned i = 0; i < 2; i++)
2246             nir_store_ssbo(b, vals[i], intr->src[1].ssa, nir_iadd_imm(b, intr->src[2].ssa, i), .align_mul = 4, .align_offset = 0);
2247          nir_instr_remove(instr);
2248       }
2249       return true;
2250    }
2251    case nir_intrinsic_store_scratch:
2252    case nir_intrinsic_store_shared: {
2253       b->cursor = nir_before_instr(instr);
2254       bool force_2x32 = nir_src_bit_size(intr->src[0]) == 64 && !has_int64;
2255       nir_def *offset = nir_udiv_imm(b, intr->src[1].ssa, (force_2x32 ? 32 : nir_src_bit_size(intr->src[0])) / 8);
2256       nir_src_rewrite(&intr->src[1], offset);
2257       /* if 64bit isn't supported, 64bit loads definitely aren't supported, so rewrite as 2x32 with cast and pray */
2258       if (nir_src_bit_size(intr->src[0]) == 64 && !has_int64) {
2259          /* this is always scalarized */
2260          assert(intr->src[0].ssa->num_components == 1);
2261          nir_def *vals[2] = {nir_unpack_64_2x32_split_x(b, intr->src[0].ssa), nir_unpack_64_2x32_split_y(b, intr->src[0].ssa)};
2262          for (unsigned i = 0; i < 2; i++)
2263             nir_store_shared(b, vals[i], nir_iadd_imm(b, intr->src[1].ssa, i), .align_mul = 4, .align_offset = 0);
2264          nir_instr_remove(instr);
2265       }
2266       return true;
2267    }
2268    default:
2269       break;
2270    }
2271    return false;
2272 }
2273 
2274 static bool
rewrite_bo_access(nir_shader * shader,struct zink_screen * screen)2275 rewrite_bo_access(nir_shader *shader, struct zink_screen *screen)
2276 {
2277    return nir_shader_instructions_pass(shader, rewrite_bo_access_instr, nir_metadata_dominance, screen);
2278 }
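/* Illustrative example (not part of the original source): with shaderInt64
 * available, a 64-bit load_ssbo at byte offset 24 is rewritten above to index
 * 24 / 8 = 3 of the uint64_t view; without shaderInt64 (force_2x32), the
 * offset becomes 24 / 4 = 6 and the load is split into two 32-bit loads at
 * indices 6 and 7 that are recombined with nir_pack_64_2x32_split().
 */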
2279 
2280 static nir_variable *
get_bo_var(nir_shader * shader,struct bo_vars * bo,bool ssbo,nir_src * src,unsigned bit_size)2281 get_bo_var(nir_shader *shader, struct bo_vars *bo, bool ssbo, nir_src *src, unsigned bit_size)
2282 {
2283    nir_variable *var, **ptr;
2284    unsigned idx = ssbo || (nir_src_is_const(*src) && !nir_src_as_uint(*src)) ? 0 : 1;
2285 
2286    if (ssbo)
2287       ptr = &bo->ssbo[bit_size >> 4];
2288    else {
2289       if (!idx) {
2290          ptr = &bo->uniforms[bit_size >> 4];
2291       } else
2292          ptr = &bo->ubo[bit_size >> 4];
2293    }
2294    var = *ptr;
2295    if (!var) {
2296       if (ssbo)
2297          var = bo->ssbo[32 >> 4];
2298       else {
2299          if (!idx)
2300             var = bo->uniforms[32 >> 4];
2301          else
2302             var = bo->ubo[32 >> 4];
2303       }
2304       var = nir_variable_clone(var, shader);
2305       if (ssbo)
2306          var->name = ralloc_asprintf(shader, "%s@%u", "ssbos", bit_size);
2307       else
2308          var->name = ralloc_asprintf(shader, "%s@%u", idx ? "ubos" : "uniform_0", bit_size);
2309       *ptr = var;
2310       nir_shader_add_variable(shader, var);
2311 
2312       struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
2313       fields[0].name = ralloc_strdup(shader, "base");
2314       fields[1].name = ralloc_strdup(shader, "unsized");
2315       unsigned array_size = glsl_get_length(var->type);
2316       const struct glsl_type *bare_type = glsl_without_array(var->type);
2317       const struct glsl_type *array_type = glsl_get_struct_field(bare_type, 0);
2318       unsigned length = glsl_get_length(array_type);
2319       const struct glsl_type *type;
2320       const struct glsl_type *unsized = glsl_array_type(glsl_uintN_t_type(bit_size), 0, bit_size / 8);
2321       if (bit_size > 32) {
2322          assert(bit_size == 64);
2323          type = glsl_array_type(glsl_uintN_t_type(bit_size), length / 2, bit_size / 8);
2324       } else {
2325          type = glsl_array_type(glsl_uintN_t_type(bit_size), length * (32 / bit_size), bit_size / 8);
2326       }
2327       fields[0].type = type;
2328       fields[1].type = unsized;
2329       var->type = glsl_array_type(glsl_struct_type(fields, glsl_get_length(bare_type), "struct", false), array_size, 0);
2330       var->data.driver_location = idx;
2331    }
2332    return var;
2333 }
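/* Illustrative note (not part of the original source, sizes are hypothetical):
 * for a bit size that doesn't yet have a variable, get_bo_var() clones the
 * 32-bit variant and retypes it, e.g. a 16-bit view of an SSBO whose 32-bit
 * "base" array has 256 elements becomes roughly
 *    struct { uint16_t base[512]; uint16_t unsized[]; } ssbos@16[array_size];
 * so that remove_bo_access_instr() below can index it with element-sized
 * offsets.
 */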
2334 
2335 static void
rewrite_atomic_ssbo_instr(nir_builder * b,nir_instr * instr,struct bo_vars * bo)2336 rewrite_atomic_ssbo_instr(nir_builder *b, nir_instr *instr, struct bo_vars *bo)
2337 {
2338    nir_intrinsic_op op;
2339    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2340    if (intr->intrinsic == nir_intrinsic_ssbo_atomic)
2341       op = nir_intrinsic_deref_atomic;
2342    else if (intr->intrinsic == nir_intrinsic_ssbo_atomic_swap)
2343       op = nir_intrinsic_deref_atomic_swap;
2344    else
2345       unreachable("unknown intrinsic");
2346    nir_def *offset = intr->src[1].ssa;
2347    nir_src *src = &intr->src[0];
2348    nir_variable *var = get_bo_var(b->shader, bo, true, src,
2349                                   intr->def.bit_size);
2350    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2351    nir_def *idx = src->ssa;
2352    if (bo->first_ssbo)
2353       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2354    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var, idx);
2355    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2356 
2357    /* generate new atomic deref ops for every component */
2358    nir_def *result[4];
2359    unsigned num_components = intr->def.num_components;
2360    for (unsigned i = 0; i < num_components; i++) {
2361       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct, offset);
2362       nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(b->shader, op);
2363       nir_def_init(&new_instr->instr, &new_instr->def, 1,
2364                    intr->def.bit_size);
2365       nir_intrinsic_set_atomic_op(new_instr, nir_intrinsic_atomic_op(intr));
2366       new_instr->src[0] = nir_src_for_ssa(&deref_arr->def);
2367       /* deref ops have no offset src, so copy the srcs after it */
2368       for (unsigned j = 2; j < nir_intrinsic_infos[intr->intrinsic].num_srcs; j++)
2369          new_instr->src[j - 1] = nir_src_for_ssa(intr->src[j].ssa);
2370       nir_builder_instr_insert(b, &new_instr->instr);
2371 
2372       result[i] = &new_instr->def;
2373       offset = nir_iadd_imm(b, offset, 1);
2374    }
2375 
2376    nir_def *load = nir_vec(b, result, num_components);
2377    nir_def_replace(&intr->def, load);
2378 }
2379 
2380 static bool
remove_bo_access_instr(nir_builder * b,nir_instr * instr,void * data)2381 remove_bo_access_instr(nir_builder *b, nir_instr *instr, void *data)
2382 {
2383    struct bo_vars *bo = data;
2384    if (instr->type != nir_instr_type_intrinsic)
2385       return false;
2386    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2387    nir_variable *var = NULL;
2388    nir_def *offset = NULL;
2389    bool is_load = true;
2390    b->cursor = nir_before_instr(instr);
2391    nir_src *src;
2392    bool ssbo = true;
2393    switch (intr->intrinsic) {
2394    case nir_intrinsic_ssbo_atomic:
2395    case nir_intrinsic_ssbo_atomic_swap:
2396       rewrite_atomic_ssbo_instr(b, instr, bo);
2397       return true;
2398    case nir_intrinsic_store_ssbo:
2399       src = &intr->src[1];
2400       var = get_bo_var(b->shader, bo, true, src, nir_src_bit_size(intr->src[0]));
2401       offset = intr->src[2].ssa;
2402       is_load = false;
2403       break;
2404    case nir_intrinsic_load_ssbo:
2405       src = &intr->src[0];
2406       var = get_bo_var(b->shader, bo, true, src, intr->def.bit_size);
2407       offset = intr->src[1].ssa;
2408       break;
2409    case nir_intrinsic_load_ubo:
2410       src = &intr->src[0];
2411       var = get_bo_var(b->shader, bo, false, src, intr->def.bit_size);
2412       offset = intr->src[1].ssa;
2413       ssbo = false;
2414       break;
2415    default:
2416       return false;
2417    }
2418    assert(var);
2419    assert(offset);
2420    nir_deref_instr *deref_var = nir_build_deref_var(b, var);
2421    nir_def *idx = !ssbo && var->data.driver_location ? nir_iadd_imm(b, src->ssa, -1) : src->ssa;
2422    if (!ssbo && bo->first_ubo && var->data.driver_location)
2423       idx = nir_iadd_imm(b, idx, -bo->first_ubo);
2424    else if (ssbo && bo->first_ssbo)
2425       idx = nir_iadd_imm(b, idx, -bo->first_ssbo);
2426    nir_deref_instr *deref_array = nir_build_deref_array(b, deref_var,
2427                                                         nir_i2iN(b, idx, deref_var->def.bit_size));
2428    nir_deref_instr *deref_struct = nir_build_deref_struct(b, deref_array, 0);
2429    assert(intr->num_components <= 2);
2430    if (is_load) {
2431       nir_def *result[2];
2432       for (unsigned i = 0; i < intr->num_components; i++) {
2433          nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2434                                                             nir_i2iN(b, offset, deref_struct->def.bit_size));
2435          result[i] = nir_load_deref(b, deref_arr);
2436          if (intr->intrinsic == nir_intrinsic_load_ssbo)
2437             nir_intrinsic_set_access(nir_instr_as_intrinsic(result[i]->parent_instr), nir_intrinsic_access(intr));
2438          offset = nir_iadd_imm(b, offset, 1);
2439       }
2440       nir_def *load = nir_vec(b, result, intr->num_components);
2441       nir_def_rewrite_uses(&intr->def, load);
2442    } else {
2443       nir_deref_instr *deref_arr = nir_build_deref_array(b, deref_struct,
2444                                                          nir_i2iN(b, offset, deref_struct->def.bit_size));
2445       nir_build_store_deref(b, &deref_arr->def, intr->src[0].ssa, BITFIELD_MASK(intr->num_components), nir_intrinsic_access(intr));
2446    }
2447    nir_instr_remove(instr);
2448    return true;
2449 }
2450 
2451 static bool
remove_bo_access(nir_shader * shader,struct zink_shader * zs)2452 remove_bo_access(nir_shader *shader, struct zink_shader *zs)
2453 {
2454    struct bo_vars bo = get_bo_vars(zs, shader);
2455    return nir_shader_instructions_pass(shader, remove_bo_access_instr, nir_metadata_dominance, &bo);
2456 }
2457 
2458 static bool
filter_io_instr(nir_intrinsic_instr * intr,bool * is_load,bool * is_input,bool * is_interp)2459 filter_io_instr(nir_intrinsic_instr *intr, bool *is_load, bool *is_input, bool *is_interp)
2460 {
2461    switch (intr->intrinsic) {
2462    case nir_intrinsic_load_interpolated_input:
2463       *is_interp = true;
2464       FALLTHROUGH;
2465    case nir_intrinsic_load_input:
2466    case nir_intrinsic_load_per_vertex_input:
2467       *is_input = true;
2468       FALLTHROUGH;
2469    case nir_intrinsic_load_output:
2470    case nir_intrinsic_load_per_vertex_output:
2471    case nir_intrinsic_load_per_primitive_output:
2472       *is_load = true;
2473       FALLTHROUGH;
2474    case nir_intrinsic_store_output:
2475    case nir_intrinsic_store_per_primitive_output:
2476    case nir_intrinsic_store_per_vertex_output:
2477       break;
2478    default:
2479       return false;
2480    }
2481    return true;
2482 }
2483 
2484 static bool
io_instr_is_arrayed(nir_intrinsic_instr * intr)2485 io_instr_is_arrayed(nir_intrinsic_instr *intr)
2486 {
2487    switch (intr->intrinsic) {
2488    case nir_intrinsic_load_per_vertex_input:
2489    case nir_intrinsic_load_per_vertex_output:
2490    case nir_intrinsic_load_per_primitive_output:
2491    case nir_intrinsic_store_per_primitive_output:
2492    case nir_intrinsic_store_per_vertex_output:
2493       return true;
2494    default:
2495       break;
2496    }
2497    return false;
2498 }
2499 
2500 static bool
find_var_deref(nir_shader * nir,nir_variable * var)2501 find_var_deref(nir_shader *nir, nir_variable *var)
2502 {
2503    nir_foreach_function_impl(impl, nir) {
2504       nir_foreach_block(block, impl) {
2505          nir_foreach_instr(instr, block) {
2506             if (instr->type != nir_instr_type_deref)
2507                continue;
2508             nir_deref_instr *deref = nir_instr_as_deref(instr);
2509             if (deref->deref_type == nir_deref_type_var && deref->var == var)
2510                return true;
2511          }
2512       }
2513    }
2514    return false;
2515 }
2516 
2517 static bool
find_var_io(nir_shader * nir,nir_variable * var)2518 find_var_io(nir_shader *nir, nir_variable *var)
2519 {
2520    nir_foreach_function(function, nir) {
2521       if (!function->impl)
2522          continue;
2523 
2524       nir_foreach_block(block, function->impl) {
2525          nir_foreach_instr(instr, block) {
2526             if (instr->type != nir_instr_type_intrinsic)
2527                continue;
2528             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2529             bool is_load = false;
2530             bool is_input = false;
2531             bool is_interp = false;
2532             if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2533                continue;
2534             if (var->data.mode == nir_var_shader_in && !is_input)
2535                continue;
2536             if (var->data.mode == nir_var_shader_out && is_input)
2537                continue;
2538             unsigned slot_offset = 0;
2539             if (var->data.fb_fetch_output && !is_load)
2540                continue;
2541             if (nir->info.stage == MESA_SHADER_FRAGMENT && !is_load && !is_input && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
2542                continue;
2543             nir_src *src_offset = nir_get_io_offset_src(intr);
2544             if (src_offset && nir_src_is_const(*src_offset))
2545                slot_offset = nir_src_as_uint(*src_offset);
2546             unsigned slot_count = get_var_slot_count(nir, var);
2547             if (var->data.mode & (nir_var_shader_out | nir_var_shader_in) &&
2548                 var->data.fb_fetch_output == nir_intrinsic_io_semantics(intr).fb_fetch_output &&
2549                 var->data.location <= nir_intrinsic_io_semantics(intr).location + slot_offset &&
2550                 var->data.location + slot_count > nir_intrinsic_io_semantics(intr).location + slot_offset)
2551                return true;
2552          }
2553       }
2554    }
2555    return false;
2556 }
2557 
2558 struct clamp_layer_output_state {
2559    nir_variable *original;
2560    nir_variable *clamped;
2561 };
2562 
2563 static void
2564 clamp_layer_output_emit(nir_builder *b, struct clamp_layer_output_state *state)
2565 {
2566    nir_def *is_layered = nir_load_push_constant_zink(b, 1, 32,
2567                                                          nir_imm_int(b, ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED));
2568    nir_deref_instr *original_deref = nir_build_deref_var(b, state->original);
2569    nir_deref_instr *clamped_deref = nir_build_deref_var(b, state->clamped);
2570    nir_def *layer = nir_bcsel(b, nir_ieq_imm(b, is_layered, 1),
2571                                   nir_load_deref(b, original_deref),
2572                                   nir_imm_int(b, 0));
2573    nir_store_deref(b, clamped_deref, layer, 0);
2574 }
2575 
2576 static bool
2577 clamp_layer_output_instr(nir_builder *b, nir_instr *instr, void *data)
2578 {
2579    struct clamp_layer_output_state *state = data;
2580    switch (instr->type) {
2581    case nir_instr_type_intrinsic: {
2582       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2583       if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter &&
2584           intr->intrinsic != nir_intrinsic_emit_vertex)
2585          return false;
2586       b->cursor = nir_before_instr(instr);
2587       clamp_layer_output_emit(b, state);
2588       return true;
2589    }
2590    default: return false;
2591    }
2592 }
2593 
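/* Driver workaround: add a second gl_Layer output ("layer_clamped") that is
 * forced to 0 whenever the framebuffer is not layered, as signalled by the
 * ZINK_GFX_PUSHCONST_FRAMEBUFFER_IS_LAYERED push constant. The original
 * output is either moved to a generic varying slot (when xfb or the fragment
 * shader still needs it) or demoted to a temporary.
 */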
2594 static bool
2595 clamp_layer_output(nir_shader *vs, nir_shader *fs, unsigned *next_location)
2596 {
2597    switch (vs->info.stage) {
2598    case MESA_SHADER_VERTEX:
2599    case MESA_SHADER_GEOMETRY:
2600    case MESA_SHADER_TESS_EVAL:
2601       break;
2602    default:
2603       unreachable("invalid last vertex stage!");
2604    }
2605    struct clamp_layer_output_state state = {0};
2606    state.original = nir_find_variable_with_location(vs, nir_var_shader_out, VARYING_SLOT_LAYER);
2607    if (!state.original || (!find_var_deref(vs, state.original) && !find_var_io(vs, state.original)))
2608       return false;
2609    state.clamped = nir_variable_create(vs, nir_var_shader_out, glsl_int_type(), "layer_clamped");
2610    state.clamped->data.location = VARYING_SLOT_LAYER;
2611    nir_variable *fs_var = nir_find_variable_with_location(fs, nir_var_shader_in, VARYING_SLOT_LAYER);
2612    if ((state.original->data.explicit_xfb_buffer || fs_var) && *next_location < MAX_VARYING) {
2613       state.original->data.location = VARYING_SLOT_VAR0; // Anything but a built-in slot
2614       state.original->data.driver_location = (*next_location)++;
2615       if (fs_var) {
2616          fs_var->data.location = state.original->data.location;
2617          fs_var->data.driver_location = state.original->data.driver_location;
2618       }
2619    } else {
2620       if (state.original->data.explicit_xfb_buffer) {
2621          /* Will xfb the clamped output but still better than nothing */
2622          state.clamped->data.explicit_xfb_buffer = state.original->data.explicit_xfb_buffer;
2623          state.clamped->data.xfb.buffer = state.original->data.xfb.buffer;
2624          state.clamped->data.xfb.stride = state.original->data.xfb.stride;
2625          state.clamped->data.offset = state.original->data.offset;
2626          state.clamped->data.stream = state.original->data.stream;
2627       }
2628       state.original->data.mode = nir_var_shader_temp;
2629       nir_fixup_deref_modes(vs);
2630    }
2631    if (vs->info.stage == MESA_SHADER_GEOMETRY) {
2632       nir_shader_instructions_pass(vs, clamp_layer_output_instr, nir_metadata_dominance, &state);
2633    } else {
2634       nir_builder b;
2635       nir_function_impl *impl = nir_shader_get_entrypoint(vs);
2636       b = nir_builder_at(nir_after_impl(impl));
2637       assert(impl->end_block->predecessors->entries == 1);
2638       clamp_layer_output_emit(&b, &state);
2639       nir_metadata_preserve(impl, nir_metadata_dominance);
2640    }
2641    optimize_nir(vs, NULL, true);
2642    NIR_PASS_V(vs, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2643    return true;
2644 }
2645 
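/* Scratch state for producer/consumer varying assignment: slot_map and
 * patch_slot_map translate original varying slots to compacted driver
 * locations, slot_track/patch_slot_track record which components of each
 * slot have already been claimed, and reserved/patch_reserved count how many
 * slots have been handed out.
 */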
2646 struct io_slot_map {
2647    uint64_t *patch_slot_track;
2648    uint64_t *slot_track;
2649    unsigned char *slot_map;
2650    unsigned reserved;
2651    unsigned char *patch_slot_map;
2652    unsigned patch_reserved;
2653 };
2654 
2655 static void
2656 assign_track_slot_mask(struct io_slot_map *io, nir_variable *var, unsigned slot, unsigned num_slots)
2657 {
2658    uint64_t *track = var->data.patch ? io->patch_slot_track : io->slot_track;
2659    uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(glsl_without_array(var->type))) << var->data.location_frac;
2660    uint64_t slot_mask = BITFIELD64_RANGE(slot, num_slots);
2661    u_foreach_bit(c, mask) {
2662       assert((track[c] & slot_mask) == 0);
2663       track[c] |= slot_mask;
2664    }
2665 }
2666 
2667 static void
2668 assign_slot_io(gl_shader_stage stage, struct io_slot_map *io, nir_variable *var, unsigned slot)
2669 {
2670    unsigned num_slots;
2671    if (nir_is_arrayed_io(var, stage))
2672       num_slots = glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
2673    else
2674       num_slots = glsl_count_vec4_slots(var->type, false, false);
2675    uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2676    assign_track_slot_mask(io, var, slot, num_slots);
2677    if (slot_map[slot] != 0xff)
2678       return;
2679    unsigned *reserved = var->data.patch ? &io->patch_reserved : &io->reserved;
2680    assert(*reserved + num_slots <= MAX_VARYING);
2681    assert(*reserved < MAX_VARYING);
2682    for (unsigned i = 0; i < num_slots; i++)
2683       slot_map[slot + i] = (*reserved)++;
2684 }
2685 
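/* Assign a compacted driver_location to a producer output: builtin slots get
 * a sentinel driver_location and are skipped, patch slots are rebased to
 * VARYING_SLOT_PATCH0, and everything else is reserved through the slot map.
 */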
2686 static void
2687 assign_producer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2688 {
2689    unsigned slot = var->data.location;
2690    switch (slot) {
2691    case -1:
2692       unreachable("there should be no UINT32_MAX location variables!");
2693       break;
2694    case VARYING_SLOT_POS:
2695    case VARYING_SLOT_PSIZ:
2696    case VARYING_SLOT_LAYER:
2697    case VARYING_SLOT_PRIMITIVE_ID:
2698    case VARYING_SLOT_CLIP_DIST0:
2699    case VARYING_SLOT_CULL_DIST0:
2700    case VARYING_SLOT_VIEWPORT:
2701    case VARYING_SLOT_FACE:
2702    case VARYING_SLOT_TESS_LEVEL_OUTER:
2703    case VARYING_SLOT_TESS_LEVEL_INNER:
2704       /* use a sentinel value to avoid counting later */
2705       var->data.driver_location = UINT32_MAX;
2706       return;
2707 
2708    default:
2709       break;
2710    }
2711    if (var->data.patch) {
2712       assert(slot >= VARYING_SLOT_PATCH0);
2713       slot -= VARYING_SLOT_PATCH0;
2714    }
2715    assign_slot_io(stage, io, var, slot);
2716    slot = var->data.patch ? io->patch_slot_map[slot] : io->slot_map[slot];
2717    assert(slot < MAX_VARYING);
2718    var->data.driver_location = slot;
2719 }
2720 
2721 ALWAYS_INLINE static bool
2722 is_texcoord(gl_shader_stage stage, const nir_variable *var)
2723 {
2724    if (stage != MESA_SHADER_FRAGMENT)
2725       return false;
2726    return var->data.location >= VARYING_SLOT_TEX0 &&
2727           var->data.location <= VARYING_SLOT_TEX7;
2728 }
2729 
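/* Look up the consumer-side driver_location from the slot map. Returns false
 * for inputs the producer never wrote (dead io); texcoords are kept because
 * of GL_COORD_REPLACE, and tess-ctrl variables are re-reserved since the
 * workgroup may still read them.
 */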
2730 static bool
2731 assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, struct io_slot_map *io)
2732 {
2733    unsigned slot = var->data.location;
2734    switch (slot) {
2735    case VARYING_SLOT_POS:
2736    case VARYING_SLOT_PSIZ:
2737    case VARYING_SLOT_LAYER:
2738    case VARYING_SLOT_PRIMITIVE_ID:
2739    case VARYING_SLOT_CLIP_DIST0:
2740    case VARYING_SLOT_CULL_DIST0:
2741    case VARYING_SLOT_VIEWPORT:
2742    case VARYING_SLOT_FACE:
2743    case VARYING_SLOT_TESS_LEVEL_OUTER:
2744    case VARYING_SLOT_TESS_LEVEL_INNER:
2745       /* use a sentinel value to avoid counting later */
2746       var->data.driver_location = UINT_MAX;
2747       return true;
2748    default:
2749       break;
2750    }
2751    if (var->data.patch) {
2752       assert(slot >= VARYING_SLOT_PATCH0);
2753       slot -= VARYING_SLOT_PATCH0;
2754    }
2755    uint8_t *slot_map = var->data.patch ? io->patch_slot_map : io->slot_map;
2756    if (slot_map[slot] == (unsigned char)-1) {
2757       /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE,
2758          * so keep for now and eliminate later
2759          */
2760       if (is_texcoord(stage, var)) {
2761          var->data.driver_location = UINT32_MAX;
2762          return true;
2763       }
2764       /* patch variables may be read in the workgroup */
2765       if (stage != MESA_SHADER_TESS_CTRL)
2766          /* dead io */
2767          return false;
2768       assign_slot_io(stage, io, var, slot);
2769    }
2770    var->data.driver_location = slot_map[slot];
2771    return true;
2772 }
2773 
2774 
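/* Replace loads of an input that has no live producer output with zero,
 * using (0,0,0,1) for the fragment color/backface-color slots.
 */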
2775 static bool
2776 rewrite_read_as_0(nir_builder *b, nir_instr *instr, void *data)
2777 {
2778    nir_variable *var = data;
2779    if (instr->type != nir_instr_type_intrinsic)
2780       return false;
2781 
2782    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2783    bool is_load = false;
2784    bool is_input = false;
2785    bool is_interp = false;
2786    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2787       return false;
2788    if (!is_load)
2789       return false;
2790    unsigned location = nir_intrinsic_io_semantics(intr).location;
2791    if (location != var->data.location)
2792       return false;
2793    b->cursor = nir_before_instr(instr);
2794    nir_def *zero = nir_imm_zero(b, intr->def.num_components,
2795                                 intr->def.bit_size);
2796    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2797       switch (location) {
2798       case VARYING_SLOT_COL0:
2799       case VARYING_SLOT_COL1:
2800       case VARYING_SLOT_BFC0:
2801       case VARYING_SLOT_BFC1:
2802          /* default color is 0,0,0,1 */
2803          if (intr->def.num_components == 4)
2804             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2805          break;
2806       default:
2807          break;
2808       }
2809    }
2810    nir_def_replace(&intr->def, zero);
2811    return true;
2812 }
2813 
2814 
2815 
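/* Remove gl_PointSize stores: when run with non-NULL data (the "one" mode)
 * only stores of a constant 1.0 are deleted, otherwise every PointSize store
 * is removed.
 */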
2816 static bool
2817 delete_psiz_store_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2818 {
2819    switch (intr->intrinsic) {
2820    case nir_intrinsic_store_output:
2821    case nir_intrinsic_store_per_primitive_output:
2822    case nir_intrinsic_store_per_vertex_output:
2823       break;
2824    default:
2825       return false;
2826    }
2827    if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PSIZ)
2828       return false;
2829    if (!data || (nir_src_is_const(intr->src[0]) && fabs(nir_src_as_float(intr->src[0]) - 1.0) < FLT_EPSILON)) {
2830       nir_instr_remove(&intr->instr);
2831       return true;
2832    }
2833    return false;
2834 }
2835 
2836 static bool
2837 delete_psiz_store(nir_shader *nir, bool one)
2838 {
2839    bool progress = nir_shader_intrinsics_pass(nir, delete_psiz_store_instr,
2840                                               nir_metadata_dominance, one ? nir : NULL);
2841    if (progress)
2842       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2843    return progress;
2844 }
2845 
2846 struct write_components {
2847    unsigned slot;
2848    uint32_t component_mask;
2849 };
2850 
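/* For a slot where the producer writes only some components, patch the
 * consumer's loads so the unwritten components read back zero (or the
 * 0,0,0,1 default for fragment color inputs).
 */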
2851 static bool
2852 fill_zero_reads(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2853 {
2854    struct write_components *wc = data;
2855    bool is_load = false;
2856    bool is_input = false;
2857    bool is_interp = false;
2858    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2859       return false;
2860    if (!is_input)
2861       return false;
2862    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2863    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2864       return false;
2865    unsigned num_components = intr->num_components;
2866    unsigned c = nir_intrinsic_component(intr);
2867    if (intr->def.bit_size == 64)
2868       num_components *= 2;
2869    nir_src *src_offset = nir_get_io_offset_src(intr);
2870    if (!nir_src_is_const(*src_offset))
2871       return false;
2872    unsigned slot_offset = nir_src_as_uint(*src_offset);
2873    if (s.location + slot_offset != wc->slot)
2874       return false;
2875    uint32_t readmask = BITFIELD_MASK(intr->num_components) << c;
2876    if (intr->def.bit_size == 64)
2877       readmask |= readmask << (intr->num_components + c);
2878    /* handle dvec3/dvec4 */
2879    if (num_components + c > 4)
2880       readmask >>= 4;
2881    if ((wc->component_mask & readmask) == readmask)
2882       return false;
2883    uint32_t rewrite_mask = readmask & ~wc->component_mask;
2884    if (!rewrite_mask)
2885       return false;
2886    b->cursor = nir_after_instr(&intr->instr);
2887    nir_def *zero = nir_imm_zero(b, intr->def.num_components, intr->def.bit_size);
2888    if (b->shader->info.stage == MESA_SHADER_FRAGMENT) {
2889       switch (wc->slot) {
2890       case VARYING_SLOT_COL0:
2891       case VARYING_SLOT_COL1:
2892       case VARYING_SLOT_BFC0:
2893       case VARYING_SLOT_BFC1:
2894          /* default color is 0,0,0,1 */
2895          if (intr->def.num_components == 4)
2896             zero = nir_vector_insert_imm(b, zero, nir_imm_float(b, 1.0), 3);
2897          break;
2898       default:
2899          break;
2900       }
2901    }
2902    rewrite_mask >>= c;
2903    nir_def *dest = &intr->def;
2904    u_foreach_bit(component, rewrite_mask)
2905       dest = nir_vector_insert_imm(b, dest, nir_channel(b, zero, component), component);
2906    nir_def_rewrite_uses_after(&intr->def, dest, dest->parent_instr);
2907    return true;
2908 }
2909 
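/* Accumulate the mask of components the producer actually writes for a given
 * slot, widening the writemask for 64-bit stores that occupy two components
 * per channel.
 */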
2910 static bool
2911 find_max_write_components(nir_builder *b, nir_intrinsic_instr *intr, void *data)
2912 {
2913    struct write_components *wc = data;
2914    bool is_load = false;
2915    bool is_input = false;
2916    bool is_interp = false;
2917    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
2918       return false;
2919    if (is_input || is_load)
2920       return false;
2921    nir_io_semantics s = nir_intrinsic_io_semantics(intr);
2922    if (wc->slot < s.location || wc->slot >= s.location + s.num_slots)
2923       return false;
2924    unsigned location = s.location;
2925    unsigned c = nir_intrinsic_component(intr);
2926    uint32_t wrmask = nir_intrinsic_write_mask(intr) << c;
2927    if ((nir_intrinsic_src_type(intr) & NIR_ALU_TYPE_SIZE_MASK) == 64) {
2928       unsigned num_components = intr->num_components * 2;
2929       nir_src *src_offset = nir_get_io_offset_src(intr);
2930       if (nir_src_is_const(*src_offset)) {
2931          if (location + nir_src_as_uint(*src_offset) != wc->slot && num_components + c < 4)
2932             return false;
2933       }
2934       wrmask |= wrmask << intr->num_components;
2935       /* handle dvec3/dvec4 */
2936       if (num_components + c > 4)
2937          wrmask >>= 4;
2938    }
2939    wc->component_mask |= wrmask;
2940    return false;
2941 }
2942 
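/* Match up a producer/consumer shader pair: drop unneeded gl_PointSize
 * writes, compact varying locations through an io_slot_map, zero-fill reads
 * of inputs or components the producer never writes, and clamp gl_Layer for
 * drivers that need sanitised layer values.
 */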
2943 void
2944 zink_compiler_assign_io(struct zink_screen *screen, nir_shader *producer, nir_shader *consumer)
2945 {
2946    uint64_t slot_track[4] = {0};
2947    uint64_t patch_slot_track[4] = {0};
2948    unsigned char slot_map[VARYING_SLOT_MAX];
2949    memset(slot_map, -1, sizeof(slot_map));
2950    unsigned char patch_slot_map[VARYING_SLOT_MAX];
2951    memset(patch_slot_map, -1, sizeof(patch_slot_map));
2952    struct io_slot_map io = {
2953       .patch_slot_track = patch_slot_track,
2954       .slot_track = slot_track,
2955       .slot_map = slot_map,
2956       .patch_slot_map = patch_slot_map,
2957       .reserved = 0,
2958       .patch_reserved = 0,
2959    };
2960    bool do_fixup = false;
2961    nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
2962    nir_variable *var = nir_find_variable_with_location(producer, nir_var_shader_out, VARYING_SLOT_PSIZ);
2963    if (var) {
2964       bool can_remove = false;
2965       if (!nir_find_variable_with_location(consumer, nir_var_shader_in, VARYING_SLOT_PSIZ)) {
2966          /* maintenance5 guarantees "A default size of 1.0 is used if PointSize is not written" */
2967          if (screen->info.have_KHR_maintenance5 && !var->data.explicit_xfb_buffer && delete_psiz_store(producer, true))
2968             can_remove = !(producer->info.outputs_written & VARYING_BIT_PSIZ);
2969          else if (consumer->info.stage != MESA_SHADER_FRAGMENT)
2970             can_remove = !var->data.explicit_location;
2971       }
2972       /* remove injected pointsize from all but the last vertex stage */
2973       if (can_remove) {
2974          var->data.mode = nir_var_shader_temp;
2975          nir_fixup_deref_modes(producer);
2976          delete_psiz_store(producer, false);
2977          NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2978          optimize_nir(producer, NULL, true);
2979       }
2980    }
2981    if (consumer->info.stage != MESA_SHADER_FRAGMENT) {
2982       producer->info.has_transform_feedback_varyings = false;
2983       nir_foreach_shader_out_variable(var_out, producer)
2984          var_out->data.explicit_xfb_buffer = false;
2985    }
2986    if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
2987       /* never assign from tcs -> tes, always invert */
2988       nir_foreach_variable_with_modes(var_in, consumer, nir_var_shader_in)
2989          assign_producer_var_io(consumer->info.stage, var_in, &io);
2990       nir_foreach_variable_with_modes_safe(var_out, producer, nir_var_shader_out) {
2991          if (!assign_consumer_var_io(producer->info.stage, var_out, &io))
2992             /* this is an output, nothing more needs to be done for it to be dropped */
2993             do_fixup = true;
2994       }
2995    } else {
2996       nir_foreach_variable_with_modes(var_out, producer, nir_var_shader_out)
2997          assign_producer_var_io(producer->info.stage, var_out, &io);
2998       nir_foreach_variable_with_modes_safe(var_in, consumer, nir_var_shader_in) {
2999          if (!assign_consumer_var_io(consumer->info.stage, var_in, &io)) {
3000             do_fixup = true;
3001             /* input needs to be rewritten */
3002             nir_shader_instructions_pass(consumer, rewrite_read_as_0, nir_metadata_dominance, var_in);
3003          }
3004       }
3005       if (consumer->info.stage == MESA_SHADER_FRAGMENT && screen->driver_compiler_workarounds.needs_sanitised_layer)
3006          do_fixup |= clamp_layer_output(producer, consumer, &io.reserved);
3007    }
3008    nir_shader_gather_info(producer, nir_shader_get_entrypoint(producer));
3009    if (producer->info.io_lowered && consumer->info.io_lowered) {
3010       u_foreach_bit64(slot, producer->info.outputs_written & BITFIELD64_RANGE(VARYING_SLOT_VAR0, 31)) {
3011          struct write_components wc = {slot, 0};
3012          nir_shader_intrinsics_pass(producer, find_max_write_components, nir_metadata_all, &wc);
3013          assert(wc.component_mask);
3014          if (wc.component_mask != BITFIELD_MASK(4))
3015             do_fixup |= nir_shader_intrinsics_pass(consumer, fill_zero_reads, nir_metadata_dominance, &wc);
3016       }
3017    }
3018    if (!do_fixup)
3019       return;
3020    nir_fixup_deref_modes(nir);
3021    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3022    optimize_nir(nir, NULL, true);
3023 }
3024 
3025 /* all types that hit this function contain something that is 64bit */
3026 static const struct glsl_type *
3027 rewrite_64bit_type(nir_shader *nir, const struct glsl_type *type, nir_variable *var, bool doubles_only)
3028 {
3029    if (glsl_type_is_array(type)) {
3030       const struct glsl_type *child = glsl_get_array_element(type);
3031       unsigned elements = glsl_array_size(type);
3032       unsigned stride = glsl_get_explicit_stride(type);
3033       return glsl_array_type(rewrite_64bit_type(nir, child, var, doubles_only), elements, stride);
3034    }
3035    /* rewrite structs recursively */
3036    if (glsl_type_is_struct_or_ifc(type)) {
3037       unsigned nmembers = glsl_get_length(type);
3038       struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, nmembers * 2);
3039       unsigned xfb_offset = 0;
3040       for (unsigned i = 0; i < nmembers; i++) {
3041          const struct glsl_struct_field *f = glsl_get_struct_field_data(type, i);
3042          fields[i] = *f;
3043          xfb_offset += glsl_get_component_slots(fields[i].type) * 4;
3044          if (i < nmembers - 1 && xfb_offset % 8 &&
3045              (glsl_contains_double(glsl_get_struct_field(type, i + 1)) ||
3046               (glsl_type_contains_64bit(glsl_get_struct_field(type, i + 1)) && !doubles_only))) {
3047             var->data.is_xfb = true;
3048          }
3049          fields[i].type = rewrite_64bit_type(nir, f->type, var, doubles_only);
3050       }
3051       return glsl_struct_type(fields, nmembers, glsl_get_type_name(type), glsl_struct_type_is_packed(type));
3052    }
3053    if (!glsl_type_is_64bit(type) || (!glsl_contains_double(type) && doubles_only))
3054       return type;
3055    if (doubles_only && glsl_type_is_vector_or_scalar(type))
3056       return glsl_vector_type(GLSL_TYPE_UINT64, glsl_get_vector_elements(type));
3057    enum glsl_base_type base_type;
3058    switch (glsl_get_base_type(type)) {
3059    case GLSL_TYPE_UINT64:
3060       base_type = GLSL_TYPE_UINT;
3061       break;
3062    case GLSL_TYPE_INT64:
3063       base_type = GLSL_TYPE_INT;
3064       break;
3065    case GLSL_TYPE_DOUBLE:
3066       base_type = GLSL_TYPE_FLOAT;
3067       break;
3068    default:
3069       unreachable("unknown 64-bit vertex attribute format!");
3070    }
3071    if (glsl_type_is_scalar(type))
3072       return glsl_vector_type(base_type, 2);
3073    unsigned num_components;
3074    if (glsl_type_is_matrix(type)) {
3075       /* align to vec4 size: dvec3-composed arrays are arrays of dvec3s */
3076       unsigned vec_components = glsl_get_vector_elements(type);
3077       if (vec_components == 3)
3078          vec_components = 4;
3079       num_components = vec_components * 2 * glsl_get_matrix_columns(type);
3080    } else {
3081       num_components = glsl_get_vector_elements(type) * 2;
3082       if (num_components <= 4)
3083          return glsl_vector_type(base_type, num_components);
3084    }
3085    /* dvec3/dvec4/dmatX: rewrite as struct { vec4, vec4, vec4, ... [vec2] } */
3086    struct glsl_struct_field fields[8] = {0};
3087    unsigned remaining = num_components;
3088    unsigned nfields = 0;
3089    for (unsigned i = 0; remaining; i++, remaining -= MIN2(4, remaining), nfields++) {
3090       assert(i < ARRAY_SIZE(fields));
3091       fields[i].name = "";
3092       fields[i].offset = i * 16;
3093       fields[i].type = glsl_vector_type(base_type, MIN2(4, remaining));
3094    }
3095    char buf[64];
3096    snprintf(buf, sizeof(buf), "struct(%s)", glsl_get_type_name(type));
3097    return glsl_struct_type(fields, nfields, buf, true);
3098 }
3099 
3100 static const struct glsl_type *
3101 deref_is_matrix(nir_deref_instr *deref)
3102 {
3103    if (glsl_type_is_matrix(deref->type))
3104       return deref->type;
3105    nir_deref_instr *parent = nir_deref_instr_parent(deref);
3106    if (parent)
3107       return deref_is_matrix(parent);
3108    return NULL;
3109 }
3110 
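/* Rewrite every deref/load/store of a 64-bit variable inside one function to
 * use its rewritten 32-bit type: values are split/recombined with
 * unpack_64_2x32/pack_64_2x32, wide types go through the generated struct,
 * and indirectly indexed matrix rows are expanded into per-column if-ladders.
 */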
3111 static bool
3112 lower_64bit_vars_function(nir_shader *shader, nir_function_impl *impl, nir_variable *var,
3113                           struct hash_table *derefs, struct set *deletes, bool doubles_only)
3114 {
3115    bool func_progress = false;
3116    nir_builder b = nir_builder_create(impl);
3117    nir_foreach_block(block, impl) {
3118       nir_foreach_instr_safe(instr, block) {
3119          switch (instr->type) {
3120          case nir_instr_type_deref: {
3121             nir_deref_instr *deref = nir_instr_as_deref(instr);
3122             if (!(deref->modes & var->data.mode))
3123                continue;
3124             if (nir_deref_instr_get_variable(deref) != var)
3125                continue;
3126 
3127             /* matrix types are special: store the original deref type for later use */
3128             const struct glsl_type *matrix = deref_is_matrix(deref);
3129             nir_deref_instr *parent = nir_deref_instr_parent(deref);
3130             if (!matrix) {
3131                /* if this isn't a direct matrix deref, it's maybe a matrix row deref */
3132                hash_table_foreach(derefs, he) {
3133                   /* propagate parent matrix type to row deref */
3134                   if (he->key == parent)
3135                      matrix = he->data;
3136                }
3137             }
3138             if (matrix)
3139                _mesa_hash_table_insert(derefs, deref, (void*)matrix);
3140             if (deref->deref_type == nir_deref_type_var)
3141                deref->type = var->type;
3142             else
3143                deref->type = rewrite_64bit_type(shader, deref->type, var, doubles_only);
3144          }
3145          break;
3146          case nir_instr_type_intrinsic: {
3147             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
3148             if (intr->intrinsic != nir_intrinsic_store_deref &&
3149                   intr->intrinsic != nir_intrinsic_load_deref)
3150                break;
3151             if (nir_intrinsic_get_var(intr, 0) != var)
3152                break;
3153             if ((intr->intrinsic == nir_intrinsic_store_deref && intr->src[1].ssa->bit_size != 64) ||
3154                   (intr->intrinsic == nir_intrinsic_load_deref && intr->def.bit_size != 64))
3155                break;
3156             b.cursor = nir_before_instr(instr);
3157             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
3158             unsigned num_components = intr->num_components * 2;
3159             nir_def *comp[NIR_MAX_VEC_COMPONENTS];
3160             /* this is the stored matrix type from the deref */
3161             struct hash_entry *he = _mesa_hash_table_search(derefs, deref);
3162             const struct glsl_type *matrix = he ? he->data : NULL;
3163             if (doubles_only && !matrix)
3164                break;
3165             func_progress = true;
3166             if (intr->intrinsic == nir_intrinsic_store_deref) {
3167                /* first, unpack the src data to 32bit vec2 components */
3168                for (unsigned i = 0; i < intr->num_components; i++) {
3169                   nir_def *ssa = nir_unpack_64_2x32(&b, nir_channel(&b, intr->src[1].ssa, i));
3170                   comp[i * 2] = nir_channel(&b, ssa, 0);
3171                   comp[i * 2 + 1] = nir_channel(&b, ssa, 1);
3172                }
3173                unsigned wrmask = nir_intrinsic_write_mask(intr);
3174                unsigned mask = 0;
3175                /* expand writemask for doubled components */
3176                for (unsigned i = 0; i < intr->num_components; i++) {
3177                   if (wrmask & BITFIELD_BIT(i))
3178                      mask |= BITFIELD_BIT(i * 2) | BITFIELD_BIT(i * 2 + 1);
3179                }
3180                if (matrix) {
3181                   /* matrix types always come from array (row) derefs */
3182                   assert(deref->deref_type == nir_deref_type_array);
3183                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3184                   /* let optimization clean up consts later */
3185                   nir_def *index = deref->arr.index.ssa;
3186                   /* this might be an indirect array index:
3187                      * - iterate over matrix columns
3188                      * - add if blocks for each column
3189                      * - perform the store in the block
3190                      */
3191                   for (unsigned idx = 0; idx < glsl_get_matrix_columns(matrix); idx++) {
3192                      nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3193                      unsigned vec_components = glsl_get_vector_elements(matrix);
3194                      /* always clamp dvec3 to 4 components */
3195                      if (vec_components == 3)
3196                         vec_components = 4;
3197                      unsigned start_component = idx * vec_components * 2;
3198                      /* struct member */
3199                      unsigned member = start_component / 4;
3200                      /* number of components remaining */
3201                      unsigned remaining = num_components;
3202                      for (unsigned i = 0; i < num_components; member++) {
3203                         if (!(mask & BITFIELD_BIT(i)))
3204                            continue;
3205                         assert(member < glsl_get_length(var_deref->type));
3206                         /* deref the rewritten struct to the appropriate vec4/vec2 */
3207                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3208                         unsigned incr = MIN2(remaining, 4);
3209                         /* assemble the write component vec */
3210                         nir_def *val = nir_vec(&b, &comp[i], incr);
3211                         /* use the number of components being written as the writemask */
3212                         if (glsl_get_vector_elements(strct->type) > val->num_components)
3213                            val = nir_pad_vector(&b, val, glsl_get_vector_elements(strct->type));
3214                         nir_store_deref(&b, strct, val, BITFIELD_MASK(incr));
3215                         remaining -= incr;
3216                         i += incr;
3217                      }
3218                      nir_pop_if(&b, NULL);
3219                   }
3220                   _mesa_set_add(deletes, &deref->instr);
3221                } else if (num_components <= 4) {
3222                   /* simple store case: just write out the components */
3223                   nir_def *dest = nir_vec(&b, comp, num_components);
3224                   nir_store_deref(&b, deref, dest, mask);
3225                } else {
3226                   /* writing > 4 components: access the struct and write to the appropriate vec4 members */
3227                   for (unsigned i = 0; num_components; i++, num_components -= MIN2(num_components, 4)) {
3228                      if (!(mask & BITFIELD_MASK(4)))
3229                         continue;
3230                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3231                      nir_def *dest = nir_vec(&b, &comp[i * 4], MIN2(num_components, 4));
3232                      if (glsl_get_vector_elements(strct->type) > dest->num_components)
3233                         dest = nir_pad_vector(&b, dest, glsl_get_vector_elements(strct->type));
3234                      nir_store_deref(&b, strct, dest, mask & BITFIELD_MASK(4));
3235                      mask >>= 4;
3236                   }
3237                }
3238             } else {
3239                nir_def *dest = NULL;
3240                if (matrix) {
3241                   /* matrix types always come from array (row) derefs */
3242                   assert(deref->deref_type == nir_deref_type_array);
3243                   nir_deref_instr *var_deref = nir_deref_instr_parent(deref);
3244                   /* let optimization clean up consts later */
3245                   nir_def *index = deref->arr.index.ssa;
3246                   /* this might be an indirect array index:
3247                      * - iterate over matrix columns
3248                      * - add if blocks for each column
3249                      * - phi the loads using the array index
3250                      */
3251                   unsigned cols = glsl_get_matrix_columns(matrix);
3252                   nir_def *dests[4];
3253                   for (unsigned idx = 0; idx < cols; idx++) {
3254                      /* don't add an if for the final row: this will be handled in the else */
3255                      if (idx < cols - 1)
3256                         nir_push_if(&b, nir_ieq_imm(&b, index, idx));
3257                      unsigned vec_components = glsl_get_vector_elements(matrix);
3258                      /* always clamp dvec3 to 4 components */
3259                      if (vec_components == 3)
3260                         vec_components = 4;
3261                      unsigned start_component = idx * vec_components * 2;
3262                      /* struct member */
3263                      unsigned member = start_component / 4;
3264                      /* number of components remaining */
3265                      unsigned remaining = num_components;
3266                      /* component index */
3267                      unsigned comp_idx = 0;
3268                      for (unsigned i = 0; i < num_components; member++) {
3269                         assert(member < glsl_get_length(var_deref->type));
3270                         nir_deref_instr *strct = nir_build_deref_struct(&b, var_deref, member);
3271                         nir_def *load = nir_load_deref(&b, strct);
3272                         unsigned incr = MIN2(remaining, 4);
3273                         /* repack the loads to 64bit */
3274                         for (unsigned c = 0; c < incr / 2; c++, comp_idx++)
3275                            comp[comp_idx] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(c * 2, 2)));
3276                         remaining -= incr;
3277                         i += incr;
3278                      }
3279                      dest = dests[idx] = nir_vec(&b, comp, intr->num_components);
3280                      if (idx < cols - 1)
3281                         nir_push_else(&b, NULL);
3282                   }
3283                   /* loop over all the if blocks that were made, pop them, and phi the loaded+packed results */
3284                   for (unsigned idx = cols - 1; idx >= 1; idx--) {
3285                      nir_pop_if(&b, NULL);
3286                      dest = nir_if_phi(&b, dests[idx - 1], dest);
3287                   }
3288                   _mesa_set_add(deletes, &deref->instr);
3289                } else if (num_components <= 4) {
3290                   /* simple load case */
3291                   nir_def *load = nir_load_deref(&b, deref);
3292                   /* pack 32bit loads into 64bit: this will automagically get optimized out later */
3293                   for (unsigned i = 0; i < intr->num_components; i++) {
3294                      comp[i] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(i * 2, 2)));
3295                   }
3296                   dest = nir_vec(&b, comp, intr->num_components);
3297                } else {
3298                   /* writing > 4 components: access the struct and load the appropriate vec4 members */
3299                   for (unsigned i = 0; i < 2; i++, num_components -= 4) {
3300                      nir_deref_instr *strct = nir_build_deref_struct(&b, deref, i);
3301                      nir_def *load = nir_load_deref(&b, strct);
3302                      comp[i * 2] = nir_pack_64_2x32(&b,
3303                                                     nir_trim_vector(&b, load, 2));
3304                      if (num_components > 2)
3305                         comp[i * 2 + 1] = nir_pack_64_2x32(&b, nir_channels(&b, load, BITFIELD_RANGE(2, 2)));
3306                   }
3307                   dest = nir_vec(&b, comp, intr->num_components);
3308                }
3309                nir_def_rewrite_uses_after(&intr->def, dest, instr);
3310             }
3311             _mesa_set_add(deletes, instr);
3312             break;
3313          }
3314          break;
3315          default: break;
3316          }
3317       }
3318    }
3319    if (func_progress)
3320       nir_metadata_preserve(impl, nir_metadata_none);
3321    /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
3322    set_foreach_remove(deletes, he)
3323       nir_instr_remove((void*)he->key);
3324    return func_progress;
3325 }
3326 
3327 static bool
3328 lower_64bit_vars_loop(nir_shader *shader, nir_variable *var, struct hash_table *derefs,
3329                       struct set *deletes, bool doubles_only)
3330 {
3331    if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3332       return false;
3333    var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3334    /* once type is rewritten, rewrite all loads and stores */
3335    nir_foreach_function_impl(impl, shader)
3336       lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3337    return true;
3338 }
3339 
3340 /* rewrite all input/output variables using 32bit types and load/stores */
3341 static bool
3342 lower_64bit_vars(nir_shader *shader, bool doubles_only)
3343 {
3344    bool progress = false;
3345    struct hash_table *derefs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3346    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
3347    nir_foreach_function_impl(impl, shader) {
3348       nir_foreach_function_temp_variable(var, impl) {
3349          if (!glsl_type_contains_64bit(var->type) || (doubles_only && !glsl_contains_double(var->type)))
3350             continue;
3351          var->type = rewrite_64bit_type(shader, var->type, var, doubles_only);
3352          progress |= lower_64bit_vars_function(shader, impl, var, derefs, deletes, doubles_only);
3353       }
3354    }
3355    ralloc_free(deletes);
3356    ralloc_free(derefs);
3357    if (progress) {
3358       nir_lower_alu_to_scalar(shader, filter_64_bit_instr, NULL);
3359       nir_lower_phis_to_scalar(shader, false);
3360       optimize_nir(shader, NULL, true);
3361    }
3362    return progress;
3363 }
3364 
3365 static void
3366 zink_shader_dump(const struct zink_shader *zs, void *words, size_t size, const char *file)
3367 {
3368    FILE *fp = fopen(file, "wb");
3369    if (fp) {
3370       fwrite(words, 1, size, fp);
3371       fclose(fp);
3372       fprintf(stderr, "wrote %s shader '%s'...\n", _mesa_shader_stage_to_string(zs->info.stage), file);
3373    }
3374 }
3375 
3376 static VkShaderStageFlagBits
3377 zink_get_next_stage(gl_shader_stage stage)
3378 {
3379    switch (stage) {
3380    case MESA_SHADER_VERTEX:
3381       return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
3382              VK_SHADER_STAGE_GEOMETRY_BIT |
3383              VK_SHADER_STAGE_FRAGMENT_BIT;
3384    case MESA_SHADER_TESS_CTRL:
3385       return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
3386    case MESA_SHADER_TESS_EVAL:
3387       return VK_SHADER_STAGE_GEOMETRY_BIT |
3388              VK_SHADER_STAGE_FRAGMENT_BIT;
3389    case MESA_SHADER_GEOMETRY:
3390       return VK_SHADER_STAGE_FRAGMENT_BIT;
3391    case MESA_SHADER_FRAGMENT:
3392    case MESA_SHADER_COMPUTE:
3393    case MESA_SHADER_KERNEL:
3394       return 0;
3395    default:
3396       unreachable("invalid shader stage");
3397    }
3398 }
3399 
3400 struct zink_shader_object
3401 zink_shader_spirv_compile(struct zink_screen *screen, struct zink_shader *zs, struct spirv_shader *spirv, bool can_shobj, struct zink_program *pg)
3402 {
3403    VkShaderModuleCreateInfo smci = {0};
3404    VkShaderCreateInfoEXT sci = {0};
3405 
3406    if (!spirv)
3407       spirv = zs->spirv;
3408 
3409    if (zink_debug & ZINK_DEBUG_SPIRV) {
3410       char buf[256];
3411       static int i;
3412       snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
3413       zink_shader_dump(zs, spirv->words, spirv->num_words * sizeof(uint32_t), buf);
3414    }
3415 
3416    sci.sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT;
3417    sci.stage = mesa_to_vk_shader_stage(zs->info.stage);
3418    sci.nextStage = zink_get_next_stage(zs->info.stage);
3419    sci.codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT;
3420    sci.codeSize = spirv->num_words * sizeof(uint32_t);
3421    sci.pCode = spirv->words;
3422    sci.pName = "main";
3423    VkDescriptorSetLayout dsl[ZINK_GFX_SHADER_COUNT] = {0};
3424    if (pg) {
3425       sci.setLayoutCount = pg->num_dsl;
3426       sci.pSetLayouts = pg->dsl;
3427    } else {
3428       sci.setLayoutCount = zs->info.stage + 1;
3429       dsl[zs->info.stage] = zs->precompile.dsl;
3430       sci.pSetLayouts = dsl;
3431    }
3432    VkPushConstantRange pcr;
3433    pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS;
3434    pcr.offset = 0;
3435    pcr.size = sizeof(struct zink_gfx_push_constant);
3436    sci.pushConstantRangeCount = 1;
3437    sci.pPushConstantRanges = &pcr;
3438 
3439    smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
3440    smci.codeSize = spirv->num_words * sizeof(uint32_t);
3441    smci.pCode = spirv->words;
3442 
3443 #ifndef NDEBUG
3444    if (zink_debug & ZINK_DEBUG_VALIDATION) {
3445       static const struct spirv_to_nir_options spirv_options = {
3446          .environment = NIR_SPIRV_VULKAN,
3447          .capabilities = NULL,
3448          .ubo_addr_format = nir_address_format_32bit_index_offset,
3449          .ssbo_addr_format = nir_address_format_32bit_index_offset,
3450          .phys_ssbo_addr_format = nir_address_format_64bit_global,
3451          .push_const_addr_format = nir_address_format_logical,
3452          .shared_addr_format = nir_address_format_32bit_offset,
3453       };
3454       uint32_t num_spec_entries = 0;
3455       struct nir_spirv_specialization *spec_entries = NULL;
3456       VkSpecializationInfo sinfo = {0};
3457       VkSpecializationMapEntry me[3];
3458       uint32_t size[3] = {1,1,1};
3459       if (!zs->info.workgroup_size[0]) {
3460          sinfo.mapEntryCount = 3;
3461          sinfo.pMapEntries = &me[0];
3462          sinfo.dataSize = sizeof(uint32_t) * 3;
3463          sinfo.pData = size;
3464          uint32_t ids[] = {ZINK_WORKGROUP_SIZE_X, ZINK_WORKGROUP_SIZE_Y, ZINK_WORKGROUP_SIZE_Z};
3465          for (int i = 0; i < 3; i++) {
3466             me[i].size = sizeof(uint32_t);
3467             me[i].constantID = ids[i];
3468             me[i].offset = i * sizeof(uint32_t);
3469          }
3470          spec_entries = vk_spec_info_to_nir_spirv(&sinfo, &num_spec_entries);
3471       }
3472       nir_shader *nir = spirv_to_nir(spirv->words, spirv->num_words,
3473                          spec_entries, num_spec_entries,
3474                          clamp_stage(&zs->info), "main", &spirv_options, &screen->nir_options);
3475       assert(nir);
3476       ralloc_free(nir);
3477       free(spec_entries);
3478    }
3479 #endif
3480 
3481    VkResult ret;
3482    struct zink_shader_object obj = {0};
3483    if (!can_shobj || !screen->info.have_EXT_shader_object)
3484       ret = VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &obj.mod);
3485    else
3486       ret = VKSCR(CreateShadersEXT)(screen->dev, 1, &sci, NULL, &obj.obj);
3487    ASSERTED bool success = zink_screen_handle_vkresult(screen, ret);
3488    assert(success);
3489    return obj;
3490 }
3491 
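/* Demote shader in/out variables that are never accessed (no derefs and no
 * lowered-io intrinsics) to temporaries and delete them.
 */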
3492 static void
3493 prune_io(nir_shader *nir)
3494 {
3495    nir_foreach_shader_in_variable_safe(var, nir) {
3496       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3497          var->data.mode = nir_var_shader_temp;
3498    }
3499    nir_foreach_shader_out_variable_safe(var, nir) {
3500       if (!find_var_deref(nir, var) && !find_var_io(nir, var))
3501          var->data.mode = nir_var_shader_temp;
3502    }
3503    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
3504 }
3505 
3506 static void
3507 flag_shadow_tex(nir_variable *var, struct zink_shader *zs)
3508 {
3509    assert(var->data.driver_location < 32); //bitfield size for tracking
3510    zs->fs.legacy_shadow_mask |= BITFIELD_BIT(var->data.driver_location);
3511 }
3512 
3513 static void
3514 flag_shadow_tex_instr(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3515 {
3516    assert(var);
3517    unsigned num_components = tex->def.num_components;
3518    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3519    if (rewrite_depth && nir_def_components_read( &tex->def) & ~1) {
3520       /* this needs recompiles */
3521       if (b->shader->info.stage == MESA_SHADER_FRAGMENT)
3522          flag_shadow_tex(var, zs);
3523       else
3524          mesa_loge("unhandled old-style shadow sampler in non-fragment stage!");
3525    }
3526 }
3527 
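/* Convert a texture result back to the sampler variable's declared bit size
 * and, for legacy (vec4-result) shadow sampling where only .x is read, switch
 * the instruction to new-style scalar shadow output.
 */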
3528 static nir_def *
3529 rewrite_tex_dest(nir_builder *b, nir_tex_instr *tex, nir_variable *var, struct zink_shader *zs)
3530 {
3531    assert(var);
3532    const struct glsl_type *type = glsl_without_array(var->type);
3533    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3534    bool is_int = glsl_base_type_is_integer(ret_type);
3535    unsigned bit_size = glsl_base_type_get_bit_size(ret_type);
3536    unsigned dest_size = tex->def.bit_size;
3537    b->cursor = nir_after_instr(&tex->instr);
3538    unsigned num_components = tex->def.num_components;
3539    bool rewrite_depth = tex->is_shadow && num_components > 1 && tex->op != nir_texop_tg4 && !tex->is_sparse;
3540    if (bit_size == dest_size && !rewrite_depth)
3541       return NULL;
3542    nir_def *dest = &tex->def;
3543    if (rewrite_depth && zs) {
3544       if (nir_def_components_read(dest) & ~1) {
3545          /* handled above */
3546          return NULL;
3547       }
3548       /* If only .x is used in the NIR, then it's effectively not a legacy depth
3549        * sample anyway and we don't want to ask for shader recompiles.  This is
3550        * the typical path, since GL_DEPTH_TEXTURE_MODE defaults to either RED or
3551        * LUMINANCE, so apps just use the first channel.
3552        */
3553       tex->def.num_components = 1;
3554       tex->is_new_style_shadow = true;
3555    }
3556    if (bit_size != dest_size) {
3557       tex->def.bit_size = bit_size;
3558       tex->dest_type = nir_get_nir_type_for_glsl_base_type(ret_type);
3559 
3560       if (is_int) {
3561          if (glsl_unsigned_base_type_of(ret_type) == ret_type)
3562             dest = nir_u2uN(b, &tex->def, dest_size);
3563          else
3564             dest = nir_i2iN(b, &tex->def, dest_size);
3565       } else {
3566          dest = nir_f2fN(b, &tex->def, dest_size);
3567       }
3568       if (!rewrite_depth)
3569          nir_def_rewrite_uses_after(&tex->def, dest, dest->parent_instr);
3570    }
3571    return dest;
3572 }
3573 
3574 struct lower_zs_swizzle_state {
3575    bool shadow_only;
3576    unsigned base_sampler_id;
3577    const struct zink_zs_swizzle_key *swizzle;
3578 };
3579 
3580 static bool
3581 lower_zs_swizzle_tex_instr(nir_builder *b, nir_instr *instr, void *data)
3582 {
3583    struct lower_zs_swizzle_state *state = data;
3584    const struct zink_zs_swizzle_key *swizzle_key = state->swizzle;
3585    assert(state->shadow_only || swizzle_key);
3586    if (instr->type != nir_instr_type_tex)
3587       return false;
3588    nir_tex_instr *tex = nir_instr_as_tex(instr);
3589    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod ||
3590        (!tex->is_shadow && state->shadow_only) || tex->is_new_style_shadow)
3591       return false;
3592    if (tex->is_shadow && tex->op == nir_texop_tg4)
3593       /* Will not even try to emulate the shadow comparison */
3594       return false;
3595    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
3596    nir_variable *var = NULL;
3597    if (handle != -1)
3598       /* gtfo bindless depth texture mode */
3599       return false;
3600    var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
3601    assert(var);
3602    uint32_t sampler_id = var->data.binding - state->base_sampler_id;
3603    const struct glsl_type *type = glsl_without_array(var->type);
3604    enum glsl_base_type ret_type = glsl_get_sampler_result_type(type);
3605    bool is_int = glsl_base_type_is_integer(ret_type);
3606    unsigned num_components = tex->def.num_components;
3607    if (tex->is_shadow)
3608       tex->is_new_style_shadow = true;
3609    nir_def *dest = rewrite_tex_dest(b, tex, var, NULL);
3610    assert(dest || !state->shadow_only);
3611    if (!dest && !(swizzle_key->mask & BITFIELD_BIT(sampler_id)))
3612       return false;
3613    else if (!dest)
3614       dest = &tex->def;
3615    else
3616       tex->def.num_components = 1;
3617    if (swizzle_key && (swizzle_key->mask & BITFIELD_BIT(sampler_id))) {
3618       /* these require manual swizzles */
3619       if (tex->op == nir_texop_tg4) {
3620          assert(!tex->is_shadow);
3621          nir_def *swizzle;
3622          switch (swizzle_key->swizzle[sampler_id].s[tex->component]) {
3623          case PIPE_SWIZZLE_0:
3624             swizzle = nir_imm_zero(b, 4, tex->def.bit_size);
3625             break;
3626          case PIPE_SWIZZLE_1:
3627             if (is_int)
3628                swizzle = nir_imm_intN_t(b, 4, tex->def.bit_size);
3629             else
3630                swizzle = nir_imm_floatN_t(b, 4, tex->def.bit_size);
3631             break;
3632          default:
3633             if (!tex->component)
3634                return false;
3635             tex->component = 0;
3636             return true;
3637          }
3638          nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3639          return true;
3640       }
3641       nir_def *vec[4];
3642       for (unsigned i = 0; i < ARRAY_SIZE(vec); i++) {
3643          switch (swizzle_key->swizzle[sampler_id].s[i]) {
3644          case PIPE_SWIZZLE_0:
3645             vec[i] = nir_imm_zero(b, 1, tex->def.bit_size);
3646             break;
3647          case PIPE_SWIZZLE_1:
3648             if (is_int)
3649                vec[i] = nir_imm_intN_t(b, 1, tex->def.bit_size);
3650             else
3651                vec[i] = nir_imm_floatN_t(b, 1, tex->def.bit_size);
3652             break;
3653          default:
3654             vec[i] = dest->num_components == 1 ? dest : nir_channel(b, dest, i);
3655             break;
3656          }
3657       }
3658       nir_def *swizzle = nir_vec(b, vec, num_components);
3659       nir_def_rewrite_uses_after(dest, swizzle, swizzle->parent_instr);
3660    } else {
3661       assert(tex->is_shadow);
3662       nir_def *vec[4] = {dest, dest, dest, dest};
3663       nir_def *splat = nir_vec(b, vec, num_components);
3664       nir_def_rewrite_uses_after(dest, splat, splat->parent_instr);
3665    }
3666    return true;
3667 }
3668 
3669 /* Applies in-shader swizzles when necessary for depth/shadow sampling.
3670  *
3671  * SPIRV only has new-style (scalar result) shadow sampling, so to emulate
3672  * !is_new_style_shadow (vec4 result) shadow sampling we lower to a
3673  * new-style-shadow sample, and apply GL_DEPTH_TEXTURE_MODE swizzles in the NIR
3674  * shader to expand out to vec4.  Since this depends on sampler state, it's a
3675  * draw-time shader recompile to do so.
3676  *
3677  * We may also need to apply shader swizzles for
3678  * driver_compiler_workarounds.needs_zs_shader_swizzle.
3679  */
3680 static bool
3681 lower_zs_swizzle_tex(nir_shader *nir, const void *swizzle, bool shadow_only)
3682 {
3683    /* We don't use nir_lower_tex to do our swizzling, because of this base_sampler_id. */
3684    unsigned base_sampler_id = gl_shader_stage_is_compute(nir->info.stage) ? 0 : PIPE_MAX_SAMPLERS * nir->info.stage;
3685    struct lower_zs_swizzle_state state = {shadow_only, base_sampler_id, swizzle};
3686    return nir_shader_instructions_pass(nir, lower_zs_swizzle_tex_instr,
3687                                        nir_metadata_control_flow,
3688                                        (void*)&state);
3689 }
3690 
3691 static bool
3692 invert_point_coord_instr(nir_builder *b, nir_intrinsic_instr *intr,
3693                          void *data)
3694 {
3695    if (intr->intrinsic != nir_intrinsic_load_point_coord)
3696       return false;
3697    b->cursor = nir_after_instr(&intr->instr);
3698    nir_def *def = nir_vec2(b, nir_channel(b, &intr->def, 0),
3699                                   nir_fsub_imm(b, 1.0, nir_channel(b, &intr->def, 1)));
3700    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
3701    return true;
3702 }
3703 
3704 static bool
3705 invert_point_coord(nir_shader *nir)
3706 {
3707    if (!BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD))
3708       return false;
3709    return nir_shader_intrinsics_pass(nir, invert_point_coord_instr,
3710                                      nir_metadata_dominance, NULL);
3711 }
3712 
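/* Lower sparse-residency texturing: the residency code becomes the result of
 * nir_is_sparse_resident_zink appended to the texel vector, residency
 * code_and becomes iand, and is_sparse_texels_resident becomes i2b.
 */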
3713 static bool
3714 lower_sparse_instr(nir_builder *b, nir_instr *instr, void *data)
3715 {
3716    b->cursor = nir_after_instr(instr);
3717 
3718    switch (instr->type) {
3719    case nir_instr_type_tex: {
3720       nir_tex_instr *tex = nir_instr_as_tex(instr);
3721       if (!tex->is_sparse)
3722          return false;
3723 
3724       nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &tex->def));
3725       nir_def *vec = nir_vector_insert_imm(b, &tex->def, res,
3726                                            tex->def.num_components - 1);
3727       nir_def_rewrite_uses_after(&tex->def, vec, vec->parent_instr);
3728       return true;
3729    }
3730 
3731    case nir_instr_type_intrinsic: {
3732       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3733       switch (intrin->intrinsic) {
3734       case nir_intrinsic_image_deref_sparse_load: {
3735          nir_def *res = nir_b2i32(b, nir_is_sparse_resident_zink(b, &intrin->def));
3736          nir_def *vec = nir_vector_insert_imm(b, &intrin->def, res, 4);
3737          nir_def_rewrite_uses_after(&intrin->def, vec, vec->parent_instr);
3738          return true;
3739       }
3740 
3741       case nir_intrinsic_sparse_residency_code_and: {
3742          nir_def *res = nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa);
3743          nir_def_rewrite_uses(&intrin->def, res);
3744          return true;
3745       }
3746 
3747       case nir_intrinsic_is_sparse_texels_resident: {
3748          nir_def *res = nir_i2b(b, intrin->src[0].ssa);
3749          nir_def_rewrite_uses(&intrin->def, res);
3750          return true;
3751       }
3752 
3753       default:
3754          return false;
3755       }
3756    }
3757 
3758    default:
3759       return false;
3760    }
3761 }
3762 
3763 static bool
3764 lower_sparse(nir_shader *shader)
3765 {
3766    return nir_shader_instructions_pass(shader, lower_sparse_instr,
3767                                        nir_metadata_dominance, NULL);
3768 }
3769 
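/* Rewrite lowered-IO load/store/interp intrinsics back into variable deref
 * access so the SPIR-V backend can consume them: each intrinsic is matched to
 * its nir_var_shader_in/out variable by location, component, and (for fs
 * outputs) dual-source blend index, then replaced with a load_deref/store_deref
 * on an equivalent deref chain.
 */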
3770 static bool
3771 add_derefs_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
3772 {
3773    bool is_load = false;
3774    bool is_input = false;
3775    bool is_interp = false;
3776    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
3777       return false;
3778    bool is_special_io = (b->shader->info.stage == MESA_SHADER_VERTEX && is_input) ||
3779                         (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_input);
3780    unsigned loc = nir_intrinsic_io_semantics(intr).location;
3781    nir_src *src_offset = nir_get_io_offset_src(intr);
3782    const unsigned slot_offset = src_offset && nir_src_is_const(*src_offset) ? nir_src_as_uint(*src_offset) : 0;
3783    unsigned location = loc + slot_offset;
3784    unsigned frac = nir_intrinsic_component(intr);
3785    unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
3786    /* set c aligned/rounded down to dword */
3787    unsigned c = frac;
3788    if (frac && bit_size < 32)
3789       c = frac * bit_size / 32;
3790    /* loop over all the variables and rewrite corresponding access */
3791    nir_foreach_variable_with_modes(var, b->shader, is_input ? nir_var_shader_in : nir_var_shader_out) {
3792       const struct glsl_type *type = var->type;
3793       if (nir_is_arrayed_io(var, b->shader->info.stage))
3794          type = glsl_get_array_element(type);
3795       unsigned slot_count = get_var_slot_count(b->shader, var);
3796       /* filter access that isn't specific to this variable */
3797       if (var->data.location > location || var->data.location + slot_count <= location)
3798          continue;
3799       if (var->data.fb_fetch_output != nir_intrinsic_io_semantics(intr).fb_fetch_output)
3800          continue;
3801       if (b->shader->info.stage == MESA_SHADER_FRAGMENT && !is_load && nir_intrinsic_io_semantics(intr).dual_source_blend_index != var->data.index)
3802          continue;
3803 
3804       unsigned size = 0;
3805       bool is_struct = glsl_type_is_struct(glsl_without_array(type));
3806       if (is_struct)
3807          size = get_slot_components(var, var->data.location + slot_offset, var->data.location);
3808       else if (!is_special_io && var->data.compact)
3809          size = glsl_get_aoa_size(type);
3810       else
3811          size = glsl_get_vector_elements(glsl_without_array(type));
3812       assert(size);
3813       if (glsl_type_is_64bit(glsl_without_array(var->type)))
3814          size *= 2;
3815       if (var->data.location != location && size > 4 && size % 4 && !is_struct) {
3816          /* adjust for dvec3-type slot overflow */
3817          assert(location > var->data.location);
3818          size -= (location - var->data.location) * 4;
3819       }
3820       assert(size);
3821       if (var->data.location_frac + size <= c || var->data.location_frac > c)
3822          continue;
3823 
3824       b->cursor = nir_before_instr(&intr->instr);
3825       nir_deref_instr *deref = nir_build_deref_var(b, var);
3826       if (nir_is_arrayed_io(var, b->shader->info.stage)) {
3827          assert(intr->intrinsic != nir_intrinsic_store_output);
3828          deref = nir_build_deref_array(b, deref, intr->src[!is_load].ssa);
3829       }
3830       if (glsl_type_is_array(type)) {
3831          /* unroll array derefs */
3832          unsigned idx = var->data.compact ? (frac - var->data.location_frac) : 0;
3833          assert(src_offset);
3834          if (var->data.location < VARYING_SLOT_VAR0) {
3835             if (src_offset) {
3836                /* clip/cull dist and tess levels use different array offset semantics */
3837                bool is_clipdist = (b->shader->info.stage != MESA_SHADER_VERTEX || var->data.mode == nir_var_shader_out) &&
3838                                   is_clipcull_dist(var->data.location);
3839                bool is_tess_level = b->shader->info.stage == MESA_SHADER_TESS_CTRL &&
3840                                     var->data.location >= VARYING_SLOT_TESS_LEVEL_INNER && var->data.location >= VARYING_SLOT_TESS_LEVEL_OUTER;
3841                bool is_builtin_array = is_clipdist || is_tess_level;
3842                /* this is explicit for ease of debugging but could be collapsed at some point in the future */
3843                if (nir_src_is_const(*src_offset)) {
3844                   unsigned offset = slot_offset;
3845                   if (is_builtin_array)
3846                      offset *= 4;
3847                   if (is_clipdist) {
3848                      if (loc == VARYING_SLOT_CLIP_DIST1 || loc == VARYING_SLOT_CULL_DIST1)
3849                         offset += 4;
3850                   }
3851                   deref = nir_build_deref_array_imm(b, deref, offset + idx);
3852                } else {
3853                   nir_def *offset = src_offset->ssa;
3854                   if (is_builtin_array)
3855                      nir_imul_imm(b, offset, 4);
3856                   deref = nir_build_deref_array(b, deref, idx ? nir_iadd_imm(b, offset, idx) : src_offset->ssa);
3857                }
3858             } else {
3859                deref = nir_build_deref_array_imm(b, deref, idx);
3860             }
3861             type = glsl_get_array_element(type);
3862          } else {
3863             idx += location - var->data.location;
3864             /* need to convert possible N*M to [N][M] */
3865             nir_def *nm = idx ? nir_iadd_imm(b, src_offset->ssa, idx) : src_offset->ssa;
3866             while (glsl_type_is_array(type)) {
3867                const struct glsl_type *elem = glsl_get_array_element(type);
3868                unsigned type_size = glsl_count_vec4_slots(elem, false, false);
3869                nir_def *n = glsl_type_is_array(elem) ? nir_udiv_imm(b, nm, type_size) : nm;
3870                if (glsl_type_is_vector_or_scalar(elem) && glsl_type_is_64bit(elem) && glsl_get_vector_elements(elem) > 2)
3871                   n = nir_udiv_imm(b, n, 2);
3872                deref = nir_build_deref_array(b, deref, n);
3873                nm = nir_umod_imm(b, nm, type_size);
3874                type = glsl_get_array_element(type);
3875             }
3876          }
3877       } else if (glsl_type_is_struct(type)) {
3878          deref = nir_build_deref_struct(b, deref, slot_offset);
3879       }
3880       assert(!glsl_type_is_array(type));
3881       unsigned num_components = glsl_get_vector_elements(type);
3882       if (is_load) {
3883          nir_def *load;
3884          if (is_interp) {
3885             nir_def *interp = intr->src[0].ssa;
3886             nir_intrinsic_instr *interp_intr = nir_instr_as_intrinsic(interp->parent_instr);
3887             assert(interp_intr);
3888             var->data.interpolation = nir_intrinsic_interp_mode(interp_intr);
3889             switch (interp_intr->intrinsic) {
3890             case nir_intrinsic_load_barycentric_centroid:
3891                load = nir_interp_deref_at_centroid(b, num_components, bit_size, &deref->def);
3892                break;
3893             case nir_intrinsic_load_barycentric_sample:
3894                var->data.sample = 1;
3895                load = nir_load_deref(b, deref);
3896                break;
3897             case nir_intrinsic_load_barycentric_pixel:
3898                load = nir_load_deref(b, deref);
3899                break;
3900             case nir_intrinsic_load_barycentric_at_sample:
3901                load = nir_interp_deref_at_sample(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3902                break;
3903             case nir_intrinsic_load_barycentric_at_offset:
3904                load = nir_interp_deref_at_offset(b, num_components, bit_size, &deref->def, interp_intr->src[0].ssa);
3905                break;
3906             default:
3907                unreachable("unhandled interp!");
3908             }
3909          } else {
3910             load = nir_load_deref(b, deref);
3911          }
3912          /* filter needed components */
3913          if (intr->num_components < load->num_components)
3914             load = nir_channels(b, load, BITFIELD_MASK(intr->num_components) << (c - var->data.location_frac));
3915          nir_def_rewrite_uses(&intr->def, load);
3916       } else {
3917          nir_def *store = intr->src[0].ssa;
3918          /* pad/filter components to match deref type */
3919          if (intr->num_components < num_components) {
3920             nir_def *zero = nir_imm_zero(b, 1, bit_size);
3921             nir_def *vec[4] = {zero, zero, zero, zero};
3922             u_foreach_bit(i, nir_intrinsic_write_mask(intr))
3923                vec[c - var->data.location_frac + i] = nir_channel(b, store, i);
3924             store = nir_vec(b, vec, num_components);
3925          } if (store->num_components > num_components) {
3926             store = nir_channels(b, store, nir_intrinsic_write_mask(intr));
3927          }
3928          if (store->bit_size != glsl_get_bit_size(type)) {
3929             /* this should be some weird bindless io conversion */
3930             assert(store->bit_size == 64 && glsl_get_bit_size(type) == 32);
3931             assert(num_components != store->num_components);
3932             store = nir_unpack_64_2x32(b, store);
3933          }
3934          nir_store_deref(b, deref, store, BITFIELD_RANGE(c - var->data.location_frac, intr->num_components));
3935       }
3936       nir_instr_remove(&intr->instr);
3937       return true;
3938    }
3939    unreachable("failed to find variable for explicit io!");
3940    return true;
3941 }
3942 
3943 static bool
3944 add_derefs(nir_shader *nir)
3945 {
3946    return nir_shader_intrinsics_pass(nir, add_derefs_instr,
3947                                      nir_metadata_dominance, NULL);
3948 }
3949 
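/* Final per-variant compile: prune unused IO, run divergence analysis for the
 * stages that need it, convert out of SSA, optionally dump the NIR for
 * debugging, then translate to SPIR-V and build the shader object.
 */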
3950 static struct zink_shader_object
3951 compile_module(struct zink_screen *screen, struct zink_shader *zs, nir_shader *nir, bool can_shobj, struct zink_program *pg)
3952 {
3953    struct zink_shader_info *sinfo = &zs->sinfo;
3954    prune_io(nir);
3955 
3956    switch (nir->info.stage) {
3957    case MESA_SHADER_VERTEX:
3958    case MESA_SHADER_TESS_EVAL:
3959    case MESA_SHADER_GEOMETRY:
3960       NIR_PASS_V(nir, nir_divergence_analysis);
3961       break;
3962    default: break;
3963    }
3964    NIR_PASS_V(nir, nir_convert_from_ssa, true);
3965 
3966    if (zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV))
3967       nir_index_ssa_defs(nir_shader_get_entrypoint(nir));
3968    if (zink_debug & ZINK_DEBUG_NIR) {
3969       fprintf(stderr, "NIR shader:\n---8<---\n");
3970       nir_print_shader(nir, stderr);
3971       fprintf(stderr, "---8<---\n");
3972    }
3973 
3974    struct zink_shader_object obj = {0};
3975    struct spirv_shader *spirv = nir_to_spirv(nir, sinfo, screen);
3976    if (spirv)
3977       obj = zink_shader_spirv_compile(screen, zs, spirv, can_shobj, pg);
3978 
3979    /* TODO: determine if there's any reason to cache spirv output? */
3980    if (zs->info.stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated)
3981       zs->spirv = spirv;
3982    else
3983       obj.spirv = spirv;
3984    return obj;
3985 }
3986 
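/* Used below in zink_shader_compile when the fs key reports a zero sample
 * count: per-sample interpolation has nothing meaningful to interpolate at,
 * so interp_deref_at_sample is demoted to a plain load_deref of the same input.
 */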
3987 static bool
3988 remove_interpolate_at_sample(struct nir_builder *b, nir_intrinsic_instr *interp, void *data)
3989 {
3990    if (interp->intrinsic != nir_intrinsic_interp_deref_at_sample)
3991       return false;
3992 
3993    b->cursor = nir_before_instr(&interp->instr);
3994    nir_def *res = nir_load_deref(b, nir_src_as_deref(interp->src[0]));
3995    nir_def_rewrite_uses(&interp->def, res);
3996 
3997    return true;
3998 }
3999 
4000 struct zink_shader_object
4001 zink_shader_compile(struct zink_screen *screen, bool can_shobj, struct zink_shader *zs,
4002                     nir_shader *nir, const struct zink_shader_key *key, const void *extra_data, struct zink_program *pg)
4003 {
4004    bool need_optimize = true;
4005    bool inlined_uniforms = false;
4006 
4007    NIR_PASS_V(nir, add_derefs);
4008    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4009    if (key) {
4010       if (key->inline_uniforms) {
4011          NIR_PASS_V(nir, nir_inline_uniforms,
4012                     nir->info.num_inlinable_uniforms,
4013                     key->base.inlined_uniform_values,
4014                     nir->info.inlinable_uniform_dw_offsets);
4015 
4016          inlined_uniforms = true;
4017       }
4018 
4019       /* TODO: use a separate mem ctx here for ralloc */
4020 
4021       if (!screen->optimal_keys) {
4022          switch (zs->info.stage) {
4023          case MESA_SHADER_VERTEX: {
4024             uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
4025             const struct zink_vs_key *vs_key = zink_vs_key(key);
4026             switch (vs_key->size) {
4027             case 4:
4028                decomposed_attrs = vs_key->u32.decomposed_attrs;
4029                decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
4030                break;
4031             case 2:
4032                decomposed_attrs = vs_key->u16.decomposed_attrs;
4033                decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
4034                break;
4035             case 1:
4036                decomposed_attrs = vs_key->u8.decomposed_attrs;
4037                decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
4038                break;
4039             default: break;
4040             }
4041             if (decomposed_attrs || decomposed_attrs_without_w)
4042                NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
4043             break;
4044          }
4045 
4046          case MESA_SHADER_GEOMETRY:
4047             if (zink_gs_key(key)->lower_line_stipple) {
4048                NIR_PASS_V(nir, lower_line_stipple_gs, zink_gs_key(key)->line_rectangular);
4049                NIR_PASS_V(nir, nir_lower_var_copies);
4050                need_optimize = true;
4051             }
4052 
4053             if (zink_gs_key(key)->lower_line_smooth) {
4054                NIR_PASS_V(nir, lower_line_smooth_gs);
4055                NIR_PASS_V(nir, nir_lower_var_copies);
4056                need_optimize = true;
4057             }
4058 
4059             if (zink_gs_key(key)->lower_gl_point) {
4060                NIR_PASS_V(nir, lower_gl_point_gs);
4061                need_optimize = true;
4062             }
4063 
4064             if (zink_gs_key(key)->lower_pv_mode) {
4065                NIR_PASS_V(nir, lower_pv_mode_gs, zink_gs_key(key)->lower_pv_mode);
4066                need_optimize = true; //TODO verify that this is required
4067             }
4068             break;
4069 
4070          default:
4071             break;
4072          }
4073       }
4074 
4075       switch (zs->info.stage) {
4076       case MESA_SHADER_VERTEX:
4077       case MESA_SHADER_TESS_EVAL:
4078       case MESA_SHADER_GEOMETRY:
4079          if (zink_vs_key_base(key)->last_vertex_stage) {
4080             if (!zink_vs_key_base(key)->clip_halfz && !screen->info.have_EXT_depth_clip_control) {
4081                NIR_PASS_V(nir, nir_lower_clip_halfz);
4082             }
4083             if (zink_vs_key_base(key)->push_drawid) {
4084                NIR_PASS_V(nir, lower_drawid);
4085             }
4086          } else {
4087             nir->xfb_info = NULL;
4088          }
4089          if (zink_vs_key_base(key)->robust_access)
4090             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4091          break;
4092       case MESA_SHADER_FRAGMENT:
4093          if (zink_fs_key(key)->lower_line_smooth) {
4094             NIR_PASS_V(nir, lower_line_smooth_fs,
4095                        zink_fs_key(key)->lower_line_stipple);
4096             need_optimize = true;
4097          } else if (zink_fs_key(key)->lower_line_stipple)
4098                NIR_PASS_V(nir, lower_line_stipple_fs);
4099 
4100          if (zink_fs_key(key)->lower_point_smooth) {
4101             NIR_PASS_V(nir, nir_lower_point_smooth);
4102             NIR_PASS_V(nir, nir_lower_discard_if, nir_lower_discard_if_to_cf);
4103             nir->info.fs.uses_discard = true;
4104             need_optimize = true;
4105          }
4106 
4107          if (zink_fs_key(key)->robust_access)
4108             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4109 
4110          if (!zink_fs_key_base(key)->samples && zink_shader_uses_samples(zs)) {
4111             /* VK will always use gl_SampleMask[] values even if sample count is 0,
4112             * so we need to skip this write here to mimic GL's behavior of ignoring it
4113             */
4114             nir_foreach_shader_out_variable(var, nir) {
4115                if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
4116                   var->data.mode = nir_var_shader_temp;
4117             }
4118             nir_fixup_deref_modes(nir);
4119             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4120             NIR_PASS_V(nir, nir_shader_intrinsics_pass, remove_interpolate_at_sample,
4121                        nir_metadata_control_flow, NULL);
4122 
4123             need_optimize = true;
4124          }
4125          if (zink_fs_key_base(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
4126             NIR_PASS_V(nir, lower_dual_blend);
4127          }
4128          if (zink_fs_key_base(key)->coord_replace_bits)
4129             NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key_base(key)->coord_replace_bits, true, false);
4130          if (zink_fs_key_base(key)->point_coord_yinvert)
4131             NIR_PASS_V(nir, invert_point_coord);
4132          if (zink_fs_key_base(key)->force_persample_interp || zink_fs_key_base(key)->fbfetch_ms) {
4133             nir_foreach_shader_in_variable(var, nir)
4134                var->data.sample = true;
4135             nir->info.fs.uses_sample_qualifier = true;
4136             nir->info.fs.uses_sample_shading = true;
4137          }
4138          if (zs->fs.legacy_shadow_mask && !key->base.needs_zs_shader_swizzle)
4139             NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, zink_fs_key_base(key)->shadow_needs_shader_swizzle ? extra_data : NULL, true);
4140          if (nir->info.fs.uses_fbfetch_output) {
4141             nir_variable *fbfetch = NULL;
4142             NIR_PASS_V(nir, lower_fbfetch, &fbfetch, zink_fs_key_base(key)->fbfetch_ms);
4143             /* old variable must be deleted to avoid spirv errors */
4144             fbfetch->data.mode = nir_var_shader_temp;
4145             nir_fixup_deref_modes(nir);
4146             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4147             need_optimize = true;
4148          }
4149          nir_foreach_shader_in_variable_safe(var, nir) {
4150             if (!is_texcoord(MESA_SHADER_FRAGMENT, var) || var->data.driver_location != -1)
4151                continue;
4152             nir_shader_instructions_pass(nir, rewrite_read_as_0, nir_metadata_dominance, var);
4153             var->data.mode = nir_var_shader_temp;
4154             nir_fixup_deref_modes(nir);
4155             NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4156             need_optimize = true;
4157          }
4158          break;
4159       case MESA_SHADER_COMPUTE:
4160          if (zink_cs_key(key)->robust_access)
4161             NIR_PASS(need_optimize, nir, lower_txf_lod_robustness);
4162          break;
4163       default: break;
4164       }
4165       if (key->base.needs_zs_shader_swizzle) {
4166          assert(extra_data);
4167          NIR_PASS(need_optimize, nir, lower_zs_swizzle_tex, extra_data, false);
4168       }
4169       if (key->base.nonseamless_cube_mask) {
4170          NIR_PASS_V(nir, zink_lower_cubemap_to_array, key->base.nonseamless_cube_mask);
4171          need_optimize = true;
4172       }
4173    }
4174    if (screen->driconf.inline_uniforms) {
4175       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4176       NIR_PASS_V(nir, rewrite_bo_access, screen);
4177       NIR_PASS_V(nir, remove_bo_access, zs);
4178       need_optimize = true;
4179    }
4180    if (inlined_uniforms) {
4181       optimize_nir(nir, zs, true);
4182 
4183       /* This must be done again. */
4184       NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
4185                                                        nir_var_shader_out);
4186 
4187       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
4188       if (impl->ssa_alloc > ZINK_ALWAYS_INLINE_LIMIT)
4189          zs->can_inline = false;
4190    } else if (need_optimize)
4191       optimize_nir(nir, zs, true);
4192    bool has_sparse = false;
4193    NIR_PASS(has_sparse, nir, lower_sparse);
4194    if (has_sparse)
4195       optimize_nir(nir, zs, false);
4196 
4197    struct zink_shader_object obj = compile_module(screen, zs, nir, can_shobj, pg);
4198    ralloc_free(nir);
4199    return obj;
4200 }
4201 
4202 struct zink_shader_object
4203 zink_shader_compile_separate(struct zink_screen *screen, struct zink_shader *zs)
4204 {
4205    nir_shader *nir = zs->nir;
4206    /* TODO: maybe compile multiple variants for different set counts for compact mode? */
4207    int set = zs->info.stage == MESA_SHADER_FRAGMENT;
4208    if (screen->info.have_EXT_shader_object)
4209       set = zs->info.stage;
4210    unsigned offsets[4];
4211    zink_descriptor_shader_get_binding_offsets(zs, offsets);
4212    nir_foreach_variable_with_modes(var, nir, nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_uniform | nir_var_image) {
4213       if (var->data.descriptor_set == screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS])
4214          continue;
4215       var->data.descriptor_set = set;
4216       switch (var->data.mode) {
4217       case nir_var_mem_ubo:
4218             var->data.binding = !!var->data.driver_location;
4219             break;
4220       case nir_var_uniform:
4221          if (glsl_type_is_sampler(glsl_without_array(var->type)))
4222             var->data.binding += offsets[1];
4223          break;
4224       case nir_var_mem_ssbo:
4225          var->data.binding += offsets[2];
4226          break;
4227       case nir_var_image:
4228          var->data.binding += offsets[3];
4229          break;
4230       default: break;
4231       }
4232    }
4233    NIR_PASS_V(nir, add_derefs);
4234    NIR_PASS_V(nir, nir_lower_fragcolor, nir->info.fs.color_is_dual_source ? 1 : 8);
4235    if (screen->driconf.inline_uniforms) {
4236       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
4237       NIR_PASS_V(nir, rewrite_bo_access, screen);
4238       NIR_PASS_V(nir, remove_bo_access, zs);
4239    }
4240    optimize_nir(nir, zs, true);
4241    zink_descriptor_shader_init(screen, zs);
4242    nir_shader *nir_clone = NULL;
4243    if (screen->info.have_EXT_shader_object)
4244       nir_clone = nir_shader_clone(nir, nir);
4245    struct zink_shader_object obj = compile_module(screen, zs, nir, true, NULL);
4246    if (screen->info.have_EXT_shader_object && !zs->info.internal) {
4247       /* always try to pre-generate a tcs in case it's needed */
4248       if (zs->info.stage == MESA_SHADER_TESS_EVAL) {
4249          nir_shader *nir_tcs = NULL;
4250          /* use max pcp for compat */
4251          zs->non_fs.generated_tcs = zink_shader_tcs_create(screen, 32);
4252          zink_shader_tcs_init(screen, zs->non_fs.generated_tcs, nir_clone, &nir_tcs);
4253          nir_tcs->info.separate_shader = true;
4254          zs->non_fs.generated_tcs->precompile.obj = zink_shader_compile_separate(screen, zs->non_fs.generated_tcs);
4255          ralloc_free(nir_tcs);
4256          zs->non_fs.generated_tcs->nir = NULL;
4257       }
4258    }
4259    spirv_shader_delete(obj.spirv);
4260    obj.spirv = NULL;
4261    return obj;
4262 }
4263 
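/* GL's gl_InstanceID does not include the base instance, while Vulkan's
 * InstanceIndex does, so roughly gl_InstanceID = InstanceIndex - BaseInstance;
 * subtract load_base_instance from every load_instance_id in vertex shaders.
 */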
4264 static bool
4265 lower_baseinstance_instr(nir_builder *b, nir_intrinsic_instr *intr,
4266                          void *data)
4267 {
4268    if (intr->intrinsic != nir_intrinsic_load_instance_id)
4269       return false;
4270    b->cursor = nir_after_instr(&intr->instr);
4271    nir_def *def = nir_isub(b, &intr->def, nir_load_base_instance(b));
4272    nir_def_rewrite_uses_after(&intr->def, def, def->parent_instr);
4273    return true;
4274 }
4275 
4276 static bool
4277 lower_baseinstance(nir_shader *shader)
4278 {
4279    if (shader->info.stage != MESA_SHADER_VERTEX)
4280       return false;
4281    return nir_shader_intrinsics_pass(shader, lower_baseinstance_instr,
4282                                      nir_metadata_dominance, NULL);
4283 }
4284 
4285 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
4286  * so instead we delete all those broken variables and just make new ones
4287  */
4288 static bool
4289 unbreak_bos(nir_shader *shader, struct zink_shader *zs, bool needs_size)
4290 {
4291    uint64_t max_ssbo_size = 0;
4292    uint64_t max_ubo_size = 0;
4293    uint64_t max_uniform_size = 0;
4294 
4295    if (!shader->info.num_ssbos && !shader->info.num_ubos)
4296       return false;
4297 
4298    nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
4299       const struct glsl_type *type = glsl_without_array(var->type);
4300       if (type_is_counter(type))
4301          continue;
4302       /* be conservative: use the bigger of the interface and variable types to ensure in-bounds access */
4303       unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
4304       const struct glsl_type *interface_type = var->interface_type ? glsl_without_array(var->interface_type) : NULL;
4305       if (interface_type) {
4306          unsigned block_size = glsl_get_explicit_size(interface_type, true);
4307          if (glsl_get_length(interface_type) == 1) {
4308             /* handle bare unsized ssbo arrays: glsl_get_explicit_size always returns type-aligned sizes */
4309             const struct glsl_type *f = glsl_get_struct_field(interface_type, 0);
4310             if (glsl_type_is_array(f) && !glsl_array_size(f))
4311                block_size = 0;
4312          }
4313          if (block_size) {
4314             block_size = DIV_ROUND_UP(block_size, sizeof(float) * 4);
4315             size = MAX2(size, block_size);
4316          }
4317       }
4318       if (var->data.mode == nir_var_mem_ubo) {
4319          if (var->data.driver_location)
4320             max_ubo_size = MAX2(max_ubo_size, size);
4321          else
4322             max_uniform_size = MAX2(max_uniform_size, size);
4323       } else {
4324          max_ssbo_size = MAX2(max_ssbo_size, size);
4325          if (interface_type) {
4326             if (glsl_type_is_unsized_array(glsl_get_struct_field(interface_type, glsl_get_length(interface_type) - 1)))
4327                needs_size = true;
4328          }
4329       }
4330       var->data.mode = nir_var_shader_temp;
4331    }
4332    nir_fixup_deref_modes(shader);
4333    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4334    optimize_nir(shader, NULL, true);
4335 
4336    struct glsl_struct_field field = {0};
4337    field.name = ralloc_strdup(shader, "base");
4338    if (shader->info.num_ubos) {
4339       if (shader->num_uniforms && zs->ubos_used & BITFIELD_BIT(0)) {
4340          field.type = glsl_array_type(glsl_uint_type(), max_uniform_size * 4, 4);
4341          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4342                                                  glsl_array_type(glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430, false, "struct"), 1, 0),
4343                                                  "uniform_0@32");
4344          var->interface_type = var->type;
4345          var->data.mode = nir_var_mem_ubo;
4346          var->data.driver_location = 0;
4347       }
4348 
4349       unsigned num_ubos = shader->info.num_ubos - !!shader->info.first_ubo_is_default_ubo;
4350       uint32_t ubos_used = zs->ubos_used & ~BITFIELD_BIT(0);
4351       if (num_ubos && ubos_used) {
4352          field.type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
4353          /* shrink array as much as possible */
4354          unsigned first_ubo = ffs(ubos_used) - 2;
4355          assert(first_ubo < PIPE_MAX_CONSTANT_BUFFERS);
4356          num_ubos -= first_ubo;
4357          assert(num_ubos);
4358          nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo,
4359                                    glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ubos, 0),
4360                                    "ubos@32");
4361          var->interface_type = var->type;
4362          var->data.mode = nir_var_mem_ubo;
4363          var->data.driver_location = first_ubo + !!shader->info.first_ubo_is_default_ubo;
4364       }
4365    }
4366    if (shader->info.num_ssbos && zs->ssbos_used) {
4367       /* shrink array as much as possible */
4368       unsigned first_ssbo = ffs(zs->ssbos_used) - 1;
4369       assert(first_ssbo < PIPE_MAX_SHADER_BUFFERS);
4370       unsigned num_ssbos = shader->info.num_ssbos - first_ssbo;
4371       assert(num_ssbos);
4372       const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), needs_size ? 0 : max_ssbo_size * 4, 4);
4373       field.type = ssbo_type;
4374       nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
4375                                               glsl_array_type(glsl_struct_type(&field, 1, "struct", false), num_ssbos, 0),
4376                                               "ssbos@32");
4377       var->interface_type = var->type;
4378       var->data.mode = nir_var_mem_ssbo;
4379       var->data.driver_location = first_ssbo;
4380    }
4381    return true;
4382 }
4383 
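/* Conservative helpers for analyze_io below: a constant buffer index marks
 * exactly one SSBO/UBO as used, anything else marks them all (for UBOs the
 * default uniform block at index 0 is excluded from the catch-all mask).
 */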
4384 static uint32_t
4385 get_src_mask_ssbo(unsigned total, nir_src src)
4386 {
4387    if (nir_src_is_const(src))
4388       return BITFIELD_BIT(nir_src_as_uint(src));
4389    return BITFIELD_MASK(total);
4390 }
4391 
4392 static uint32_t
4393 get_src_mask_ubo(unsigned total, nir_src src)
4394 {
4395    if (nir_src_is_const(src))
4396       return BITFIELD_BIT(nir_src_as_uint(src));
4397    return BITFIELD_MASK(total) & ~BITFIELD_BIT(0);
4398 }
4399 
4400 static bool
4401 analyze_io(struct zink_shader *zs, nir_shader *shader)
4402 {
4403    bool ret = false;
4404    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
4405    nir_foreach_block(block, impl) {
4406       nir_foreach_instr(instr, block) {
4407          if (shader->info.stage != MESA_SHADER_KERNEL && instr->type == nir_instr_type_tex) {
4408             /* gl_nir_lower_samplers_as_deref is where this would normally be set, but zink doesn't use it */
4409             nir_tex_instr *tex = nir_instr_as_tex(instr);
4410             int deref_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
4411             if (deref_idx >= 0) {
4412                nir_variable *img = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[deref_idx].src.ssa->parent_instr));
4413                unsigned size = glsl_type_is_array(img->type) ? glsl_get_aoa_size(img->type) : 1;
4414                BITSET_SET_RANGE(shader->info.textures_used, img->data.driver_location, img->data.driver_location + (size - 1));
4415             }
4416             continue;
4417          }
4418          if (instr->type != nir_instr_type_intrinsic)
4419             continue;
4420 
4421          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
4422          switch (intrin->intrinsic) {
4423          case nir_intrinsic_store_ssbo:
4424             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[1]);
4425             break;
4426 
4427          case nir_intrinsic_get_ssbo_size: {
4428             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4429             ret = true;
4430             break;
4431          }
4432          case nir_intrinsic_ssbo_atomic:
4433          case nir_intrinsic_ssbo_atomic_swap:
4434          case nir_intrinsic_load_ssbo:
4435             zs->ssbos_used |= get_src_mask_ssbo(shader->info.num_ssbos, intrin->src[0]);
4436             break;
4437          case nir_intrinsic_load_ubo:
4438          case nir_intrinsic_load_ubo_vec4:
4439             zs->ubos_used |= get_src_mask_ubo(shader->info.num_ubos, intrin->src[0]);
4440             break;
4441          default:
4442             break;
4443          }
4444       }
4445    }
4446    return ret;
4447 }
4448 
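/* Slots in bindless[]: 0 = sampled image, 1 = uniform texel buffer,
 * 2 = storage image, 3 = storage texel buffer; filled lazily by
 * create_bindless_texture/create_bindless_image or by handle_bindless_var.
 */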
4449 struct zink_bindless_info {
4450    nir_variable *bindless[4];
4451    unsigned bindless_set;
4452 };
4453 
4454 /* this is a "default" bindless texture used if the shader has no texture variables */
4455 static nir_variable *
4456 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex, unsigned descriptor_set)
4457 {
4458    unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
4459    nir_variable *var;
4460 
4461    const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
4462    var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
4463    var->data.descriptor_set = descriptor_set;
4464    var->data.driver_location = var->data.binding = binding;
4465    return var;
4466 }
4467 
4468 /* this is a "default" bindless image used if the shader has no image variables */
4469 static nir_variable *
4470 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim, unsigned descriptor_set)
4471 {
4472    unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
4473    nir_variable *var;
4474 
4475    const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
4476    var = nir_variable_create(nir, nir_var_image, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
4477    var->data.descriptor_set = descriptor_set;
4478    var->data.driver_location = var->data.binding = binding;
4479    var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4480    return var;
4481 }
4482 
4483 /* rewrite bindless instructions as array deref instructions */
4484 static bool
4485 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
4486 {
4487    struct zink_bindless_info *bindless = data;
4488 
4489    if (in->type == nir_instr_type_tex) {
4490       nir_tex_instr *tex = nir_instr_as_tex(in);
4491       int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4492       if (idx == -1)
4493          return false;
4494 
4495       nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[1] : bindless->bindless[0];
4496       if (!var) {
4497          var = create_bindless_texture(b->shader, tex, bindless->bindless_set);
4498          if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4499             bindless->bindless[1] = var;
4500          else
4501             bindless->bindless[0] = var;
4502       }
4503       b->cursor = nir_before_instr(in);
4504       nir_deref_instr *deref = nir_build_deref_var(b, var);
4505       if (glsl_type_is_array(var->type))
4506          deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
4507       nir_src_rewrite(&tex->src[idx].src, &deref->def);
4508 
4509       /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
4510        * match up with it in contrast to normal sampler ops where things are a bit more flexible;
4511        * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
4512        * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
4513        *
4514        * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
4515        * - Warhammer 40k: Dawn of War III
4516        */
4517       unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
4518       unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
4519       unsigned coord_components = nir_src_num_components(tex->src[c].src);
4520       if (coord_components < needed_components) {
4521          nir_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
4522          nir_src_rewrite(&tex->src[c].src, def);
4523          tex->coord_components = needed_components;
4524       }
4525       return true;
4526    }
4527    if (in->type != nir_instr_type_intrinsic)
4528       return false;
4529    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
4530 
4531    nir_intrinsic_op op;
4532 #define OP_SWAP(OP) \
4533    case nir_intrinsic_bindless_image_##OP: \
4534       op = nir_intrinsic_image_deref_##OP; \
4535       break;
4536 
4537 
4538    /* convert bindless intrinsics to deref intrinsics */
4539    switch (instr->intrinsic) {
4540    OP_SWAP(atomic)
4541    OP_SWAP(atomic_swap)
4542    OP_SWAP(format)
4543    OP_SWAP(load)
4544    OP_SWAP(order)
4545    OP_SWAP(samples)
4546    OP_SWAP(size)
4547    OP_SWAP(store)
4548    default:
4549       return false;
4550    }
4551 
4552    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
4553    nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless->bindless[3] : bindless->bindless[2];
4554    if (!var)
4555       var = create_bindless_image(b->shader, dim, bindless->bindless_set);
4556    instr->intrinsic = op;
4557    b->cursor = nir_before_instr(in);
4558    nir_deref_instr *deref = nir_build_deref_var(b, var);
4559    if (glsl_type_is_array(var->type))
4560       deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
4561    nir_src_rewrite(&instr->src[0], &deref->def);
4562    return true;
4563 }
4564 
4565 static bool
4566 lower_bindless(nir_shader *shader, struct zink_bindless_info *bindless)
4567 {
4568    if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
4569       return false;
4570    nir_fixup_deref_modes(shader);
4571    NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
4572    optimize_nir(shader, NULL, true);
4573    return true;
4574 }
4575 
4576 /* convert shader image/texture io variables to int64 handles for bindless indexing */
4577 static bool
4578 lower_bindless_io_instr(nir_builder *b, nir_intrinsic_instr *instr,
4579                         void *data)
4580 {
4581    bool is_load = false;
4582    bool is_input = false;
4583    bool is_interp = false;
4584    if (!filter_io_instr(instr, &is_load, &is_input, &is_interp))
4585       return false;
4586 
4587    nir_variable *var = find_var_with_location_frac(b->shader, nir_intrinsic_io_semantics(instr).location, nir_intrinsic_component(instr), false, is_input ? nir_var_shader_in : nir_var_shader_out);
4588    if (var->data.bindless)
4589       return false;
4590    if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
4591       return false;
4592    if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
4593       return false;
4594 
4595    var->type = glsl_vector_type(GLSL_TYPE_INT, 2);
4596    var->data.bindless = 1;
4597    return true;
4598 }
4599 
4600 static bool
4601 lower_bindless_io(nir_shader *shader)
4602 {
4603    return nir_shader_intrinsics_pass(shader, lower_bindless_io_instr,
4604                                      nir_metadata_dominance, NULL);
4605 }
4606 
4607 static uint32_t
4608 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index, bool compact_descriptors)
4609 {
4610    if (stage == MESA_SHADER_NONE) {
4611       unreachable("not supported");
4612    } else {
4613       unsigned base = stage;
4614       /* clamp compute bindings for better driver efficiency */
4615       if (gl_shader_stage_is_compute(stage))
4616          base = 0;
4617       switch (type) {
4618       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
4619       case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
4620          return base * 2 + !!index;
4621 
4622       case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
4623          assert(stage == MESA_SHADER_KERNEL);
4624          FALLTHROUGH;
4625       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4626          if (stage == MESA_SHADER_KERNEL) {
4627             assert(index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
4628             return index + PIPE_MAX_SAMPLERS;
4629          }
4630          FALLTHROUGH;
4631       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4632          assert(index < PIPE_MAX_SAMPLERS);
4633          assert(stage != MESA_SHADER_KERNEL);
4634          return (base * PIPE_MAX_SAMPLERS) + index;
4635 
4636       case VK_DESCRIPTOR_TYPE_SAMPLER:
4637          assert(index < PIPE_MAX_SAMPLERS);
4638          assert(stage == MESA_SHADER_KERNEL);
4639          return index;
4640 
4641       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
4642          return base + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * 2));
4643 
4644       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4645       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4646          assert(index < ZINK_MAX_SHADER_IMAGES);
4647          if (stage == MESA_SHADER_KERNEL)
4648             return index + (compact_descriptors ? (PIPE_MAX_SAMPLERS + PIPE_MAX_SHADER_SAMPLER_VIEWS) : 0);
4649          return (base * ZINK_MAX_SHADER_IMAGES) + index + (compact_descriptors * (ZINK_GFX_SHADER_COUNT * PIPE_MAX_SAMPLERS));
4650 
4651       default:
4652          unreachable("unexpected type");
4653       }
4654    }
4655 }
4656 
4657 static void
4658 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, struct zink_bindless_info *bindless)
4659 {
4660    if (glsl_type_is_struct(type)) {
4661       for (unsigned i = 0; i < glsl_get_length(type); i++)
4662          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
4663       return;
4664    }
4665 
4666    /* just a random scalar in a struct */
4667    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
4668       return;
4669 
4670    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
4671    unsigned binding;
4672    switch (vktype) {
4673       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
4674          binding = 0;
4675          break;
4676       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
4677          binding = 1;
4678          break;
4679       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
4680          binding = 2;
4681          break;
4682       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
4683          binding = 3;
4684          break;
4685       default:
4686          unreachable("unknown");
4687    }
4688    if (!bindless->bindless[binding]) {
4689       bindless->bindless[binding] = nir_variable_clone(var, nir);
4690       bindless->bindless[binding]->data.bindless = 0;
4691       bindless->bindless[binding]->data.descriptor_set = bindless->bindless_set;
4692       bindless->bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
4693       bindless->bindless[binding]->data.driver_location = bindless->bindless[binding]->data.binding = binding;
4694       if (!bindless->bindless[binding]->data.image.format)
4695          bindless->bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
4696       nir_shader_add_variable(nir, bindless->bindless[binding]);
4697    } else {
4698       assert(glsl_get_sampler_dim(glsl_without_array(bindless->bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
4699    }
4700    var->data.mode = nir_var_shader_temp;
4701 }
4702 
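/* Promote 1D shadow samplers to 2D (presumably as a workaround for drivers
 * that cannot sample them directly): lower_1d_shadow retypes the variables
 * and this pass pads the coord/offset/derivative sources with a zero y
 * component and widens the destination to match the 2D result.
 */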
4703 static bool
4704 convert_1d_shadow_tex(nir_builder *b, nir_instr *instr, void *data)
4705 {
4706    struct zink_screen *screen = data;
4707    if (instr->type != nir_instr_type_tex)
4708       return false;
4709    nir_tex_instr *tex = nir_instr_as_tex(instr);
4710    if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D || !tex->is_shadow)
4711       return false;
4712    if (tex->is_sparse && screen->need_2D_sparse) {
4713       /* no known case of this exists: only nvidia can hit it, and nothing uses it */
4714       mesa_loge("unhandled/unsupported 1D sparse texture!");
4715       abort();
4716    }
4717    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
4718    b->cursor = nir_before_instr(instr);
4719    tex->coord_components++;
4720    unsigned srcs[] = {
4721       nir_tex_src_coord,
4722       nir_tex_src_offset,
4723       nir_tex_src_ddx,
4724       nir_tex_src_ddy,
4725    };
4726    for (unsigned i = 0; i < ARRAY_SIZE(srcs); i++) {
4727       unsigned c = nir_tex_instr_src_index(tex, srcs[i]);
4728       if (c == -1)
4729          continue;
4730       if (tex->src[c].src.ssa->num_components == tex->coord_components)
4731          continue;
4732       nir_def *def;
4733       nir_def *zero = nir_imm_zero(b, 1, tex->src[c].src.ssa->bit_size);
4734       if (tex->src[c].src.ssa->num_components == 1)
4735          def = nir_vec2(b, tex->src[c].src.ssa, zero);
4736       else
4737          def = nir_vec3(b, nir_channel(b, tex->src[c].src.ssa, 0), zero, nir_channel(b, tex->src[c].src.ssa, 1));
4738       nir_src_rewrite(&tex->src[c].src, def);
4739    }
4740    b->cursor = nir_after_instr(instr);
4741    unsigned needed_components = nir_tex_instr_dest_size(tex);
4742    unsigned num_components = tex->def.num_components;
4743    if (needed_components > num_components) {
4744       tex->def.num_components = needed_components;
4745       assert(num_components < 3);
4746       /* take either xz or just x since this is promoted to 2D from 1D */
4747       uint32_t mask = num_components == 2 ? (1|4) : 1;
4748       nir_def *dst = nir_channels(b, &tex->def, mask);
4749       nir_def_rewrite_uses_after(&tex->def, dst, dst->parent_instr);
4750    }
4751    return true;
4752 }
4753 
4754 static bool
4755 lower_1d_shadow(nir_shader *shader, struct zink_screen *screen)
4756 {
4757    bool found = false;
4758    nir_foreach_variable_with_modes(var, shader, nir_var_uniform | nir_var_image) {
4759       const struct glsl_type *type = glsl_without_array(var->type);
4760       unsigned length = glsl_get_length(var->type);
4761       if (!glsl_type_is_sampler(type) || !glsl_sampler_type_is_shadow(type) || glsl_get_sampler_dim(type) != GLSL_SAMPLER_DIM_1D)
4762          continue;
4763       const struct glsl_type *sampler = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, true, glsl_sampler_type_is_array(type), glsl_get_sampler_result_type(type));
4764       var->type = type != var->type ? glsl_array_type(sampler, length, glsl_get_explicit_stride(var->type)) : sampler;
4765 
4766       found = true;
4767    }
4768    if (found) {
4769       nir_shader_instructions_pass(shader, convert_1d_shadow_tex, nir_metadata_dominance, screen);
4770       nir_fixup_deref_types(shader);
4771    }
4772    return found;
4773 }
4774 
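/* One-time scan over the shader to collect information zink needs later:
 * sparse-resource usage, the images_used bitset, which IO locations are
 * arrayed, clip/cull distance array sizes, and a warning when float image
 * atomics are used without VK_EXT_shader_atomic_float.
 */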
4775 static void
4776 scan_nir(struct zink_screen *screen, nir_shader *shader, struct zink_shader *zs)
4777 {
4778    nir_foreach_function_impl(impl, shader) {
4779       nir_foreach_block_safe(block, impl) {
4780          nir_foreach_instr_safe(instr, block) {
4781             if (instr->type == nir_instr_type_tex) {
4782                nir_tex_instr *tex = nir_instr_as_tex(instr);
4783                zs->sinfo.have_sparse |= tex->is_sparse;
4784             }
4785             if (instr->type != nir_instr_type_intrinsic)
4786                continue;
4787             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
4788             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
4789                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
4790                 intr->intrinsic == nir_intrinsic_image_deref_store ||
4791                 intr->intrinsic == nir_intrinsic_image_deref_atomic ||
4792                 intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
4793                 intr->intrinsic == nir_intrinsic_image_deref_size ||
4794                 intr->intrinsic == nir_intrinsic_image_deref_samples ||
4795                 intr->intrinsic == nir_intrinsic_image_deref_format ||
4796                 intr->intrinsic == nir_intrinsic_image_deref_order) {
4797 
4798                 nir_variable *var = nir_intrinsic_get_var(intr, 0);
4799 
4800                 /* Structs have been lowered already, so get_aoa_size is sufficient. */
4801                 const unsigned size =
4802                    glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
4803                 BITSET_SET_RANGE(shader->info.images_used, var->data.binding,
4804                                  var->data.binding + (MAX2(size, 1) - 1));
4805             }
4806             if (intr->intrinsic == nir_intrinsic_is_sparse_texels_resident ||
4807                 intr->intrinsic == nir_intrinsic_image_deref_sparse_load)
4808                zs->sinfo.have_sparse = true;
4809 
4810             bool is_load = false;
4811             bool is_input = false;
4812             bool is_interp = false;
4813             if (filter_io_instr(intr, &is_load, &is_input, &is_interp)) {
4814                nir_io_semantics s = nir_intrinsic_io_semantics(intr);
4815                if (io_instr_is_arrayed(intr) && s.location < VARYING_SLOT_PATCH0) {
4816                   if (is_input)
4817                      zs->arrayed_inputs |= BITFIELD64_BIT(s.location);
4818                   else
4819                      zs->arrayed_outputs |= BITFIELD64_BIT(s.location);
4820                }
4821                /* TODO: delete this once #10826 is fixed */
4822                if (!(is_input && shader->info.stage == MESA_SHADER_VERTEX)) {
4823                   if (is_clipcull_dist(s.location)) {
4824                      unsigned frac = nir_intrinsic_component(intr) + 1;
4825                      if (s.location < VARYING_SLOT_CULL_DIST0) {
4826                         if (s.location == VARYING_SLOT_CLIP_DIST1)
4827                            frac += 4;
4828                         shader->info.clip_distance_array_size = MAX3(shader->info.clip_distance_array_size, frac, s.num_slots);
4829                      } else {
4830                         if (s.location == VARYING_SLOT_CULL_DIST1)
4831                            frac += 4;
4832                         shader->info.cull_distance_array_size = MAX3(shader->info.cull_distance_array_size, frac, s.num_slots);
4833                      }
4834                   }
4835                }
4836             }
4837 
4838             static bool warned = false;
4839             if (!screen->info.have_EXT_shader_atomic_float && !screen->is_cpu && !warned) {
4840                switch (intr->intrinsic) {
4841                case nir_intrinsic_image_deref_atomic: {
4842                   nir_variable *var = nir_intrinsic_get_var(intr, 0);
4843                   if (nir_intrinsic_atomic_op(intr) == nir_atomic_op_iadd &&
4844                       util_format_is_float(var->data.image.format))
4845                      fprintf(stderr, "zink: Vulkan driver missing VK_EXT_shader_atomic_float but attempting to do atomic ops!\n");
4846                   break;
4847                }
4848                default:
4849                   break;
4850                }
4851             }
4852          }
4853       }
4854    }
4855 }
4856 
4857 static bool
4858 match_tex_dests_instr(nir_builder *b, nir_instr *in, void *data, bool pre)
4859 {
4860    if (in->type != nir_instr_type_tex)
4861       return false;
4862    nir_tex_instr *tex = nir_instr_as_tex(in);
4863    if (tex->op == nir_texop_txs || tex->op == nir_texop_lod)
4864       return false;
4865    int handle = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
4866    nir_variable *var = NULL;
4867    if (handle != -1) {
4868       if (pre)
4869          return false;
4870       var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[handle].src));
4871    } else {
4872       var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
4873    }
4874    if (pre) {
4875       flag_shadow_tex_instr(b, tex, var, data);
4876       return false;
4877    }
4878    return !!rewrite_tex_dest(b, tex, var, data);
4879 }
4880 
4881 static bool
4882 match_tex_dests_instr_pre(nir_builder *b, nir_instr *in, void *data)
4883 {
4884    return match_tex_dests_instr(b, in, data, true);
4885 }
4886 
4887 static bool
4888 match_tex_dests_instr_post(nir_builder *b, nir_instr *in, void *data)
4889 {
4890    return match_tex_dests_instr(b, in, data, false);
4891 }
4892 
4893 static bool
4894 match_tex_dests(nir_shader *shader, struct zink_shader *zs, bool pre_mangle)
4895 {
4896    return nir_shader_instructions_pass(shader, pre_mangle ? match_tex_dests_instr_pre : match_tex_dests_instr_post, nir_metadata_dominance, zs);
4897 }
4898 
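/* split multi-component bitfield ALU ops into one scalar op per component and
 * recombine the results with a vec, likely because SPIR-V bitfield
 * instructions take scalar Offset/Count operands.  Roughly (GLSL-style sketch):
 *    r.xy = ubitfield_extract(base.xy, offset.xy, bits.xy)
 * becomes
 *    r = vec2(ubitfield_extract(base.x, offset.x, bits.x),
 *             ubitfield_extract(base.y, offset.y, bits.y))
 */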
4899 static bool
4900 split_bitfields_instr(nir_builder *b, nir_alu_instr *alu, void *data)
4901 {
4902    switch (alu->op) {
4903    case nir_op_ubitfield_extract:
4904    case nir_op_ibitfield_extract:
4905    case nir_op_bitfield_insert:
4906       break;
4907    default:
4908       return false;
4909    }
4910    unsigned num_components = alu->def.num_components;
4911    if (num_components == 1)
4912       return false;
4913    b->cursor = nir_before_instr(&alu->instr);
4914    nir_def *dests[NIR_MAX_VEC_COMPONENTS];
4915    for (unsigned i = 0; i < num_components; i++) {
4916       if (alu->op == nir_op_bitfield_insert)
4917          dests[i] = nir_bitfield_insert(b,
4918                                         nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4919                                         nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4920                                         nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]),
4921                                         nir_channel(b, alu->src[3].src.ssa, alu->src[3].swizzle[i]));
4922       else if (alu->op == nir_op_ubitfield_extract)
4923          dests[i] = nir_ubitfield_extract(b,
4924                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4925                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4926                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4927       else
4928          dests[i] = nir_ibitfield_extract(b,
4929                                           nir_channel(b, alu->src[0].src.ssa, alu->src[0].swizzle[i]),
4930                                           nir_channel(b, alu->src[1].src.ssa, alu->src[1].swizzle[i]),
4931                                           nir_channel(b, alu->src[2].src.ssa, alu->src[2].swizzle[i]));
4932    }
4933    nir_def *dest = nir_vec(b, dests, num_components);
4934    nir_def_rewrite_uses_after(&alu->def, dest, &alu->instr);
4935    nir_instr_remove(&alu->instr);
4936    return true;
4937 }
4938 
4939 
4940 static bool
4941 split_bitfields(nir_shader *shader)
4942 {
4943    return nir_shader_alu_pass(shader, split_bitfields_instr,
4944                               nir_metadata_dominance, NULL);
4945 }
4946 
4947 static bool
4948 strip_tex_ms_instr(nir_builder *b, nir_instr *in, void *data)
4949 {
4950    if (in->type != nir_instr_type_intrinsic)
4951       return false;
4952    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(in);
4953    switch (intr->intrinsic) {
4954    case nir_intrinsic_image_deref_samples:
4955       b->cursor = nir_before_instr(in);
4956       nir_def_rewrite_uses_after(&intr->def, nir_imm_zero(b, 1, intr->def.bit_size), in);
4957       nir_instr_remove(in);
4958       break;
4959    case nir_intrinsic_image_deref_store:
4960    case nir_intrinsic_image_deref_load:
4961    case nir_intrinsic_image_deref_atomic:
4962    case nir_intrinsic_image_deref_atomic_swap:
4963       break;
4964    default:
4965       return false;
4966    }
4967    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
4968    if (dim != GLSL_SAMPLER_DIM_MS)
4969       return false;
4970 
4971    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
4972    nir_variable *var = nir_deref_instr_get_variable(deref);
4973    nir_deref_instr *parent = nir_deref_instr_parent(deref);
4974    if (parent) {
4975       parent->type = var->type;
4976       deref->type = glsl_without_array(var->type);
4977    } else {
4978       deref->type = var->type;
4979    }
4980    nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
4981    return true;
4982 }
4983 
4984 
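/* demote multisampled storage images to their 2D(-array) equivalents:
 * variable/deref types drop GLSL_SAMPLER_DIM_MS, image_deref_samples is
 * replaced with a constant 0, and the image_dim index on the remaining image
 * intrinsics is reset to GLSL_SAMPLER_DIM_2D
 */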
4985 static bool
4986 strip_tex_ms(nir_shader *shader)
4987 {
4988    bool progress = false;
4989    nir_foreach_image_variable(var, shader) {
4990       const struct glsl_type *bare_type = glsl_without_array(var->type);
4991       if (glsl_get_sampler_dim(bare_type) != GLSL_SAMPLER_DIM_MS)
4992          continue;
4993       unsigned array_size = 0;
4994       if (glsl_type_is_array(var->type))
4995          array_size = glsl_array_size(var->type);
4996 
4997       const struct glsl_type *new_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, glsl_sampler_type_is_array(bare_type), glsl_get_sampler_result_type(bare_type));
4998       if (array_size)
4999          new_type = glsl_array_type(new_type, array_size, glsl_get_explicit_stride(var->type));
5000       var->type = new_type;
5001       progress = true;
5002    }
5003    if (!progress)
5004       return false;
5005    return nir_shader_instructions_pass(shader, strip_tex_ms_instr, nir_metadata_all, NULL);
5006 }
5007 
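/* after an image variable has been retyped, update every deref of that
 * variable so the deref chain matches the new variable type
 */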
5008 static void
5009 rewrite_cl_derefs(nir_shader *nir, nir_variable *var)
5010 {
5011    nir_foreach_function_impl(impl, nir) {
5012       nir_foreach_block(block, impl) {
5013          nir_foreach_instr_safe(instr, block) {
5014             if (instr->type != nir_instr_type_deref)
5015                continue;
5016             nir_deref_instr *deref = nir_instr_as_deref(instr);
5017             nir_variable *img = nir_deref_instr_get_variable(deref);
5018             if (img != var)
5019                continue;
5020             if (glsl_type_is_array(var->type)) {
5021                if (deref->deref_type == nir_deref_type_array)
5022                   deref->type = glsl_without_array(var->type);
5023                else
5024                   deref->type = var->type;
5025             } else {
5026                deref->type = var->type;
5027             }
5028          }
5029       }
5030    }
5031 }
5032 
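/* infer a concrete result type for an image variable declared without one
 * (e.g. CL-style images) from the src/dest type of the first typed image
 * intrinsic that uses it, falling back to uint32 when only image_deref_size
 * is seen; if no use is found, the variable is demoted to nir_var_shader_temp
 * so later passes can delete it
 */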
5033 static void
5034 type_image(nir_shader *nir, nir_variable *var)
5035 {
5036    nir_foreach_function_impl(impl, nir) {
5037       nir_foreach_block(block, impl) {
5038          nir_foreach_instr_safe(instr, block) {
5039             if (instr->type != nir_instr_type_intrinsic)
5040                continue;
5041             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5042             if (intr->intrinsic == nir_intrinsic_image_deref_load ||
5043                intr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
5044                intr->intrinsic == nir_intrinsic_image_deref_store ||
5045                intr->intrinsic == nir_intrinsic_image_deref_atomic ||
5046                intr->intrinsic == nir_intrinsic_image_deref_atomic_swap ||
5047                intr->intrinsic == nir_intrinsic_image_deref_samples ||
5048                intr->intrinsic == nir_intrinsic_image_deref_format ||
5049                intr->intrinsic == nir_intrinsic_image_deref_order) {
5050                nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5051                nir_variable *img = nir_deref_instr_get_variable(deref);
5052                if (img != var)
5053                   continue;
5054 
5055                nir_alu_type alu_type;
5056                if (nir_intrinsic_has_src_type(intr))
5057                   alu_type = nir_intrinsic_src_type(intr);
5058                else
5059                   alu_type = nir_intrinsic_dest_type(intr);
5060 
5061                const struct glsl_type *type = glsl_without_array(var->type);
5062                if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5063                   assert(glsl_get_sampler_result_type(type) == nir_get_glsl_base_type_for_nir_type(alu_type));
5064                   continue;
5065                }
5066                const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5067                if (glsl_type_is_array(var->type))
5068                   img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5069                var->type = img_type;
5070                rewrite_cl_derefs(nir, var);
5071                return;
5072             }
5073          }
5074       }
5075    }
5076    nir_foreach_function_impl(impl, nir) {
5077       nir_foreach_block(block, impl) {
5078          nir_foreach_instr_safe(instr, block) {
5079             if (instr->type != nir_instr_type_intrinsic)
5080                continue;
5081             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5082             if (intr->intrinsic != nir_intrinsic_image_deref_size)
5083                continue;
5084             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5085             nir_variable *img = nir_deref_instr_get_variable(deref);
5086             if (img != var)
5087                continue;
5088             nir_alu_type alu_type = nir_type_uint32;
5089             const struct glsl_type *type = glsl_without_array(var->type);
5090             if (glsl_get_sampler_result_type(type) != GLSL_TYPE_VOID) {
5091                continue;
5092             }
5093             const struct glsl_type *img_type = glsl_image_type(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type), nir_get_glsl_base_type_for_nir_type(alu_type));
5094             if (glsl_type_is_array(var->type))
5095                img_type = glsl_array_type(img_type, glsl_array_size(var->type), glsl_get_explicit_stride(var->type));
5096             var->type = img_type;
5097             rewrite_cl_derefs(nir, var);
5098             return;
5099          }
5100       }
5101    }
5102    var->data.mode = nir_var_shader_temp;
5103 }
5104 
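/* retype sampler variables using the dest_type/shadow/array info of the tex
 * instructions that use them
 */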
5105 static bool
5106 type_sampler_vars(nir_shader *nir)
5107 {
5108    bool progress = false;
5109    nir_foreach_function_impl(impl, nir) {
5110       nir_foreach_block(block, impl) {
5111          nir_foreach_instr(instr, block) {
5112             if (instr->type != nir_instr_type_tex)
5113                continue;
5114             nir_tex_instr *tex = nir_instr_as_tex(instr);
5115             nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(tex->src[nir_tex_instr_src_index(tex, nir_tex_src_texture_deref)].src.ssa->parent_instr));
5116             assert(var);
5117             if (glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID &&
5118                 nir_tex_instr_is_query(tex))
5119                continue;
5120             const struct glsl_type *img_type = glsl_sampler_type(glsl_get_sampler_dim(glsl_without_array(var->type)), tex->is_shadow, tex->is_array, nir_get_glsl_base_type_for_nir_type(tex->dest_type));
5121             unsigned size = glsl_type_is_array(var->type) ? glsl_array_size(var->type) : 1;
5122             if (size > 1)
5123                img_type = glsl_array_type(img_type, size, 0);
5124             var->type = img_type;
5125             progress = true;
5126          }
5127       }
5128    }
5129    return progress;
5130 }
5131 
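/* assign concrete types to sampler/image variables, then fix up deref types
 * and modes if anything changed
 */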
5132 static bool
5133 type_images(nir_shader *nir)
5134 {
5135    bool progress = false;
5136    progress |= type_sampler_vars(nir);
5137    nir_foreach_variable_with_modes(var, nir, nir_var_image) {
5138       type_image(nir, var);
5139       progress = true;
5140    }
5141    if (progress) {
5142       nir_fixup_deref_types(nir);
5143       nir_fixup_deref_modes(nir);
5144    }
5145    return progress;
5146 }
5147 
5148 /* attempt to assign io for separate shaders */
5149 static bool
5150 fixup_io_locations(nir_shader *nir)
5151 {
5152    nir_variable_mode modes;
5153    if (nir->info.stage != MESA_SHADER_FRAGMENT && nir->info.stage != MESA_SHADER_VERTEX)
5154       modes = nir_var_shader_in | nir_var_shader_out;
5155    else
5156       modes = nir->info.stage == MESA_SHADER_FRAGMENT ? nir_var_shader_in : nir_var_shader_out;
5157    u_foreach_bit(mode, modes) {
5158       nir_variable_mode m = BITFIELD_BIT(mode);
5159       if ((m == nir_var_shader_in && ((nir->info.inputs_read & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == nir->info.inputs_read)) ||
5160           (m == nir_var_shader_out && ((nir->info.outputs_written | nir->info.outputs_read) & BITFIELD64_MASK(VARYING_SLOT_VAR1)) == (nir->info.outputs_written | nir->info.outputs_read))) {
5161          /* this is a special heuristic to catch ARB/fixedfunc shaders which have different rules:
5162           * - i/o interface blocks don't need to match
5163           * - any location can be present or not
5164           * - it just has to work
5165           *
5166           * VAR0 is the only user varying that mesa can produce in this case, so overwrite POS
5167           * since it's a builtin and yolo it with all the other legacy crap
5168           */
5169          nir_foreach_variable_with_modes(var, nir, m) {
5170             if (nir_slot_is_sysval_output(var->data.location, MESA_SHADER_NONE))
5171                continue;
5172             if (var->data.location == VARYING_SLOT_VAR0)
5173                var->data.driver_location = 0;
5174             else if (var->data.patch)
5175                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5176             else
5177                var->data.driver_location = var->data.location;
5178          }
5179          continue;
5180       }
5181       /* i/o interface blocks are required to be EXACT matches between stages:
5182        * iterate over all locations and set locations incrementally
5183        */
5184       unsigned slot = 0;
5185       for (unsigned i = 0; i < VARYING_SLOT_TESS_MAX; i++) {
5186          if (nir_slot_is_sysval_output(i, MESA_SHADER_NONE))
5187             continue;
5188          bool found = false;
5189          unsigned size = 0;
5190          nir_foreach_variable_with_modes(var, nir, m) {
5191             if (var->data.location != i)
5192                continue;
5193             /* only add slots for non-component vars or first-time component vars */
5194             if (!var->data.location_frac || !size) {
5195                /* ensure variable is given enough slots */
5196                if (nir_is_arrayed_io(var, nir->info.stage))
5197                   size += glsl_count_vec4_slots(glsl_get_array_element(var->type), false, false);
5198                else
5199                   size += glsl_count_vec4_slots(var->type, false, false);
5200             }
5201             if (var->data.patch)
5202                var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
5203             else
5204                var->data.driver_location = slot;
5205             found = true;
5206          }
5207          slot += size;
5208          if (found) {
5209             /* ensure the consumed slots aren't double iterated */
5210             i += size - 1;
5211          } else {
5212             /* locations used between stages are not required to be contiguous */
5213             if (i >= VARYING_SLOT_VAR0)
5214                slot++;
5215          }
5216       }
5217    }
5218    return true;
5219 }
5220 
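/* collect the bitmask of input slots that use flat interpolation */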
5221 static uint64_t
5222 zink_flat_flags(struct nir_shader *shader)
5223 {
5224    uint64_t flat_flags = 0;
5225    nir_foreach_shader_in_variable(var, shader) {
5226       if (var->data.interpolation == INTERP_MODE_FLAT)
5227          flat_flags |= BITFIELD64_BIT(var->data.location);
5228    }
5229 
5230    return flat_flags;
5231 }
5232 
5233 struct rework_io_state {
5234    /* these are search criteria */
5235    bool indirect_only;
5236    unsigned location;
5237    nir_variable_mode mode;
5238    gl_shader_stage stage;
5239    nir_shader *nir;
5240    const char *name;
5241 
5242    /* these are found by scanning */
5243    bool arrayed_io;
5244    bool medium_precision;
5245    bool fb_fetch_output;
5246    bool dual_source_blend_index;
5247    uint32_t component_mask;
5248    uint32_t ignored_component_mask;
5249    unsigned array_size;
5250    unsigned bit_size;
5251    unsigned base;
5252    nir_alu_type type;
5253    /* must be last */
5254    char *newname;
5255 };
5256 
5257 /* match an existing variable against the rework state */
5258 static nir_variable *
5259 find_rework_var(nir_shader *nir, struct rework_io_state *ris)
5260 {
5261    nir_foreach_variable_with_modes(var, nir, ris->mode) {
5262       const struct glsl_type *type = var->type;
5263       if (nir_is_arrayed_io(var, nir->info.stage))
5264          type = glsl_get_array_element(type);
5265       if (var->data.fb_fetch_output != ris->fb_fetch_output)
5266          continue;
5267       if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out && ris->dual_source_blend_index != var->data.index)
5268          continue;
5269       unsigned num_slots = var->data.compact ? DIV_ROUND_UP(glsl_array_size(type), 4) : glsl_count_attribute_slots(type, false);
5270       if (var->data.location > ris->location + ris->array_size || var->data.location + num_slots <= ris->location)
5271          continue;
5272       unsigned num_components = glsl_get_vector_elements(glsl_without_array(type));
5273       assert(!glsl_type_contains_64bit(type));
5274       uint32_t component_mask = ris->component_mask ? ris->component_mask : BITFIELD_MASK(4);
5275       if (BITFIELD_RANGE(var->data.location_frac, num_components) & component_mask)
5276          return var;
5277    }
5278    return NULL;
5279 }
5280 
5281 static void
5282 update_io_var_name(struct rework_io_state *ris, const char *name)
5283 {
5284    if (!(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV)))
5285       return;
5286    if (!name)
5287       return;
5288    if (ris->name && !strcmp(ris->name, name))
5289       return;
5290    if (ris->newname && !strcmp(ris->newname, name))
5291       return;
5292    if (ris->newname) {
5293       ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->newname, name);
5294    } else if (ris->name) {
5295       ris->newname = ralloc_asprintf(ris->nir, "%s_%s", ris->name, name);
5296    } else {
5297       ris->newname = ralloc_strdup(ris->nir, name);
5298    }
5299 }
5300 
5301 /* check/update tracking state for variable info */
5302 static void
5303 update_io_var_state(nir_intrinsic_instr *intr, struct rework_io_state *ris)
5304 {
5305    bool is_load = false;
5306    bool is_input = false;
5307    bool is_interp = false;
5308    filter_io_instr(intr, &is_load, &is_input, &is_interp);
5309    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5310    unsigned frac = nir_intrinsic_component(intr);
5311    /* the mask of components for the instruction */
5312    uint32_t cmask = is_load ? BITFIELD_RANGE(frac, intr->num_components) : (nir_intrinsic_write_mask(intr) << frac);
5313 
5314    /* always check for existing variables first */
5315    struct rework_io_state test = {
5316       .location = ris->location,
5317       .mode = ris->mode,
5318       .stage = ris->stage,
5319       .arrayed_io = io_instr_is_arrayed(intr),
5320       .medium_precision = sem.medium_precision,
5321       .fb_fetch_output = sem.fb_fetch_output,
5322       .dual_source_blend_index = sem.dual_source_blend_index,
5323       .component_mask = cmask,
5324       .array_size = sem.num_slots > 1 ? sem.num_slots : 0,
5325    };
5326    if (find_rework_var(ris->nir, &test))
5327       return;
5328 
5329    /* filter ignored components to scan later:
5330     * - ignore no-overlapping-components case
5331     * - always match fbfetch and dual src blend
5332     */
5333    if (ris->component_mask &&
5334        (!(ris->component_mask & cmask) || ris->fb_fetch_output != sem.fb_fetch_output || ris->dual_source_blend_index != sem.dual_source_blend_index)) {
5335       ris->ignored_component_mask |= cmask;
5336       return;
5337    }
5338 
5339    assert(!ris->indirect_only || sem.num_slots > 1);
5340    if (sem.num_slots > 1)
5341       ris->array_size = MAX2(ris->array_size, sem.num_slots);
5342 
5343    assert(!ris->component_mask || ris->arrayed_io == io_instr_is_arrayed(intr));
5344    ris->arrayed_io = io_instr_is_arrayed(intr);
5345 
5346    ris->component_mask |= cmask;
5347 
5348    unsigned bit_size = is_load ? intr->def.bit_size : nir_src_bit_size(intr->src[0]);
5349    assert(!ris->bit_size || ris->bit_size == bit_size);
5350    ris->bit_size = bit_size;
5351 
5352    nir_alu_type type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5353    if (ris->type) {
5354       /* in the case of clashing types, this heuristic guarantees some semblance of a match */
5355       if (ris->type & nir_type_float || type & nir_type_float) {
5356          ris->type = nir_type_float | bit_size;
5357       } else if (ris->type & nir_type_int || type & nir_type_int) {
5358          ris->type = nir_type_int | bit_size;
5359       } else if (ris->type & nir_type_uint || type & nir_type_uint) {
5360          ris->type = nir_type_uint | bit_size;
5361       } else {
5362          assert(bit_size == 1);
5363          ris->type = nir_type_bool;
5364       }
5365    } else {
5366       ris->type = type;
5367    }
5368 
5369    update_io_var_name(ris, intr->name);
5370 
5371    ris->medium_precision |= sem.medium_precision;
5372    ris->fb_fetch_output |= sem.fb_fetch_output;
5373    ris->dual_source_blend_index |= sem.dual_source_blend_index;
5374    if (ris->stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5375       ris->base = nir_intrinsic_base(intr);
5376 }
5377 
5378 /* instruction-level scanning for variable data */
5379 static bool
5380 scan_io_var_usage(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5381 {
5382    struct rework_io_state *ris = data;
5383    bool is_load = false;
5384    bool is_input = false;
5385    bool is_interp = false;
5386    /* mode-based filtering */
5387    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5388       return false;
5389    if (ris->mode == nir_var_shader_in) {
5390       if (!is_input)
5391          return false;
5392    } else {
5393       if (is_input)
5394          return false;
5395    }
5396    /* location-based filtering */
5397    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5398    if (sem.location != ris->location && (ris->location > sem.location || ris->location + ris->array_size <= sem.location))
5399       return false;
5400 
5401    /* only scan indirect i/o when indirect_only is set */
5402    nir_src *src_offset = nir_get_io_offset_src(intr);
5403    if (!nir_src_is_const(*src_offset)) {
5404       if (!ris->indirect_only)
5405          return false;
5406       update_io_var_state(intr, ris);
5407       return false;
5408    }
5409 
5410    /* don't scan direct i/o when indirect_only is set */
5411    if (ris->indirect_only)
5412       return false;
5413 
5414    update_io_var_state(intr, ris);
5415    return false;
5416 }
5417 
5418 /* scan a given i/o slot for state info */
5419 static struct rework_io_state
5420 scan_io_var_slot(nir_shader *nir, nir_variable_mode mode, unsigned location, bool scan_indirects)
5421 {
5422    struct rework_io_state ris = {
5423       .location = location,
5424       .mode = mode,
5425       .stage = nir->info.stage,
5426       .nir = nir,
5427    };
5428 
5429    struct rework_io_state test;
5430    do {
5431       update_io_var_name(&test, ris.newname ? ris.newname : ris.name);
5432       test = ris;
5433       /* always run indirect scan first to detect potential overlaps */
5434       if (scan_indirects) {
5435          ris.indirect_only = true;
5436          nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5437       }
5438       ris.indirect_only = false;
5439       nir_shader_intrinsics_pass(nir, scan_io_var_usage, nir_metadata_all, &ris);
5440       /* keep scanning until no changes found */
5441    } while (memcmp(&ris, &test, offsetof(struct rework_io_state, newname)));
5442    return ris;
5443 }
5444 
5445 /* create a variable using explicit/scan info */
5446 static void
5447 create_io_var(nir_shader *nir, struct rework_io_state *ris)
5448 {
5449    char name[1024];
5450    assert(ris->component_mask);
5451    if (ris->newname || ris->name) {
5452       snprintf(name, sizeof(name), "%s", ris->newname ? ris->newname : ris->name);
5453    /* always use builtin name where possible */
5454    } else if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in) {
5455       snprintf(name, sizeof(name), "%s", gl_vert_attrib_name(ris->location));
5456    } else if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_out) {
5457       snprintf(name, sizeof(name), "%s", gl_frag_result_name(ris->location));
5458    } else if (nir_slot_is_sysval_output(ris->location, nir->info.stage)) {
5459       snprintf(name, sizeof(name), "%s", gl_varying_slot_name_for_stage(ris->location, nir->info.stage));
5460    } else {
5461       int c = ffs(ris->component_mask) - 1;
5462       if (c)
5463          snprintf(name, sizeof(name), "slot_%u_c%u", ris->location, c);
5464       else
5465          snprintf(name, sizeof(name), "slot_%u", ris->location);
5466    }
5467    /* calculate vec/array type */
5468    int frac = ffs(ris->component_mask) - 1;
5469    int num_components = util_last_bit(ris->component_mask) - frac;
5470    assert(ris->component_mask == BITFIELD_RANGE(frac, num_components));
5471    const struct glsl_type *vec_type = glsl_vector_type(nir_get_glsl_base_type_for_nir_type(ris->type), num_components);
5472    if (ris->array_size)
5473       vec_type = glsl_array_type(vec_type, ris->array_size, glsl_get_explicit_stride(vec_type));
5474    if (ris->arrayed_io) {
5475       /* tess size may be unknown with generated tcs */
5476       unsigned arrayed = nir->info.stage == MESA_SHADER_GEOMETRY ?
5477                          nir->info.gs.vertices_in : 32 /* MAX_PATCH_VERTICES */;
5478       vec_type = glsl_array_type(vec_type, arrayed, glsl_get_explicit_stride(vec_type));
5479    }
5480    nir_variable *var = nir_variable_create(nir, ris->mode, vec_type, name);
5481    var->data.location_frac = frac;
5482    var->data.location = ris->location;
5483    /* gallium vertex inputs use intrinsic 'base' indexing */
5484    if (nir->info.stage == MESA_SHADER_VERTEX && ris->mode == nir_var_shader_in)
5485       var->data.driver_location = ris->base;
5486    var->data.patch = ris->location >= VARYING_SLOT_PATCH0 ||
5487                      ((nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
5488                       (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER));
5489    /* set flat by default: add_derefs will fill this in later after more shader passes */
5490    if (nir->info.stage == MESA_SHADER_FRAGMENT && ris->mode == nir_var_shader_in)
5491       var->data.interpolation = INTERP_MODE_FLAT;
5492    var->data.fb_fetch_output = ris->fb_fetch_output;
5493    var->data.index = ris->dual_source_blend_index;
5494    var->data.precision = ris->medium_precision;
5495    /* only clip/cull dist and tess levels are compact */
5496    if (nir->info.stage != MESA_SHADER_VERTEX || ris->mode != nir_var_shader_in)
5497       var->data.compact = is_clipcull_dist(ris->location) || (ris->location == VARYING_SLOT_TESS_LEVEL_INNER || ris->location == VARYING_SLOT_TESS_LEVEL_OUTER);
5498 }
5499 
5500 /* loop the i/o mask and generate variables for specified locations */
5501 static void
5502 loop_io_var_mask(nir_shader *nir, nir_variable_mode mode, bool indirect, bool patch, uint64_t mask)
5503 {
5504    ASSERTED bool is_vertex_input = nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in;
5505    u_foreach_bit64(slot, mask) {
5506       if (patch)
5507          slot += VARYING_SLOT_PATCH0;
5508 
5509       /* this should've been handled explicitly */
5510       assert(is_vertex_input || !is_clipcull_dist(slot));
5511 
5512       unsigned remaining = 0;
5513       do {
5514          /* scan the slot for usage */
5515          struct rework_io_state ris = scan_io_var_slot(nir, mode, slot, indirect);
5516          /* one of these must be true or things have gone very wrong */
5517          assert(indirect || ris.component_mask || find_rework_var(nir, &ris) || remaining);
5518          /* release builds only: the assert above catches this in debug */
5519          if (!ris.component_mask)
5520             break;
5521 
5522          /* whatever reaches this point is either enough info to create a variable or an existing variable */
5523          if (!find_rework_var(nir, &ris))
5524             create_io_var(nir, &ris);
5525          /* scanning may detect multiple potential variables per location at component offsets: process again */
5526          remaining = ris.ignored_component_mask;
5527       } while (remaining);
5528    }
5529 }
5530 
5531 /* for a given mode, generate variables */
5532 static void
5533 rework_io_vars(nir_shader *nir, nir_variable_mode mode, struct zink_shader *zs)
5534 {
5535    assert(mode == nir_var_shader_out || mode == nir_var_shader_in);
5536    assert(util_bitcount(mode) == 1);
5537    bool found = false;
5538    /* if no i/o, skip */
5539    if (mode == nir_var_shader_out)
5540       found = nir->info.outputs_written || nir->info.outputs_read || nir->info.patch_outputs_written || nir->info.patch_outputs_read;
5541    else
5542       found = nir->info.inputs_read || nir->info.patch_inputs_read;
5543    if (!found)
5544       return;
5545 
5546    /* use local copies to enable incremental processing */
5547    uint64_t inputs_read = nir->info.inputs_read;
5548    uint64_t inputs_read_indirectly = nir->info.inputs_read_indirectly;
5549    uint64_t outputs_accessed = nir->info.outputs_written | nir->info.outputs_read;
5550    uint64_t outputs_accessed_indirectly = nir->info.outputs_accessed_indirectly;
5551 
5552    /* fragment outputs are special: handle separately */
5553    if (mode == nir_var_shader_out && nir->info.stage == MESA_SHADER_FRAGMENT) {
5554       assert(!outputs_accessed_indirectly);
5555       u_foreach_bit64(slot, outputs_accessed) {
5556          struct rework_io_state ris = {
5557             .location = slot,
5558             .mode = mode,
5559             .stage = nir->info.stage,
5560          };
5561          /* explicitly handle builtins */
5562          switch (slot) {
5563          case FRAG_RESULT_DEPTH:
5564          case FRAG_RESULT_STENCIL:
5565          case FRAG_RESULT_SAMPLE_MASK:
5566             ris.bit_size = 32;
5567             ris.component_mask = 0x1;
5568             ris.type = slot == FRAG_RESULT_DEPTH ? nir_type_float32 : nir_type_uint32;
5569             create_io_var(nir, &ris);
5570             outputs_accessed &= ~BITFIELD64_BIT(slot);
5571             break;
5572          default:
5573             break;
5574          }
5575       }
5576       /* the rest of the outputs can be generated normally */
5577       loop_io_var_mask(nir, mode, false, false, outputs_accessed);
5578       return;
5579    }
5580 
5581    /* vertex inputs are special: handle separately */
5582    if (nir->info.stage == MESA_SHADER_VERTEX && mode == nir_var_shader_in) {
5583       assert(!inputs_read_indirectly);
5584       u_foreach_bit64(slot, inputs_read) {
5585          /* explicitly handle builtins */
5586          if (slot != VERT_ATTRIB_POS && slot != VERT_ATTRIB_POINT_SIZE)
5587             continue;
5588 
5589          uint32_t component_mask = slot == VERT_ATTRIB_POINT_SIZE ? 0x1 : 0xf;
5590          struct rework_io_state ris = {
5591             .location = slot,
5592             .mode = mode,
5593             .stage = nir->info.stage,
5594             .bit_size = 32,
5595             .component_mask = component_mask,
5596             .type = nir_type_float32,
5597             .newname = scan_io_var_slot(nir, nir_var_shader_in, slot, false).newname,
5598          };
5599          create_io_var(nir, &ris);
5600          inputs_read &= ~BITFIELD64_BIT(slot);
5601       }
5602       /* the rest of the inputs can be generated normally */
5603       loop_io_var_mask(nir, mode, false, false, inputs_read);
5604       return;
5605    }
5606 
5607    /* these are the masks to process based on the mode: nothing "special" as above */
5608    uint64_t mask = mode == nir_var_shader_in ? inputs_read : outputs_accessed;
5609    uint64_t indirect_mask = mode == nir_var_shader_in ? inputs_read_indirectly : outputs_accessed_indirectly;
5610    u_foreach_bit64(slot, mask) {
5611       struct rework_io_state ris = {
5612          .location = slot,
5613          .mode = mode,
5614          .stage = nir->info.stage,
5615          .arrayed_io = (mode == nir_var_shader_in ? zs->arrayed_inputs : zs->arrayed_outputs) & BITFIELD64_BIT(slot),
5616       };
5617       /* explicitly handle builtins */
5618       unsigned max_components = 0;
5619       switch (slot) {
5620       case VARYING_SLOT_FOGC:
5621          /* use intr components */
5622          break;
5623       case VARYING_SLOT_POS:
5624       case VARYING_SLOT_CLIP_VERTEX:
5625       case VARYING_SLOT_PNTC:
5626       case VARYING_SLOT_BOUNDING_BOX0:
5627       case VARYING_SLOT_BOUNDING_BOX1:
5628          max_components = 4;
5629          ris.type = nir_type_float32;
5630          break;
5631       case VARYING_SLOT_CLIP_DIST0:
5632          max_components = nir->info.clip_distance_array_size;
5633          assert(max_components);
5634          ris.type = nir_type_float32;
5635          break;
5636       case VARYING_SLOT_CULL_DIST0:
5637          max_components = nir->info.cull_distance_array_size;
5638          assert(max_components);
5639          ris.type = nir_type_float32;
5640          break;
5641       case VARYING_SLOT_CLIP_DIST1:
5642       case VARYING_SLOT_CULL_DIST1:
5643          mask &= ~BITFIELD64_BIT(slot);
5644          indirect_mask &= ~BITFIELD64_BIT(slot);
5645          continue;
5646       case VARYING_SLOT_TESS_LEVEL_OUTER:
5647          max_components = 4;
5648          ris.type = nir_type_float32;
5649          break;
5650       case VARYING_SLOT_TESS_LEVEL_INNER:
5651          max_components = 2;
5652          ris.type = nir_type_float32;
5653          break;
5654       case VARYING_SLOT_PRIMITIVE_ID:
5655       case VARYING_SLOT_LAYER:
5656       case VARYING_SLOT_VIEWPORT:
5657       case VARYING_SLOT_FACE:
5658       case VARYING_SLOT_VIEW_INDEX:
5659       case VARYING_SLOT_VIEWPORT_MASK:
5660          ris.type = nir_type_int32;
5661          max_components = 1;
5662          break;
5663       case VARYING_SLOT_PSIZ:
5664          max_components = 1;
5665          ris.type = nir_type_float32;
5666          break;
5667       default:
5668          break;
5669       }
5670       if (!max_components)
5671          continue;
5672       switch (slot) {
5673       case VARYING_SLOT_CLIP_DIST0:
5674       case VARYING_SLOT_CLIP_DIST1:
5675       case VARYING_SLOT_CULL_DIST0:
5676       case VARYING_SLOT_CULL_DIST1:
5677       case VARYING_SLOT_TESS_LEVEL_OUTER:
5678       case VARYING_SLOT_TESS_LEVEL_INNER:
5679          /* compact arrays */
5680          ris.component_mask = 0x1;
5681          ris.array_size = max_components;
5682          break;
5683       default:
5684          ris.component_mask = BITFIELD_MASK(max_components);
5685          break;
5686       }
5687       ris.bit_size = 32;
5688       create_io_var(nir, &ris);
5689       mask &= ~BITFIELD64_BIT(slot);
5690       /* eliminate clip/cull distance scanning early */
5691       indirect_mask &= ~BITFIELD64_BIT(slot);
5692    }
5693 
5694    /* patch i/o */
5695    if ((nir->info.stage == MESA_SHADER_TESS_CTRL && mode == nir_var_shader_out) ||
5696        (nir->info.stage == MESA_SHADER_TESS_EVAL && mode == nir_var_shader_in)) {
5697       uint64_t patch_outputs_accessed = nir->info.patch_outputs_read | nir->info.patch_outputs_written;
5698       uint64_t indirect_patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read_indirectly : nir->info.patch_outputs_accessed_indirectly;
5699       uint64_t patch_mask = mode == nir_var_shader_in ? nir->info.patch_inputs_read : patch_outputs_accessed;
5700 
5701       loop_io_var_mask(nir, mode, true, true, indirect_patch_mask);
5702       loop_io_var_mask(nir, mode, false, true, patch_mask);
5703    }
5704 
5705    /* regular i/o */
5706    loop_io_var_mask(nir, mode, true, false, indirect_mask);
5707    loop_io_var_mask(nir, mode, false, false, mask);
5708 }
5709 
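/* type-size callback counting vec4 attribute slots; likely used with
 * nir_lower_io-style passes elsewhere in this file
 */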
5710 static int
5711 zink_type_size(const struct glsl_type *type, bool bindless)
5712 {
5713    return glsl_count_attribute_slots(type, false);
5714 }
5715 
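/* nir_lower_mem_access_bit_sizes callback: clamp accesses to at most 4
 * components and shrink the bit size down to the known alignment for
 * unaligned load/stores
 */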
5716 static nir_mem_access_size_align
5717 mem_access_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5718                          uint8_t bit_size, uint32_t align,
5719                          uint32_t align_offset, bool offset_is_const,
5720                          const void *cb_data)
5721 {
5722    align = nir_combined_align(align, align_offset);
5723 
5724    assert(util_is_power_of_two_nonzero(align));
5725 
5726    /* simply drop the bit_size for unaligned load/stores */
5727    if (align < (bit_size / 8)) {
5728       return (nir_mem_access_size_align){
5729          .num_components = MIN2(bytes / align, 4),
5730          .bit_size = align * 8,
5731          .align = align,
5732       };
5733    } else {
5734       return (nir_mem_access_size_align){
5735          .num_components = MIN2(bytes / (bit_size / 8), 4),
5736          .bit_size = bit_size,
5737          .align = bit_size / 8,
5738       };
5739    }
5740 }
5741 
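/* like mem_access_size_align_cb, but forces the single bit size chosen by
 * alias_scratch_memory (passed in via cb_data)
 */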
5742 static nir_mem_access_size_align
5743 mem_access_scratch_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
5744                                  uint8_t bit_size, uint32_t align,
5745                                  uint32_t align_offset, bool offset_is_const,
5746                                  const void *cb_data)
5747 {
5748    bit_size = *(const uint8_t *)cb_data;
5749    align = nir_combined_align(align, align_offset);
5750 
5751    assert(util_is_power_of_two_nonzero(align));
5752 
5753    return (nir_mem_access_size_align){
5754       .num_components = MIN2(bytes / (bit_size / 8), 4),
5755       .bit_size = bit_size,
5756       .align = bit_size / 8,
5757    };
5758 }
5759 
5760 static bool
5761 alias_scratch_memory_scan_bit_size(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
5762 {
5763    uint8_t *bit_size = data;
5764    switch (instr->intrinsic) {
5765    case nir_intrinsic_load_scratch:
5766       *bit_size = MIN2(*bit_size, instr->def.bit_size);
5767       return false;
5768    case nir_intrinsic_store_scratch:
5769       *bit_size = MIN2(*bit_size, instr->src[0].ssa->bit_size);
5770       return false;
5771    default:
5772       return false;
5773    }
5774 }
5775 
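/* find the smallest bit size used by scratch loads/stores, then lower all
 * scratch access to that size so differently-sized accesses to the same
 * scratch memory stay consistent (and can alias, as the name suggests)
 */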
5776 static bool
5777 alias_scratch_memory(nir_shader *nir)
5778 {
5779    uint8_t bit_size = 64;
5780 
5781    nir_shader_intrinsics_pass(nir, alias_scratch_memory_scan_bit_size, nir_metadata_all, &bit_size);
5782    nir_lower_mem_access_bit_sizes_options lower_scratch_mem_access_options = {
5783       .modes = nir_var_function_temp,
5784       .may_lower_unaligned_stores_to_atomics = true,
5785       .callback = mem_access_scratch_size_align_cb,
5786       .cb_data = &bit_size,
5787    };
5788    return nir_lower_mem_access_bit_sizes(nir, &lower_scratch_mem_access_options);
5789 }
5790 
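/* vectorization-width callback: always report 4, presumably to lower
 * vec8/vec16 ALU ops down to vec4
 */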
5791 static uint8_t
5792 lower_vec816_alu(const nir_instr *instr, const void *cb_data)
5793 {
5794    return 4;
5795 }
5796 
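/* nir_lower_bit_size callback: promote non-32-bit bit_count/find_lsb/find_msb
 * to 32-bit, likely because the SPIR-V equivalents are only defined for
 * 32-bit operands
 */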
5797 static unsigned
5798 zink_lower_bit_size_cb(const nir_instr *instr, void *data)
5799 {
5800    switch (instr->type) {
5801    case nir_instr_type_alu: {
5802       nir_alu_instr *alu = nir_instr_as_alu(instr);
5803       switch (alu->op) {
5804       case nir_op_bit_count:
5805       case nir_op_find_lsb:
5806       case nir_op_ifind_msb:
5807       case nir_op_ufind_msb:
5808          return alu->src[0].src.ssa->bit_size == 32 ? 0 : 32;
5809       default:
5810          return 0;
5811       }
5812    }
5813    default:
5814       return 0;
5815    }
5816 }
5817 
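/* rewrite generic vertex attribute locations to VERT_ATTRIB_GENERIC0 plus the
 * intrinsic 'base' so they follow gallium's vertex-element indexing
 */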
5818 static bool
5819 fix_vertex_input_locations_instr(nir_builder *b, nir_intrinsic_instr *intr, void *data)
5820 {
5821    bool is_load = false;
5822    bool is_input = false;
5823    bool is_interp = false;
5824    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp) || !is_input)
5825       return false;
5826 
5827    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5828    if (sem.location < VERT_ATTRIB_GENERIC0)
5829       return false;
5830    sem.location = VERT_ATTRIB_GENERIC0 + nir_intrinsic_base(intr);
5831    nir_intrinsic_set_io_semantics(intr, sem);
5832    return true;
5833 }
5834 
5835 static bool
5836 fix_vertex_input_locations(nir_shader *nir)
5837 {
5838    if (nir->info.stage != MESA_SHADER_VERTEX)
5839       return false;
5840 
5841    return nir_shader_intrinsics_pass(nir, fix_vertex_input_locations_instr, nir_metadata_all, NULL);
5842 }
5843 
5844 struct trivial_revectorize_state {
5845    bool has_xfb;
5846    uint32_t component_mask;
5847    nir_intrinsic_instr *base;
5848    nir_intrinsic_instr *next_emit_vertex;
5849    nir_intrinsic_instr *merge[NIR_MAX_VEC_COMPONENTS];
5850    struct set *deletions;
5851 };
5852 
5853 /* always skip xfb; scalarized xfb is preferred */
5854 static bool
5855 intr_has_xfb(nir_intrinsic_instr *intr)
5856 {
5857    if (!nir_intrinsic_has_io_xfb(intr))
5858       return false;
5859    for (unsigned i = 0; i < 2; i++) {
5860       if (nir_intrinsic_io_xfb(intr).out[i].num_components || nir_intrinsic_io_xfb2(intr).out[i].num_components) {
5861          return true;
5862       }
5863    }
5864    return false;
5865 }
5866 
5867 /* helper to avoid vectorizing i/o for different vertices */
5868 static nir_intrinsic_instr *
5869 find_next_emit_vertex(nir_intrinsic_instr *intr)
5870 {
5871    bool found = false;
5872    nir_foreach_instr_safe(instr, intr->instr.block) {
5873       if (instr->type == nir_instr_type_intrinsic) {
5874          nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5875          if (!found && test_intr != intr)
5876             continue;
5877          if (!found) {
5878             assert(intr == test_intr);
5879             found = true;
5880             continue;
5881          }
5882          if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5883             return test_intr;
5884       }
5885    }
5886    return NULL;
5887 }
5888 
5889 /* scan for vectorizable instrs on a given location */
5890 static bool
5891 trivial_revectorize_intr_scan(nir_shader *nir, nir_intrinsic_instr *intr, struct trivial_revectorize_state *state)
5892 {
5893    nir_intrinsic_instr *base = state->base;
5894 
5895    if (intr == base)
5896       return false;
5897 
5898    if (intr->intrinsic != base->intrinsic)
5899       return false;
5900 
5901    if (_mesa_set_search(state->deletions, intr))
5902       return false;
5903 
5904    bool is_load = false;
5905    bool is_input = false;
5906    bool is_interp = false;
5907    filter_io_instr(intr, &is_load, &is_input, &is_interp);
5908 
5909    nir_io_semantics base_sem = nir_intrinsic_io_semantics(base);
5910    nir_io_semantics test_sem = nir_intrinsic_io_semantics(intr);
5911    nir_alu_type base_type = is_load ? nir_intrinsic_dest_type(base) : nir_intrinsic_src_type(base);
5912    nir_alu_type test_type = is_load ? nir_intrinsic_dest_type(intr) : nir_intrinsic_src_type(intr);
5913    int c = nir_intrinsic_component(intr);
5914    /* already detected */
5915    if (state->component_mask & BITFIELD_BIT(c))
5916       return false;
5917    /* not a match */
5918    if (base_sem.location != test_sem.location || base_sem.num_slots != test_sem.num_slots || base_type != test_type)
5919       return false;
5920    /* only vectorize when all srcs match */
5921    for (unsigned i = !is_input; i < nir_intrinsic_infos[intr->intrinsic].num_srcs; i++) {
5922       if (!nir_srcs_equal(intr->src[i], base->src[i]))
5923          return false;
5924    }
5925    /* never match xfb */
5926    state->has_xfb |= intr_has_xfb(intr);
5927    if (state->has_xfb)
5928       return false;
5929    if (nir->info.stage == MESA_SHADER_GEOMETRY) {
5930       /* only match same vertex */
5931       if (state->next_emit_vertex != find_next_emit_vertex(intr))
5932          return false;
5933    }
5934    uint32_t mask = is_load ? BITFIELD_RANGE(c, intr->num_components) : (nir_intrinsic_write_mask(intr) << c);
5935    state->component_mask |= mask;
5936    u_foreach_bit(component, mask)
5937       state->merge[component] = intr;
5938 
5939    return true;
5940 }
5941 
5942 static bool
5943 trivial_revectorize_scan(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
5944 {
5945    bool is_load = false;
5946    bool is_input = false;
5947    bool is_interp = false;
5948    if (!filter_io_instr(intr, &is_load, &is_input, &is_interp))
5949       return false;
5950    if (intr->num_components != 1)
5951       return false;
5952    nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
5953    if (!is_input || b->shader->info.stage != MESA_SHADER_VERTEX) {
5954       /* always ignore compact arrays */
5955       switch (sem.location) {
5956       case VARYING_SLOT_CLIP_DIST0:
5957       case VARYING_SLOT_CLIP_DIST1:
5958       case VARYING_SLOT_CULL_DIST0:
5959       case VARYING_SLOT_CULL_DIST1:
5960       case VARYING_SLOT_TESS_LEVEL_INNER:
5961       case VARYING_SLOT_TESS_LEVEL_OUTER:
5962          return false;
5963       default: break;
5964       }
5965    }
5966    /* always ignore to-be-deleted instrs */
5967    if (_mesa_set_search(data, intr))
5968       return false;
5969 
5970    /* never vectorize xfb */
5971    if (intr_has_xfb(intr))
5972       return false;
5973 
5974    int ic = nir_intrinsic_component(intr);
5975    uint32_t mask = is_load ? BITFIELD_RANGE(ic, intr->num_components) : (nir_intrinsic_write_mask(intr) << ic);
5976    /* already vectorized */
5977    if (util_bitcount(mask) == 4)
5978       return false;
5979    struct trivial_revectorize_state state = {
5980       .component_mask = mask,
5981       .base = intr,
5982       /* avoid clobbering i/o for different vertices */
5983       .next_emit_vertex = b->shader->info.stage == MESA_SHADER_GEOMETRY ? find_next_emit_vertex(intr) : NULL,
5984       .deletions = data,
5985    };
5986    u_foreach_bit(bit, mask)
5987       state.merge[bit] = intr;
5988    bool progress = false;
5989    nir_foreach_instr(instr, intr->instr.block) {
5990       if (instr->type != nir_instr_type_intrinsic)
5991          continue;
5992       nir_intrinsic_instr *test_intr = nir_instr_as_intrinsic(instr);
5993       /* no matching across vertex emission */
5994       if (test_intr->intrinsic == nir_intrinsic_emit_vertex)
5995          break;
5996       progress |= trivial_revectorize_intr_scan(b->shader, test_intr, &state);
5997    }
5998    if (!progress || state.has_xfb)
5999       return false;
6000 
6001    /* verify nothing crazy happened */
6002    assert(state.component_mask);
6003    for (unsigned i = 0; i < 4; i++) {
6004       assert(!state.merge[i] || !intr_has_xfb(state.merge[i]));
6005    }
6006 
6007    unsigned first_component = ffs(state.component_mask) - 1;
6008    unsigned num_components = util_bitcount(state.component_mask);
6009    unsigned num_contiguous = 0;
6010    uint32_t contiguous_mask = 0;
6011    for (unsigned i = 0; i < num_components; i++) {
6012       unsigned c = i + first_component;
6013       /* calc mask of contiguous components to vectorize */
6014       if (state.component_mask & BITFIELD_BIT(c)) {
6015          num_contiguous++;
6016          contiguous_mask |= BITFIELD_BIT(c);
6017       }
6018       /* on the first gap or the last component, vectorize */
6019       if (!(state.component_mask & BITFIELD_BIT(c)) || i == num_components - 1) {
6020          if (num_contiguous > 1) {
6021             /* reindex to enable easy src/dest index comparison */
6022             nir_index_ssa_defs(nir_shader_get_entrypoint(b->shader));
6023             /* determine the first/last instr to use for the base (vectorized) load/store */
6024             unsigned first_c = ffs(contiguous_mask) - 1;
6025             nir_intrinsic_instr *base = NULL;
6026             unsigned test_idx = is_load ? UINT32_MAX : 0;
6027             for (unsigned j = 0; j < num_contiguous; j++) {
6028                unsigned merge_c = j + first_c;
6029                nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6030                /* avoid breaking ssa ordering by using:
6031                 * - first instr for vectorized load
6032                 * - last instr for vectorized store
6033                 * this guarantees all srcs have been seen
6034                 */
6035                if ((is_load && merge_intr->def.index < test_idx) ||
6036                    (!is_load && merge_intr->src[0].ssa->index >= test_idx)) {
6037                   test_idx = is_load ? merge_intr->def.index : merge_intr->src[0].ssa->index;
6038                   base = merge_intr;
6039                }
6040             }
6041             assert(base);
6042             /* update instr components */
6043             nir_intrinsic_set_component(base, nir_intrinsic_component(state.merge[first_c]));
6044             unsigned orig_components = base->num_components;
6045             base->num_components = num_contiguous;
6046             /* do rewrites after loads and before stores */
6047             b->cursor = is_load ? nir_after_instr(&base->instr) : nir_before_instr(&base->instr);
6048             if (is_load) {
6049                base->def.num_components = num_contiguous;
6050                /* iterate the contiguous loaded components and rewrite merged dests */
6051                for (unsigned j = 0; j < num_contiguous; j++) {
6052                   unsigned merge_c = j + first_c;
6053                   nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6054                   /* detect if the merged instr loaded multiple components and use swizzle mask for rewrite */
6055                   unsigned use_components = merge_intr == base ? orig_components : merge_intr->def.num_components;
6056                   nir_def *swiz = nir_channels(b, &base->def, BITFIELD_RANGE(j, use_components));
6057                   nir_def_rewrite_uses_after(&merge_intr->def, swiz, merge_intr == base ? swiz->parent_instr : &merge_intr->instr);
6058                   j += use_components - 1;
6059                }
6060             } else {
6061                nir_def *comp[NIR_MAX_VEC_COMPONENTS];
6062                /* generate swizzled vec of store components and rewrite store src */
6063                for (unsigned j = 0; j < num_contiguous; j++) {
6064                   unsigned merge_c = j + first_c;
6065                   nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6066                   /* detect if the merged instr stored multiple components and extract them for rewrite */
6067                   unsigned use_components = merge_intr == base ? orig_components : merge_intr->num_components;
6068                   for (unsigned k = 0; k < use_components; k++)
6069                      comp[j + k] = nir_channel(b, merge_intr->src[0].ssa, k);
6070                   j += use_components - 1;
6071                }
6072                nir_def *val = nir_vec(b, comp, num_contiguous);
6073                nir_src_rewrite(&base->src[0], val);
6074                nir_intrinsic_set_write_mask(base, BITFIELD_MASK(num_contiguous));
6075             }
6076             /* deleting instructions during a foreach explodes the compiler, so delete later */
6077             for (unsigned j = 0; j < num_contiguous; j++) {
6078                unsigned merge_c = j + first_c;
6079                nir_intrinsic_instr *merge_intr = state.merge[merge_c];
6080                if (merge_intr != base)
6081                   _mesa_set_add(data, &merge_intr->instr);
6082             }
6083          }
6084          contiguous_mask = 0;
6085          num_contiguous = 0;
6086       }
6087    }
6088 
6089    return true;
6090 }
6091 
6092 /* attempt to revectorize scalar i/o, ignoring xfb and "hard stuff" */
6093 static bool
6094 trivial_revectorize(nir_shader *nir)
6095 {
6096    struct set deletions;
6097 
6098    if (nir->info.stage > MESA_SHADER_FRAGMENT)
6099       return false;
6100 
6101    _mesa_set_init(&deletions, NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6102    bool progress = nir_shader_intrinsics_pass(nir, trivial_revectorize_scan, nir_metadata_dominance, &deletions);
6103    /* now it's safe to delete */
6104    set_foreach_remove(&deletions, entry) {
6105       nir_instr *instr = (void*)entry->key;
6106       nir_instr_remove(instr);
6107    }
6108    ralloc_free(deletions.table);
6109    return progress;
6110 }
6111 
6112 static bool
6113 flatten_image_arrays_intr(struct nir_builder *b, nir_instr *instr, void *data)
6114 {
6115    if (instr->type != nir_instr_type_deref)
6116       return false;
6117 
6118    nir_deref_instr *deref = nir_instr_as_deref(instr);
6119    if (deref->deref_type != nir_deref_type_array)
6120       return false;
6121    nir_deref_instr *parent = nir_deref_instr_parent(deref);
6122    if (!parent || parent->deref_type != nir_deref_type_array)
6123       return false;
6124    nir_variable *var = nir_deref_instr_get_variable(deref);
6125    const struct glsl_type *type = glsl_without_array(var->type);
6126    if (type == var->type || (!glsl_type_is_sampler(type) && !glsl_type_is_image(type)))
6127       return false;
6128 
6129    nir_deref_instr *parent_parent = nir_deref_instr_parent(parent);
6130    int parent_size = glsl_array_size(parent->type);
6131    b->cursor = nir_after_instr(instr);
6132    nir_deref_instr *new_deref = nir_build_deref_array(b, parent_parent, nir_iadd(b, nir_imul_imm(b, parent->arr.index.ssa, parent_size), deref->arr.index.ssa));
6133    nir_def_rewrite_uses_after(&deref->def, &new_deref->def, &new_deref->instr);
6134    _mesa_set_add(data, instr);
6135    _mesa_set_add(data, &parent->instr);
6136    return true;
6137 }
6138 
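/* flatten arrays-of-arrays of samplers/images into a single flat array and
 * fold the nested array derefs into one deref with a combined index
 * (outer * inner_size + inner)
 */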
6139 static bool
6140 flatten_image_arrays(nir_shader *nir)
6141 {
6142    bool progress = false;
6143    nir_foreach_variable_with_modes(var, nir, nir_var_uniform | nir_var_image) {
6144       const struct glsl_type *type = glsl_without_array(var->type);
6145       if (!glsl_type_is_sampler(type) && !glsl_type_is_image(type))
6146          continue;
6147       if (type == var->type)
6148          continue;
6149       var->type = glsl_array_type(type, glsl_get_aoa_size(var->type), sizeof(void*));
6150       progress = true;
6151    }
6152    struct set *deletions = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
6153    progress |= nir_shader_instructions_pass(nir, flatten_image_arrays_intr, nir_metadata_dominance, deletions);
6154    set_foreach_remove(deletions, he) {
6155       nir_instr *instr = (void*)he->key;
6156       nir_instr_remove_v(instr);
6157    }
6158    _mesa_set_destroy(deletions, NULL);
6159    if (progress)
6160       nir_fixup_deref_types(nir);
6161    return progress;
6162 }
6163 
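/* rewrite constant array deref indices that are out of bounds for the array being
 * indexed back to zero rather than reading past the end of the array
 */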
6164 static bool
6165 bound_image_arrays_instr(struct nir_builder *b, nir_instr *instr, void *data)
6166 {
6167    if (instr->type != nir_instr_type_deref)
6168       return false;
6169 
6170    nir_deref_instr *deref = nir_instr_as_deref(instr);
6171    if (deref->deref_type != nir_deref_type_array)
6172       return false;
6173 
6174    if (!nir_src_is_const(deref->arr.index))
6175       return false;
6176    nir_deref_instr *parent = nir_deref_instr_parent(deref);
6177    int parent_size = glsl_array_size(parent->type);
6178    unsigned idx = nir_src_as_uint(deref->arr.index);
6179    if (idx >= parent_size) {
6180       b->cursor = nir_before_instr(instr);
6181       nir_src_rewrite(&deref->arr.index, nir_imm_zero(b, 1, 32));
6182       return true;
6183    }
6184    return false;
6185 }
6186 
6187 static bool
6188 bound_image_arrays(nir_shader *nir)
6189 {
6190    return nir_shader_instructions_pass(nir, bound_image_arrays_instr, nir_metadata_dominance, NULL);
6191 }
6192 
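/* allocate the zink_shader wrapper for freshly-created NIR: record edgeflag usage,
 * float-controls and memory-model capabilities from the screen, copy the shader_info,
 * and stash the NIR; the heavier lowering happens later in zink_shader_init()
 */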
6193 struct zink_shader *
6194 zink_shader_create(struct zink_screen *screen, struct nir_shader *nir)
6195 {
6196    struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6197 
6198    zs->has_edgeflags = nir->info.stage == MESA_SHADER_VERTEX &&
6199                        nir->info.outputs_written & VARYING_BIT_EDGE;
6200 
6201    zs->sinfo.have_vulkan_memory_model = screen->info.have_KHR_vulkan_memory_model;
6202    zs->sinfo.have_workgroup_memory_explicit_layout = screen->info.have_KHR_workgroup_memory_explicit_layout;
6203    if (screen->info.have_KHR_shader_float_controls) {
6204       if (screen->info.props12.shaderDenormFlushToZeroFloat16)
6205          zs->sinfo.float_controls.flush_denorms |= 0x1;
6206       if (screen->info.props12.shaderDenormFlushToZeroFloat32)
6207          zs->sinfo.float_controls.flush_denorms |= 0x2;
6208       if (screen->info.props12.shaderDenormFlushToZeroFloat64)
6209          zs->sinfo.float_controls.flush_denorms |= 0x4;
6210 
6211       if (screen->info.props12.shaderDenormPreserveFloat16)
6212          zs->sinfo.float_controls.preserve_denorms |= 0x1;
6213       if (screen->info.props12.shaderDenormPreserveFloat32)
6214          zs->sinfo.float_controls.preserve_denorms |= 0x2;
6215       if (screen->info.props12.shaderDenormPreserveFloat64)
6216          zs->sinfo.float_controls.preserve_denorms |= 0x4;
6217 
6218       zs->sinfo.float_controls.denorms_all_independence =
6219          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
6220 
6221       zs->sinfo.float_controls.denorms_32_bit_independence =
6222          zs->sinfo.float_controls.denorms_all_independence ||
6223          screen->info.props12.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
6224    }
6225    zs->sinfo.bindless_set_idx = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6226 
6227    util_queue_fence_init(&zs->precompile.fence);
6228    util_dynarray_init(&zs->pipeline_libs, zs);
6229    zs->hash = _mesa_hash_pointer(zs);
6230 
6231    zs->programs = _mesa_pointer_set_create(NULL);
6232    simple_mtx_init(&zs->lock, mtx_plain);
6233    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6234    zs->info.name = ralloc_strdup(zs, nir->info.name);
6235 
6236    zs->can_inline = true;
6237    zs->nir = nir;
6238 
6239    if (nir->info.stage != MESA_SHADER_KERNEL)
6240       match_tex_dests(nir, zs, true);
6241 
6242    return zs;
6243 }
6244 
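/* main zink lowering pipeline: scalarize and rework i/o, lower subgroup and 64-bit
 * operations as needed, assign Vulkan descriptor sets/bindings to every resource
 * variable, and finally serialize the lowered NIR into zs->blob
 */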
6245 void
6246 zink_shader_init(struct zink_screen *screen, struct zink_shader *zs)
6247 {
6248    bool have_psiz = false;
6249    nir_shader *nir = zs->nir;
6250 
6251    if (nir->info.stage == MESA_SHADER_KERNEL) {
6252       nir_lower_mem_access_bit_sizes_options lower_mem_access_options = {
6253          .modes = nir_var_all ^ nir_var_function_temp,
6254          .may_lower_unaligned_stores_to_atomics = true,
6255          .callback = mem_access_size_align_cb,
6256          .cb_data = screen,
6257       };
6258       NIR_PASS_V(nir, nir_lower_mem_access_bit_sizes, &lower_mem_access_options);
6259       NIR_PASS_V(nir, nir_lower_bit_size, zink_lower_bit_size_cb, NULL);
6260       NIR_PASS_V(nir, alias_scratch_memory);
6261       NIR_PASS_V(nir, nir_lower_alu_width, lower_vec816_alu, NULL);
6262       NIR_PASS_V(nir, nir_lower_alu_vec8_16_srcs);
6263    }
6264 
6265    NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in | nir_var_shader_out, NULL, NULL);
6266    optimize_nir(nir, NULL, true);
6267    NIR_PASS_V(nir, bound_image_arrays);
6268    NIR_PASS_V(nir, flatten_image_arrays);
6269    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
6270       if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
6271          NIR_PASS_V(nir, lower_bindless_io);
6272          break;
6273       }
6274    }
6275    if (nir->info.stage < MESA_SHADER_FRAGMENT)
6276       nir_gather_xfb_info_from_intrinsics(nir);
6277    NIR_PASS_V(nir, fix_vertex_input_locations);
6278    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6279    scan_nir(screen, nir, zs);
6280    NIR_PASS_V(nir, nir_opt_vectorize, NULL, NULL);
6281    NIR_PASS_V(nir, trivial_revectorize);
6282    if (nir->info.io_lowered) {
6283       rework_io_vars(nir, nir_var_shader_in, zs);
6284       rework_io_vars(nir, nir_var_shader_out, zs);
6285       nir_sort_variables_by_location(nir, nir_var_shader_in);
6286       nir_sort_variables_by_location(nir, nir_var_shader_out);
6287    }
6288 
6289    if (nir->info.stage < MESA_SHADER_COMPUTE)
6290       create_gfx_pushconst(nir);
6291 
6292    if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
6293             nir->info.stage == MESA_SHADER_TESS_EVAL)
6294       NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
6295 
6296    if (nir->info.stage < MESA_SHADER_FRAGMENT)
6297       have_psiz = check_psiz(nir);
6298    if (nir->info.stage == MESA_SHADER_FRAGMENT)
6299       zs->flat_flags = zink_flat_flags(nir);
6300 
6301    if (!gl_shader_stage_is_compute(nir->info.stage) && nir->info.separate_shader)
6302       NIR_PASS_V(nir, fixup_io_locations);
6303 
6304    NIR_PASS_V(nir, lower_basevertex);
6305    NIR_PASS_V(nir, lower_baseinstance);
6306    NIR_PASS_V(nir, split_bitfields);
6307    if (!screen->info.feats.features.shaderStorageImageMultisample)
6308       NIR_PASS_V(nir, strip_tex_ms);
6309    NIR_PASS_V(nir, nir_lower_frexp); /* TODO: Use the spirv instructions for this. */
6310 
6311    if (screen->need_2D_zs)
6312       NIR_PASS_V(nir, lower_1d_shadow, screen);
6313 
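   /* lower subgroup operations to scalar 32-bit ballots; if the device doesn't expose
    * subgroup support for this stage, pretend the subgroup size is 1 so votes and
    * ballots become trivial
    */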
6314    {
6315       nir_lower_subgroups_options subgroup_options = {0};
6316       subgroup_options.lower_to_scalar = true;
6317       subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
6318       subgroup_options.ballot_bit_size = 32;
6319       subgroup_options.ballot_components = 4;
6320       subgroup_options.lower_subgroup_masks = true;
6321       if (!(screen->info.subgroup.supportedStages & mesa_to_vk_shader_stage(clamp_stage(&nir->info)))) {
6322          subgroup_options.subgroup_size = 1;
6323          subgroup_options.lower_vote_trivial = true;
6324       }
6325       subgroup_options.lower_inverse_ballot = true;
6326       NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
6327    }
6328 
6329    optimize_nir(nir, NULL, true);
6330    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6331    NIR_PASS_V(nir, nir_lower_discard_if, (nir_lower_discard_if_to_cf |
6332                                           nir_lower_demote_if_to_cf |
6333                                           nir_lower_terminate_if_to_cf));
6334 
6335    bool needs_size = analyze_io(zs, nir);
6336    NIR_PASS_V(nir, unbreak_bos, zs, needs_size);
6337    /* if uniforms might be inlined, these passes are deferred to compile time instead */
6338    if (!screen->driconf.inline_uniforms && !nir->info.num_inlinable_uniforms) {
6339       NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_shared, NULL, NULL);
6340       NIR_PASS_V(nir, rewrite_bo_access, screen);
6341       NIR_PASS_V(nir, remove_bo_access, zs);
6342    }
6343 
6344    struct zink_bindless_info bindless = {0};
6345    bindless.bindless_set = screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS];
6346    nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out)
6347       var->data.is_xfb = false;
6348 
6349    optimize_nir(nir, NULL, true);
6350    prune_io(nir);
6351 
6352    if (nir->info.stage == MESA_SHADER_KERNEL) {
6353       NIR_PASS_V(nir, type_images);
6354    }
6355 
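   /* walk resource variables in reverse declaration order and hand out Vulkan
    * descriptor set/binding pairs: UBO 0 stays a push/dynamic descriptor, other
    * UBOs, SSBOs, samplers and images are recorded in zs->bindings[] for later
    * pipeline-layout creation, bindless variables go to the dedicated bindless set,
    * and leftover dead uniforms are removed
    */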
6356    unsigned ubo_binding_mask = 0;
6357    unsigned ssbo_binding_mask = 0;
6358    foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
6359       if (_nir_shader_variable_has_mode(var, nir_var_uniform |
6360                                         nir_var_image |
6361                                         nir_var_mem_ubo |
6362                                         nir_var_mem_ssbo)) {
6363          enum zink_descriptor_type ztype;
6364          const struct glsl_type *type = glsl_without_array(var->type);
6365          if (var->data.mode == nir_var_mem_ubo) {
6366             ztype = ZINK_DESCRIPTOR_TYPE_UBO;
6367             /* buffer 0 is a push descriptor */
6368             var->data.descriptor_set = !!var->data.driver_location;
6369             var->data.binding = !var->data.driver_location ? clamp_stage(&nir->info) :
6370                                 zink_binding(nir->info.stage,
6371                                              VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
6372                                              var->data.driver_location,
6373                                              screen->compact_descriptors);
6374             assert(var->data.driver_location || var->data.binding < 10);
6375             VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
6376             int binding = var->data.binding;
6377 
6378             if (!var->data.driver_location) {
6379                zs->has_uniforms = true;
6380             } else if (!(ubo_binding_mask & BITFIELD_BIT(binding))) {
6381                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6382                zs->bindings[ztype][zs->num_bindings[ztype]].binding = binding;
6383                zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6384                zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6385                assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6386                zs->num_bindings[ztype]++;
6387                ubo_binding_mask |= BITFIELD_BIT(binding);
6388             }
6389          } else if (var->data.mode == nir_var_mem_ssbo) {
6390             ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
6391             var->data.descriptor_set = screen->desc_set_id[ztype];
6392             var->data.binding = zink_binding(clamp_stage(&nir->info),
6393                                              VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
6394                                              var->data.driver_location,
6395                                              screen->compact_descriptors);
6396             if (!(ssbo_binding_mask & BITFIELD_BIT(var->data.binding))) {
6397                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6398                zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6399                zs->bindings[ztype][zs->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
6400                zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_length(var->type);
6401                assert(zs->bindings[ztype][zs->num_bindings[ztype]].size);
6402                zs->num_bindings[ztype]++;
6403                ssbo_binding_mask |= BITFIELD_BIT(var->data.binding);
6404             }
6405          } else {
6406             assert(var->data.mode == nir_var_uniform ||
6407                    var->data.mode == nir_var_image);
6408             if (var->data.bindless) {
6409                zs->bindless = true;
6410                handle_bindless_var(nir, var, type, &bindless);
6411             } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
6412                VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : glsl_type_is_bare_sampler(type) ? VK_DESCRIPTOR_TYPE_SAMPLER : zink_sampler_type(type);
6413                if (nir->info.stage == MESA_SHADER_KERNEL && vktype == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
6414                   vktype = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
6415                ztype = zink_desc_type_from_vktype(vktype);
6416                var->data.driver_location = var->data.binding;
6417                var->data.descriptor_set = screen->desc_set_id[ztype];
6418                var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location, screen->compact_descriptors);
6419                zs->bindings[ztype][zs->num_bindings[ztype]].index = var->data.driver_location;
6420                zs->bindings[ztype][zs->num_bindings[ztype]].binding = var->data.binding;
6421                zs->bindings[ztype][zs->num_bindings[ztype]].type = vktype;
6422                if (glsl_type_is_array(var->type))
6423                   zs->bindings[ztype][zs->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
6424                else
6425                   zs->bindings[ztype][zs->num_bindings[ztype]].size = 1;
6426                zs->num_bindings[ztype]++;
6427             } else if (var->data.mode == nir_var_uniform) {
6428                /* this is a dead uniform */
6429                var->data.mode = 0;
6430                exec_node_remove(&var->node);
6431             }
6432          }
6433       }
6434    }
6435    bool bindless_lowered = false;
6436    NIR_PASS(bindless_lowered, nir, lower_bindless, &bindless);
6437    zs->bindless |= bindless_lowered;
6438 
6439    if (!screen->info.feats.features.shaderInt64 || !screen->info.feats.features.shaderFloat64)
6440       NIR_PASS_V(nir, lower_64bit_vars, screen->info.feats.features.shaderInt64);
6441    if (nir->info.stage != MESA_SHADER_KERNEL)
6442       NIR_PASS_V(nir, match_tex_dests, zs, false);
6443 
6444    if (!nir->info.internal)
6445       nir_foreach_shader_out_variable(var, nir)
6446          var->data.explicit_xfb_buffer = 0;
6447    if (nir->xfb_info && nir->xfb_info->output_count && nir->info.outputs_written)
6448       update_so_info(zs, nir, nir->info.outputs_written, have_psiz);
6449    zink_shader_serialize_blob(nir, &zs->blob);
6450    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6451 }
6452 
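/* final screen-level NIR fixups before the shader is handed back to gallium:
 * lower texture projection variants that can't be expressed for SPIR-V sampling,
 * run one more optimization loop, and gather inlinable uniforms when driconf
 * uniform inlining is enabled
 */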
6453 char *
6454 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
6455 {
6456    struct zink_screen *screen = zink_screen(pscreen);
6457    nir_shader *nir = nirptr;
6458 
6459    nir_lower_tex_options tex_opts = {
6460       .lower_invalid_implicit_lod = true,
6461    };
6462    /*
6463       Sampled Image must be an object whose type is OpTypeSampledImage.
6464       The Dim operand of the underlying OpTypeImage must be 1D, 2D, 3D,
6465       or Rect, and the Arrayed and MS operands must be 0.
6466       - SPIRV, OpImageSampleProj* opcodes
6467     */
6468    tex_opts.lower_txp = BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) |
6469                         BITFIELD_BIT(GLSL_SAMPLER_DIM_MS);
6470    tex_opts.lower_txp_array = true;
6471    if (!screen->info.feats.features.shaderImageGatherExtended)
6472       tex_opts.lower_tg4_offsets = true;
6473    NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
6474    optimize_nir(nir, NULL, false);
6475    if (nir->info.stage == MESA_SHADER_VERTEX)
6476       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
6477    if (screen->driconf.inline_uniforms)
6478       nir_find_inlinable_uniforms(nir);
6479 
6480    return NULL;
6481 }
6482 
6483 void
6484 zink_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6485 {
6486    _mesa_set_destroy(shader->programs, NULL);
6487    util_queue_fence_wait(&shader->precompile.fence);
6488    util_queue_fence_destroy(&shader->precompile.fence);
6489    zink_descriptor_shader_deinit(screen, shader);
6490    if (screen->info.have_EXT_shader_object) {
6491       VKSCR(DestroyShaderEXT)(screen->dev, shader->precompile.obj.obj, NULL);
6492    } else {
6493       if (shader->precompile.obj.mod)
6494          VKSCR(DestroyShaderModule)(screen->dev, shader->precompile.obj.mod, NULL);
6495       if (shader->precompile.gpl)
6496          VKSCR(DestroyPipeline)(screen->dev, shader->precompile.gpl, NULL);
6497    }
6498    blob_finish(&shader->blob);
6499    ralloc_free(shader->spirv);
6500    free(shader->precompile.bindings);
6501    ralloc_free(shader);
6502 }
6503 
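/* detach one gfx program still holding this shader: drop it from the context's
 * program cache, wait for any in-flight pipeline compiles, clear the shader's slot
 * in the program, and release the program reference; returns false once no programs
 * remain so callers can loop until the set is empty
 */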
6504 static bool
6505 gfx_shader_prune(struct zink_screen *screen, struct zink_shader *shader)
6506 {
6507    /* this shader may still be precompiling, so access here must be locked and singular */
6508    simple_mtx_lock(&shader->lock);
6509    struct set_entry *entry = _mesa_set_next_entry(shader->programs, NULL);
6510    struct zink_gfx_program *prog = (void*)(entry ? entry->key : NULL);
6511    if (entry)
6512       _mesa_set_remove(shader->programs, entry);
6513    simple_mtx_unlock(&shader->lock);
6514    if (!prog)
6515       return false;
6516    gl_shader_stage stage = shader->info.stage;
6517    assert(stage < ZINK_GFX_SHADER_COUNT);
6518    util_queue_fence_wait(&prog->base.cache_fence);
6519    unsigned stages_present = prog->stages_present;
6520    if (prog->shaders[MESA_SHADER_TESS_CTRL] &&
6521          prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated)
6522       stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL);
6523    unsigned idx = zink_program_cache_stages(stages_present);
6524    if (!prog->base.removed && prog->stages_present == prog->stages_remaining &&
6525          (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated)) {
6526       struct hash_table *ht = &prog->base.ctx->program_cache[idx];
6527       simple_mtx_lock(&prog->base.ctx->program_lock[idx]);
6528       struct hash_entry *he = _mesa_hash_table_search(ht, prog->shaders);
6529       assert(he && he->data == prog);
6530       _mesa_hash_table_remove(ht, he);
6531       prog->base.removed = true;
6532       simple_mtx_unlock(&prog->base.ctx->program_lock[idx]);
6533 
6534       for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) {
6535          for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) {
6536             hash_table_foreach(&prog->pipelines[r][i], table_entry) {
6537                struct zink_gfx_pipeline_cache_entry *pc_entry = table_entry->data;
6538 
6539                util_queue_fence_wait(&pc_entry->fence);
6540             }
6541          }
6542       }
6543    }
6544    if (stage == MESA_SHADER_FRAGMENT || !shader->non_fs.is_generated) {
6545       prog->shaders[stage] = NULL;
6546       prog->stages_remaining &= ~BITFIELD_BIT(stage);
6547    }
6548    /* only remove generated tcs during parent tes destruction */
6549    if (stage == MESA_SHADER_TESS_EVAL && shader->non_fs.generated_tcs)
6550       prog->shaders[MESA_SHADER_TESS_CTRL] = NULL;
6551    if (stage != MESA_SHADER_FRAGMENT &&
6552       prog->shaders[MESA_SHADER_GEOMETRY] &&
6553       prog->shaders[MESA_SHADER_GEOMETRY]->non_fs.parent ==
6554       shader) {
6555       prog->shaders[MESA_SHADER_GEOMETRY] = NULL;
6556    }
6557    zink_gfx_program_reference(screen, &prog, NULL);
6558    return true;
6559 }
6560 
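/* destroy a non-compute shader: prune every program and pipeline library that still
 * references it, recursively free any generated tcs/gs shaders it owns, then free
 * the shader itself
 */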
6561 void
6562 zink_gfx_shader_free(struct zink_screen *screen, struct zink_shader *shader)
6563 {
6564    assert(shader->info.stage != MESA_SHADER_COMPUTE);
6565    util_queue_fence_wait(&shader->precompile.fence);
6566 
6567    /* if the shader is still precompiling, the program set must be pruned under lock */
6568    while (gfx_shader_prune(screen, shader));
6569 
6570    while (util_dynarray_contains(&shader->pipeline_libs, struct zink_gfx_lib_cache*)) {
6571       struct zink_gfx_lib_cache *libs = util_dynarray_pop(&shader->pipeline_libs, struct zink_gfx_lib_cache*);
6572       if (!libs->removed) {
6573          libs->removed = true;
6574          unsigned idx = zink_program_cache_stages(libs->stages_present);
6575          simple_mtx_lock(&screen->pipeline_libs_lock[idx]);
6576          _mesa_set_remove_key(&screen->pipeline_libs[idx], libs);
6577          simple_mtx_unlock(&screen->pipeline_libs_lock[idx]);
6578       }
6579       zink_gfx_lib_cache_unref(screen, libs);
6580    }
6581    if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
6582        shader->non_fs.generated_tcs) {
6583       /* automatically destroy generated tcs shaders when tes is destroyed */
6584       zink_gfx_shader_free(screen, shader->non_fs.generated_tcs);
6585       shader->non_fs.generated_tcs = NULL;
6586    }
6587    if (shader->info.stage != MESA_SHADER_FRAGMENT) {
6588       for (unsigned int i = 0; i < ARRAY_SIZE(shader->non_fs.generated_gs); i++) {
6589          for (int j = 0; j < ARRAY_SIZE(shader->non_fs.generated_gs[0]); j++) {
6590             if (shader->non_fs.generated_gs[i][j]) {
6591                /* automatically destroy generated gs shaders when owner is destroyed */
6592                zink_gfx_shader_free(screen, shader->non_fs.generated_gs[i][j]);
6593                shader->non_fs.generated_gs[i][j] = NULL;
6594             }
6595          }
6596       }
6597    }
6598    zink_shader_free(screen, shader);
6599 }
6600 
6601 
6602 struct zink_shader_object
6603 zink_shader_tcs_compile(struct zink_screen *screen, struct zink_shader *zs, unsigned patch_vertices, bool can_shobj, struct zink_program *pg)
6604 {
6605    assert(zs->info.stage == MESA_SHADER_TESS_CTRL);
6606    /* shortcut all the nir passes since we just have to change this one word */
6607    zs->spirv->words[zs->spirv->tcs_vertices_out_word] = patch_vertices;
6608    return zink_shader_spirv_compile(screen, zs, NULL, can_shobj, pg);
6609 }
6610 
6611 /* creating a passthrough tcs shader that's roughly:
6612 
6613 #version 150
6614 #extension GL_ARB_tessellation_shader : require
6615 
6616 in vec4 some_var[gl_MaxPatchVertices];
6617 out vec4 some_var_out;
6618 
6619 layout(push_constant) uniform tcsPushConstants {
6620     layout(offset = 0) float TessLevelInner[2];
6621     layout(offset = 8) float TessLevelOuter[4];
6622 } u_tcsPushConstants;
6623 layout(vertices = $vertices_per_patch) out;
6624 void main()
6625 {
6626   gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
6627   gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
6628   some_var_out = some_var[gl_InvocationID];
6629 }
6630 
6631 */
6632 void
6633 zink_shader_tcs_init(struct zink_screen *screen, struct zink_shader *zs, nir_shader *tes, nir_shader **nir_ret)
6634 {
6635    nir_shader *nir = zs->nir;
6636 
6637    nir_builder b = nir_builder_at(nir_before_impl(nir_shader_get_entrypoint(nir)));
6638 
6639    nir_def *invocation_id = nir_load_invocation_id(&b);
6640 
6641    nir_foreach_shader_in_variable(var, tes) {
6642       if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
6643          continue;
6644       const struct glsl_type *in_type = var->type;
6645       const struct glsl_type *out_type = var->type;
6646       char buf[1024];
6647       snprintf(buf, sizeof(buf), "%s_out", var->name);
6648       if (!nir_is_arrayed_io(var, MESA_SHADER_TESS_EVAL)) {
6649          const struct glsl_type *type = var->type;
6650          in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
6651          out_type = glsl_array_type(type, nir->info.tess.tcs_vertices_out, 0);
6652       }
6653 
6654       nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
6655       nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
6656       out->data.location = in->data.location = var->data.location;
6657       out->data.location_frac = in->data.location_frac = var->data.location_frac;
6658 
6659       /* gl_in[] receives values from equivalent built-in output
6660          variables written by the vertex shader (section 2.14.7).  Each array
6661          element of gl_in[] is a structure holding values for a specific vertex of
6662          the input patch.  The length of gl_in[] is equal to the
6663          implementation-dependent maximum patch size (gl_MaxPatchVertices).
6664          - ARB_tessellation_shader
6665        */
6666       /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
6667       nir_deref_instr *in_value = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
6668       nir_deref_instr *out_value = nir_build_deref_array(&b, nir_build_deref_var(&b, out), invocation_id);
6669       copy_vars(&b, out_value, in_value);
6670    }
6671    nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
6672    gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
6673    gl_TessLevelInner->data.patch = 1;
6674    nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
6675    gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
6676    gl_TessLevelOuter->data.patch = 1;
6677 
6678    create_gfx_pushconst(nir);
6679 
6680    nir_def *load_inner = nir_load_push_constant_zink(&b, 2, 32,
6681                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_INNER_LEVEL));
6682    nir_def *load_outer = nir_load_push_constant_zink(&b, 4, 32,
6683                                                          nir_imm_int(&b, ZINK_GFX_PUSHCONST_DEFAULT_OUTER_LEVEL));
6684 
6685    for (unsigned i = 0; i < 2; i++) {
6686       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
6687       nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
6688    }
6689    for (unsigned i = 0; i < 4; i++) {
6690       nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
6691       nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
6692    }
6693 
6694    nir_validate_shader(nir, "created");
6695 
6696    optimize_nir(nir, NULL, true);
6697    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
6698    NIR_PASS_V(nir, nir_convert_from_ssa, true);
6699 
6700    *nir_ret = nir;
6701    zink_shader_serialize_blob(nir, &zs->blob);
6702 }
6703 
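/* create the shell of a generated passthrough tcs: an empty NIR shader containing
 * only a main entrypoint with the patch vertex count set; zink_shader_tcs_init()
 * fills in the body once the paired tes is known
 */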
6704 struct zink_shader *
6705 zink_shader_tcs_create(struct zink_screen *screen, unsigned vertices_per_patch)
6706 {
6707    struct zink_shader *zs = rzalloc(NULL, struct zink_shader);
6708    util_queue_fence_init(&zs->precompile.fence);
6709    zs->hash = _mesa_hash_pointer(zs);
6710    zs->programs = _mesa_pointer_set_create(NULL);
6711    simple_mtx_init(&zs->lock, mtx_plain);
6712 
6713    nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
6714    nir_function *fn = nir_function_create(nir, "main");
6715    fn->is_entrypoint = true;
6716    nir_function_impl_create(fn);
6717    zs->nir = nir;
6718 
6719    nir->info.tess.tcs_vertices_out = vertices_per_patch;
6720    memcpy(&zs->info, &nir->info, sizeof(nir->info));
6721    zs->non_fs.is_generated = true;
6722    return zs;
6723 }
6724 
6725 bool
6726 zink_shader_has_cubes(nir_shader *nir)
6727 {
6728    nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
6729       const struct glsl_type *type = glsl_without_array(var->type);
6730       if (glsl_type_is_sampler(type) && glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE)
6731          return true;
6732    }
6733    return false;
6734 }
6735 
6736 nir_shader *
6737 zink_shader_blob_deserialize(struct zink_screen *screen, struct blob *blob)
6738 {
6739    struct blob_reader blob_reader;
6740    blob_reader_init(&blob_reader, blob->data, blob->size);
6741    return nir_deserialize(NULL, &screen->nir_options, &blob_reader);
6742 }
6743 
6744 nir_shader *
6745 zink_shader_deserialize(struct zink_screen *screen, struct zink_shader *zs)
6746 {
6747    return zink_shader_blob_deserialize(screen, &zs->blob);
6748 }
6749 
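/* serialize NIR into a blob for storage on the zink_shader; debug builds keep the
 * extra NIR debug info only when a ZINK_DEBUG output flag needs readable shaders
 */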
6750 void
6751 zink_shader_serialize_blob(nir_shader *nir, struct blob *blob)
6752 {
6753    blob_init(blob);
6754 #ifndef NDEBUG
6755    bool strip = !(zink_debug & (ZINK_DEBUG_NIR | ZINK_DEBUG_SPIRV | ZINK_DEBUG_TGSI));
6756 #else
6757    bool strip = false;
6758 #endif
6759    nir_serialize(blob, nir, strip);
6760 }
6761 
6762 void
6763 zink_print_shader(struct zink_screen *screen, struct zink_shader *zs, FILE *fp)
6764 {
6765    nir_shader *nir = zink_shader_deserialize(screen, zs);
6766    nir_print_shader(nir, fp);
6767    ralloc_free(nir);
6768 }
6769