xref: /aosp_15_r20/external/mesa3d/src/compiler/nir/nir_lower_clip_cull_distance_arrays.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 
28 /**
29  * This file contains two different lowering passes.
30  *
31  * 1. nir_lower_clip_cull_distance_arrays()
32  *
33  *    This pass combines clip and cull distance arrays in separate locations
34  *    and colocates them both in VARYING_SLOT_CLIP_DIST0.  It does so by
35  *    maintaining two arrays but making them compact and using location_frac
36  *    to stack them on top of each other.
37  *
38  * 2. nir_lower_clip_cull_distance_to_vec4s()
39  *
40  *    This pass accounts for the difference between the way
41  *    gl_ClipDistance is declared in standard GLSL (as an array of
42  *    floats), and the way it is frequently implemented in hardware (as
43  *    a pair of vec4s, with four clip distances packed into each).
44  *
45  *    The declaration of gl_ClipDistance is replaced with a declaration
46  *    of gl_ClipDistanceMESA, and any references to gl_ClipDistance are
47  *    translated to refer to gl_ClipDistanceMESA with the appropriate
48  *    swizzling of array indices.  For instance:
49  *
50  *      gl_ClipDistance[i]
51  *
52  *    is translated into:
53  *
54  *      gl_ClipDistanceMESA[i>>2][i&3]
55  */
56 
57 #define GLSL_CLIP_VAR_NAME "gl_ClipDistanceMESA"
58 
59 struct lower_distance_state {
60    /**
61     * Pointer to the declaration of gl_ClipDistance, if found.
62     *
63     * Note:
64     *
65     * - the in_var is for geometry and both tessellation shader inputs only.
66     *
67     * - since gl_ClipDistance is available in tessellation control,
68     *   tessellation evaluation and geometry shaders as both an input
69     *   and an output, it's possible for both old_distance_out_var
70     *   and old_distance_in_var to be non-null.
71     */
72    nir_variable *old_distance_out_var;
73    nir_variable *old_distance_in_var;
74 
75    /**
76     * Pointer to the newly-created gl_ClipDistanceMESA variable.
77     */
78    nir_variable *new_distance_out_var;
79    nir_variable *new_distance_in_var;
80 
81    /**
82     * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
83     */
84    gl_shader_stage shader_stage;
85    const char *in_name;
86    int total_size;
87    int offset;
88 };
89 
90 /**
91  * Get the length of the clip/cull distance array, looking past
92  * any interface block arrays.
93  */
94 static unsigned
get_unwrapped_array_length(nir_shader * nir,nir_variable * var)95 get_unwrapped_array_length(nir_shader *nir, nir_variable *var)
96 {
97    if (!var)
98       return 0;
99 
100    /* Unwrap GS input and TCS input/output interfaces.  We want the
101     * underlying clip/cull distance array length, not the per-vertex
102     * array length.
103     */
104    const struct glsl_type *type = var->type;
105    if (nir_is_arrayed_io(var, nir->info.stage))
106       type = glsl_get_array_element(type);
107 
108    if (var->data.per_view) {
109       assert(glsl_type_is_array(type));
110       type = glsl_get_array_element(type);
111    }
112 
113    assert(glsl_type_is_array(type));
114 
115    return glsl_get_length(type);
116 }
117 
118 /**
119  * Replace any declaration of 'in_name' as an array of floats with a
120  * declaration of gl_ClipDistanceMESA as an array of vec4's.
121  */
122 static void
replace_var_declaration(struct lower_distance_state * state,nir_shader * sh,nir_variable * var,const char * in_name)123 replace_var_declaration(struct lower_distance_state *state, nir_shader *sh,
124                         nir_variable *var, const char *in_name)
125 {
126    nir_variable **old_var;
127    nir_variable **new_var;
128 
129    if (!var->name || strcmp(var->name, in_name) != 0)
130       return;
131 
132    assert(glsl_type_is_array(var->type));
133    if (var->data.mode == nir_var_shader_out) {
134       if (state->old_distance_out_var)
135          return;
136 
137       old_var = &state->old_distance_out_var;
138       new_var = &state->new_distance_out_var;
139    } else if (var->data.mode == nir_var_shader_in) {
140       if (state->old_distance_in_var)
141          return;
142 
143       old_var = &state->old_distance_in_var;
144       new_var = &state->new_distance_in_var;
145    } else {
146       unreachable("not reached");
147    }
148 
149    *old_var = var;
150 
151    if (!(*new_var)) {
152       unsigned new_size = (state->total_size + 3) / 4;
153 
154       *new_var = rzalloc(sh, nir_variable);
155       (*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
156       (*new_var)->data.mode = var->data.mode;
157       (*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
158       (*new_var)->data.assigned = true;
159       (*new_var)->data.how_declared = var->data.how_declared;
160 
161       nir_shader_add_variable(sh, *new_var);
162 
163       if (!glsl_type_is_array(glsl_get_array_element(var->type))) {
164          /* gl_ClipDistance (used for vertex, tessellation evaluation and
165           * geometry output, and fragment input).
166           */
167          assert((var->data.mode == nir_var_shader_in &&
168                  sh->info.stage == MESA_SHADER_FRAGMENT) ||
169                 (var->data.mode == nir_var_shader_out &&
170                  (sh->info.stage == MESA_SHADER_VERTEX ||
171                   sh->info.stage == MESA_SHADER_TESS_EVAL ||
172                   sh->info.stage == MESA_SHADER_GEOMETRY)));
173 
174          assert(glsl_get_base_type(glsl_get_array_element(var->type)) ==
175                 GLSL_TYPE_FLOAT);
176 
177          /* And change the properties that we need to change */
178          (*new_var)->type = glsl_array_type(glsl_vec4_type(), new_size, 0);
179       } else {
180          /* 2D gl_ClipDistance (used for tessellation control, tessellation
181           * evaluation and geometry input, and tessellation control output).
182           */
183          assert((var->data.mode == nir_var_shader_in &&
184                  (sh->info.stage == MESA_SHADER_GEOMETRY ||
185                   sh->info.stage == MESA_SHADER_TESS_EVAL)) ||
186                 sh->info.stage == MESA_SHADER_TESS_CTRL);
187 
188          assert (glsl_get_base_type(glsl_get_array_element(glsl_get_array_element(var->type))) ==
189                  GLSL_TYPE_FLOAT);
190 
191          /* And change the properties that we need to change */
192          (*new_var)->type =
193             glsl_array_type(glsl_array_type(glsl_vec4_type(), new_size, 0),
194                             glsl_array_size(var->type), 0);
195       }
196    }
197 }
198 
199 static nir_def *
interp_deref(nir_builder * b,nir_intrinsic_instr * old_intrin,nir_deref_instr * deref)200 interp_deref(nir_builder *b, nir_intrinsic_instr *old_intrin,
201              nir_deref_instr *deref)
202 {
203    nir_intrinsic_instr *intrin =
204       nir_intrinsic_instr_create(b->shader, old_intrin->intrinsic);
205    intrin->num_components = 4;
206    intrin->src[0] = nir_src_for_ssa(&deref->def);
207 
208    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
209        intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
210       intrin->src[1] = nir_src_for_ssa(old_intrin->src[1].ssa);
211 
212    nir_def_init(&intrin->instr, &intrin->def, 4, 32);
213    nir_builder_instr_insert(b, &intrin->instr);
214 
215    return &intrin->def;
216 }
217 
218 /* Replace any expression that indexes one of the floats in gl_ClipDistance
219  * with an expression that indexes into one of the vec4's in
220  * gl_ClipDistanceMESA and accesses the appropriate component.
221  */
222 static void
lower_distance_deref(struct lower_distance_state * state,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_instr * deref,nir_variable * new_var)223 lower_distance_deref(struct lower_distance_state *state, nir_builder *b,
224                      nir_intrinsic_instr *intrin, nir_deref_instr *deref,
225                      nir_variable *new_var)
226 {
227    nir_deref_path path;
228    nir_deref_path_init(&path, deref, NULL);
229 
230    assert(path.path[0]->deref_type == nir_deref_type_var);
231    nir_deref_instr **p = &path.path[1];
232 
233    b->cursor = nir_before_instr(&intrin->instr);
234    nir_deref_instr *deref_var = nir_build_deref_var(b, new_var);
235 
236    /* Handle 2D arrays such as Geom shader inputs */
237    if (glsl_type_is_array(glsl_get_array_element(new_var->type))) {
238       assert((*p)->deref_type == nir_deref_type_array);
239       deref_var = nir_build_deref_array(b, deref_var, (*p)->arr.index.ssa);
240       p++;
241    }
242 
243    assert((*p)->deref_type == nir_deref_type_array);
244 
245    /**
246     * Create the necessary values to index into gl_ClipDistanceMESA based
247     * on the value previously used to index into gl_ClipDistance.
248     *
249     * An array index selects one of the vec4's in gl_ClipDistanceMESA
250     * a swizzle then selects a component within the selected vec4.
251     */
252    nir_src old_index = (*p)->arr.index;
253    if (nir_src_is_const(old_index)) {
254       unsigned const_val = nir_src_as_uint(old_index) + state->offset;
255       unsigned swizzle = const_val % 4;
256 
257       nir_deref_instr *def_arr_instr =
258          nir_build_deref_array_imm(b, deref_var, const_val / 4);
259 
260       if (intrin->intrinsic == nir_intrinsic_store_deref) {
261          nir_def *value = intrin->src[1].ssa;
262          nir_build_write_masked_store(b, def_arr_instr, value, swizzle);
263       } else {
264          assert(intrin->intrinsic == nir_intrinsic_load_deref ||
265                 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
266                 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
267                 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
268 
269          nir_def *load_def;
270          if (intrin->intrinsic == nir_intrinsic_load_deref)
271             load_def = nir_load_deref(b, def_arr_instr);
272          else
273             load_def = interp_deref(b, intrin, def_arr_instr);
274 
275          nir_def *swz = nir_channel(b, load_def, swizzle);
276          nir_def_rewrite_uses(&intrin->def, swz);
277       }
278    } else {
279       nir_def *index = nir_iadd_imm(b, old_index.ssa, state->offset);
280       nir_def *swizzle = nir_umod_imm(b, index, 4);
281       index = nir_ishr_imm(b, index, 2); /* index / 4 */
282 
283       nir_deref_instr *def_arr_instr =
284          nir_build_deref_array(b, deref_var, index);
285 
286       if (intrin->intrinsic == nir_intrinsic_store_deref) {
287          nir_def *value = intrin->src[1].ssa;
288          nir_build_write_masked_stores(b, def_arr_instr, value, swizzle, 0, 4);
289       } else {
290          assert(intrin->intrinsic == nir_intrinsic_load_deref ||
291                 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
292                 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
293                 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
294 
295          nir_def *load_def;
296          if (intrin->intrinsic == nir_intrinsic_load_deref)
297             load_def = nir_load_deref(b, def_arr_instr);
298          else
299             load_def = interp_deref(b, intrin, def_arr_instr);
300 
301          nir_def *swz = nir_vector_extract(b, load_def, swizzle);
302          nir_def_rewrite_uses(&intrin->def, swz);
303       }
304    }
305 
306    nir_deref_path_finish(&path);
307 }
308 
309 static bool
replace_with_derefs_to_vec4(nir_builder * b,nir_intrinsic_instr * intr,void * cb_data)310 replace_with_derefs_to_vec4(nir_builder *b, nir_intrinsic_instr *intr,
311                             void *cb_data)
312 {
313    struct lower_distance_state *state =
314       (struct lower_distance_state *) cb_data;
315    nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
316 
317    /* Copy deref lowering is expected to happen before we get here */
318    assert(intr->intrinsic != nir_intrinsic_copy_deref);
319    assert(intr->intrinsic != nir_intrinsic_interp_deref_at_vertex);
320 
321    if (intr->intrinsic != nir_intrinsic_load_deref &&
322        intr->intrinsic != nir_intrinsic_store_deref &&
323        intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
324        intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
325        intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
326       return false;
327 
328    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
329    if (!nir_deref_mode_is_one_of(deref, mask))
330       return false;
331 
332    nir_variable *var = nir_deref_instr_get_variable(deref);
333 
334    /* The var has already been lowered to a temp so the derefs have already
335     * been replaced. We can end up here when a shader has both clip and cull
336     * arrays.
337     */
338    if (var->data.mode != nir_var_shader_in &&
339        var->data.mode != nir_var_shader_out)
340       return false;
341 
342    if (var->data.mode == nir_var_shader_out &&
343       var != state->old_distance_out_var)
344       return false;
345 
346    if (var->data.mode == nir_var_shader_in &&
347        var != state->old_distance_in_var)
348       return false;
349 
350    nir_variable *new_var = var->data.mode == nir_var_shader_in ?
351       state->new_distance_in_var : state->new_distance_out_var;
352 
353    lower_distance_deref(state, b, intr, deref, new_var);
354 
355    return true;
356 }
357 
358 static void
lower_distance_to_vec4(nir_shader * shader,struct lower_distance_state * state)359 lower_distance_to_vec4(nir_shader *shader, struct lower_distance_state *state)
360 {
361    /* Replace declarations */
362    nir_foreach_variable_with_modes_safe(var, shader,
363                                         nir_var_shader_in | nir_var_shader_out) {
364       replace_var_declaration(state, shader, var, state->in_name);
365    }
366 
367    if (!state->old_distance_in_var && !state->old_distance_out_var)
368       return;
369 
370    /* Replace derefs, we may have indirect store lowering which will change
371     * control flow of the shader.
372     */
373    nir_shader_intrinsics_pass(shader, replace_with_derefs_to_vec4,
374                               nir_metadata_none, state);
375 
376    /* Mark now lowered vars as ordinary globals to be dead code eliminated.
377     * Also clear the compact flag to avoid issues with validation.
378     */
379    if (state->old_distance_out_var) {
380       state->old_distance_out_var->data.mode = nir_var_shader_temp;
381       state->old_distance_out_var->data.compact = false;
382    }
383 
384    if (state->old_distance_in_var) {
385       state->old_distance_in_var->data.mode = nir_var_shader_temp;
386       state->old_distance_in_var->data.compact = false;
387    }
388 }
389 
390 bool
nir_lower_clip_cull_distance_to_vec4s(nir_shader * shader)391 nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader)
392 {
393    int clip_size = 0;
394    int cull_size = 0;
395 
396    nir_variable_mode mode = nir_var_shader_in | nir_var_shader_out;
397    nir_foreach_variable_with_modes(var, shader, mode) {
398       if ((var->data.mode == nir_var_shader_in &&
399            shader->info.stage == MESA_SHADER_VERTEX) ||
400           (var->data.mode == nir_var_shader_out &&
401            shader->info.stage == MESA_SHADER_FRAGMENT) ||
402           shader->info.stage == MESA_SHADER_COMPUTE)
403          continue;
404 
405 
406       if (var->data.location == VARYING_SLOT_CLIP_DIST0)
407          clip_size = MAX2(clip_size, get_unwrapped_array_length(shader, var));
408 
409       if (var->data.location == VARYING_SLOT_CULL_DIST0)
410          cull_size = MAX2(cull_size, get_unwrapped_array_length(shader, var));
411    }
412 
413    if (clip_size == 0 && cull_size == 0) {
414       nir_shader_preserve_all_metadata(shader);
415       return false;
416    }
417 
418    struct lower_distance_state state;
419    state.old_distance_out_var = NULL;
420    state.old_distance_in_var = NULL;
421    state.new_distance_out_var = NULL;
422    state.new_distance_in_var = NULL;
423    state.shader_stage = shader->info.stage;
424    state.in_name = "gl_ClipDistance";
425    state.total_size = clip_size + cull_size;
426    state.offset = 0;
427    lower_distance_to_vec4(shader, &state);
428 
429    state.old_distance_out_var = NULL;
430    state.old_distance_in_var = NULL;
431    state.in_name ="gl_CullDistance";
432    state.offset = clip_size;
433    lower_distance_to_vec4(shader, &state);
434 
435    nir_fixup_deref_modes(shader);
436 
437    /* Assume we made progress */
438    return true;
439 }
440 
441 static bool
combine_clip_cull(nir_shader * nir,nir_variable_mode mode,bool store_info)442 combine_clip_cull(nir_shader *nir,
443                   nir_variable_mode mode,
444                   bool store_info)
445 {
446    nir_variable *cull = NULL;
447    nir_variable *clip = NULL;
448 
449    nir_foreach_variable_with_modes(var, nir, mode) {
450       if (var->data.location == VARYING_SLOT_CLIP_DIST0)
451          clip = var;
452 
453       if (var->data.location == VARYING_SLOT_CULL_DIST0)
454          cull = var;
455    }
456 
457    if (!cull && !clip) {
458       /* If this is run after optimizations and the variables have been
459        * eliminated, we should update the shader info, because no other
460        * place does that.
461        */
462       if (store_info) {
463          nir->info.clip_distance_array_size = 0;
464          nir->info.cull_distance_array_size = 0;
465       }
466       return false;
467    }
468 
469    if (!cull && clip) {
470       /* The GLSL IR lowering pass must have converted these to vectors */
471       if (!clip->data.compact)
472          return false;
473 
474       /* If this pass has already run, don't repeat.  We would think that
475        * the combined clip/cull distance array was clip-only and mess up.
476        */
477       if (clip->data.how_declared == nir_var_hidden)
478          return false;
479    }
480 
481    const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
482    const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
483 
484    if (store_info) {
485       nir->info.clip_distance_array_size = clip_array_size;
486       nir->info.cull_distance_array_size = cull_array_size;
487    }
488 
489    if (clip) {
490       assert(clip->data.compact);
491       clip->data.how_declared = nir_var_hidden;
492    }
493 
494    if (cull) {
495       assert(cull->data.compact);
496       cull->data.how_declared = nir_var_hidden;
497       cull->data.location = VARYING_SLOT_CLIP_DIST0 + clip_array_size / 4;
498       cull->data.location_frac = clip_array_size % 4;
499    }
500 
501    return true;
502 }
503 
504 bool
nir_lower_clip_cull_distance_arrays(nir_shader * nir)505 nir_lower_clip_cull_distance_arrays(nir_shader *nir)
506 {
507    bool progress = false;
508 
509    if (nir->info.stage <= MESA_SHADER_GEOMETRY ||
510        nir->info.stage == MESA_SHADER_MESH)
511       progress |= combine_clip_cull(nir, nir_var_shader_out, true);
512 
513    if (nir->info.stage > MESA_SHADER_VERTEX &&
514        nir->info.stage <= MESA_SHADER_FRAGMENT) {
515       progress |= combine_clip_cull(nir, nir_var_shader_in,
516                                     nir->info.stage == MESA_SHADER_FRAGMENT);
517    }
518 
519    nir_foreach_function_impl(impl, nir) {
520       if (progress) {
521          nir_metadata_preserve(impl,
522                                nir_metadata_control_flow |
523                                nir_metadata_live_defs |
524                                nir_metadata_loop_analysis);
525       } else {
526          nir_metadata_preserve(impl, nir_metadata_all);
527       }
528    }
529 
530    return progress;
531 }
532