1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27
28 /**
29 * This file contains two different lowering passes.
30 *
31 * 1. nir_lower_clip_cull_distance_arrays()
32 *
33 * This pass combines clip and cull distance arrays in separate locations
34 * and colocates them both in VARYING_SLOT_CLIP_DIST0. It does so by
35 * maintaining two arrays but making them compact and using location_frac
36 * to stack them on top of each other.
37 *
38 * 2. nir_lower_clip_cull_distance_to_vec4s()
39 *
40 * This pass accounts for the difference between the way
41 * gl_ClipDistance is declared in standard GLSL (as an array of
42 * floats), and the way it is frequently implemented in hardware (as
43 * a pair of vec4s, with four clip distances packed into each).
44 *
45 * The declaration of gl_ClipDistance is replaced with a declaration
46 * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are
47 * translated to refer to gl_ClipDistanceMESA with the appropriate
48 * swizzling of array indices. For instance:
49 *
50 * gl_ClipDistance[i]
51 *
52 * is translated into:
53 *
54 * gl_ClipDistanceMESA[i>>2][i&3]
55 */
56
57 #define GLSL_CLIP_VAR_NAME "gl_ClipDistanceMESA"
58
59 struct lower_distance_state {
60 /**
61 * Pointer to the declaration of gl_ClipDistance, if found.
62 *
63 * Note:
64 *
65 * - the in_var is for geometry and both tessellation shader inputs only.
66 *
67 * - since gl_ClipDistance is available in tessellation control,
68 * tessellation evaluation and geometry shaders as both an input
69 * and an output, it's possible for both old_distance_out_var
70 * and old_distance_in_var to be non-null.
71 */
72 nir_variable *old_distance_out_var;
73 nir_variable *old_distance_in_var;
74
75 /**
76 * Pointer to the newly-created gl_ClipDistanceMESA variable.
77 */
78 nir_variable *new_distance_out_var;
79 nir_variable *new_distance_in_var;
80
81 /**
82 * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX)
83 */
84 gl_shader_stage shader_stage;
85 const char *in_name;
86 int total_size;
87 int offset;
88 };
89
90 /**
91 * Get the length of the clip/cull distance array, looking past
92 * any interface block arrays.
93 */
94 static unsigned
get_unwrapped_array_length(nir_shader * nir,nir_variable * var)95 get_unwrapped_array_length(nir_shader *nir, nir_variable *var)
96 {
97 if (!var)
98 return 0;
99
100 /* Unwrap GS input and TCS input/output interfaces. We want the
101 * underlying clip/cull distance array length, not the per-vertex
102 * array length.
103 */
104 const struct glsl_type *type = var->type;
105 if (nir_is_arrayed_io(var, nir->info.stage))
106 type = glsl_get_array_element(type);
107
108 if (var->data.per_view) {
109 assert(glsl_type_is_array(type));
110 type = glsl_get_array_element(type);
111 }
112
113 assert(glsl_type_is_array(type));
114
115 return glsl_get_length(type);
116 }
117
118 /**
119 * Replace any declaration of 'in_name' as an array of floats with a
120 * declaration of gl_ClipDistanceMESA as an array of vec4's.
121 */
122 static void
replace_var_declaration(struct lower_distance_state * state,nir_shader * sh,nir_variable * var,const char * in_name)123 replace_var_declaration(struct lower_distance_state *state, nir_shader *sh,
124 nir_variable *var, const char *in_name)
125 {
126 nir_variable **old_var;
127 nir_variable **new_var;
128
129 if (!var->name || strcmp(var->name, in_name) != 0)
130 return;
131
132 assert(glsl_type_is_array(var->type));
133 if (var->data.mode == nir_var_shader_out) {
134 if (state->old_distance_out_var)
135 return;
136
137 old_var = &state->old_distance_out_var;
138 new_var = &state->new_distance_out_var;
139 } else if (var->data.mode == nir_var_shader_in) {
140 if (state->old_distance_in_var)
141 return;
142
143 old_var = &state->old_distance_in_var;
144 new_var = &state->new_distance_in_var;
145 } else {
146 unreachable("not reached");
147 }
148
149 *old_var = var;
150
151 if (!(*new_var)) {
152 unsigned new_size = (state->total_size + 3) / 4;
153
154 *new_var = rzalloc(sh, nir_variable);
155 (*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
156 (*new_var)->data.mode = var->data.mode;
157 (*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
158 (*new_var)->data.assigned = true;
159 (*new_var)->data.how_declared = var->data.how_declared;
160
161 nir_shader_add_variable(sh, *new_var);
162
163 if (!glsl_type_is_array(glsl_get_array_element(var->type))) {
164 /* gl_ClipDistance (used for vertex, tessellation evaluation and
165 * geometry output, and fragment input).
166 */
167 assert((var->data.mode == nir_var_shader_in &&
168 sh->info.stage == MESA_SHADER_FRAGMENT) ||
169 (var->data.mode == nir_var_shader_out &&
170 (sh->info.stage == MESA_SHADER_VERTEX ||
171 sh->info.stage == MESA_SHADER_TESS_EVAL ||
172 sh->info.stage == MESA_SHADER_GEOMETRY)));
173
174 assert(glsl_get_base_type(glsl_get_array_element(var->type)) ==
175 GLSL_TYPE_FLOAT);
176
177 /* And change the properties that we need to change */
178 (*new_var)->type = glsl_array_type(glsl_vec4_type(), new_size, 0);
179 } else {
180 /* 2D gl_ClipDistance (used for tessellation control, tessellation
181 * evaluation and geometry input, and tessellation control output).
182 */
183 assert((var->data.mode == nir_var_shader_in &&
184 (sh->info.stage == MESA_SHADER_GEOMETRY ||
185 sh->info.stage == MESA_SHADER_TESS_EVAL)) ||
186 sh->info.stage == MESA_SHADER_TESS_CTRL);
187
188 assert (glsl_get_base_type(glsl_get_array_element(glsl_get_array_element(var->type))) ==
189 GLSL_TYPE_FLOAT);
190
191 /* And change the properties that we need to change */
192 (*new_var)->type =
193 glsl_array_type(glsl_array_type(glsl_vec4_type(), new_size, 0),
194 glsl_array_size(var->type), 0);
195 }
196 }
197 }
198
199 static nir_def *
interp_deref(nir_builder * b,nir_intrinsic_instr * old_intrin,nir_deref_instr * deref)200 interp_deref(nir_builder *b, nir_intrinsic_instr *old_intrin,
201 nir_deref_instr *deref)
202 {
203 nir_intrinsic_instr *intrin =
204 nir_intrinsic_instr_create(b->shader, old_intrin->intrinsic);
205 intrin->num_components = 4;
206 intrin->src[0] = nir_src_for_ssa(&deref->def);
207
208 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
209 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)
210 intrin->src[1] = nir_src_for_ssa(old_intrin->src[1].ssa);
211
212 nir_def_init(&intrin->instr, &intrin->def, 4, 32);
213 nir_builder_instr_insert(b, &intrin->instr);
214
215 return &intrin->def;
216 }
217
218 /* Replace any expression that indexes one of the floats in gl_ClipDistance
219 * with an expression that indexes into one of the vec4's in
220 * gl_ClipDistanceMESA and accesses the appropriate component.
221 */
222 static void
lower_distance_deref(struct lower_distance_state * state,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_instr * deref,nir_variable * new_var)223 lower_distance_deref(struct lower_distance_state *state, nir_builder *b,
224 nir_intrinsic_instr *intrin, nir_deref_instr *deref,
225 nir_variable *new_var)
226 {
227 nir_deref_path path;
228 nir_deref_path_init(&path, deref, NULL);
229
230 assert(path.path[0]->deref_type == nir_deref_type_var);
231 nir_deref_instr **p = &path.path[1];
232
233 b->cursor = nir_before_instr(&intrin->instr);
234 nir_deref_instr *deref_var = nir_build_deref_var(b, new_var);
235
236 /* Handle 2D arrays such as Geom shader inputs */
237 if (glsl_type_is_array(glsl_get_array_element(new_var->type))) {
238 assert((*p)->deref_type == nir_deref_type_array);
239 deref_var = nir_build_deref_array(b, deref_var, (*p)->arr.index.ssa);
240 p++;
241 }
242
243 assert((*p)->deref_type == nir_deref_type_array);
244
245 /**
246 * Create the necessary values to index into gl_ClipDistanceMESA based
247 * on the value previously used to index into gl_ClipDistance.
248 *
249 * An array index selects one of the vec4's in gl_ClipDistanceMESA
250 * a swizzle then selects a component within the selected vec4.
251 */
252 nir_src old_index = (*p)->arr.index;
253 if (nir_src_is_const(old_index)) {
254 unsigned const_val = nir_src_as_uint(old_index) + state->offset;
255 unsigned swizzle = const_val % 4;
256
257 nir_deref_instr *def_arr_instr =
258 nir_build_deref_array_imm(b, deref_var, const_val / 4);
259
260 if (intrin->intrinsic == nir_intrinsic_store_deref) {
261 nir_def *value = intrin->src[1].ssa;
262 nir_build_write_masked_store(b, def_arr_instr, value, swizzle);
263 } else {
264 assert(intrin->intrinsic == nir_intrinsic_load_deref ||
265 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
266 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
267 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
268
269 nir_def *load_def;
270 if (intrin->intrinsic == nir_intrinsic_load_deref)
271 load_def = nir_load_deref(b, def_arr_instr);
272 else
273 load_def = interp_deref(b, intrin, def_arr_instr);
274
275 nir_def *swz = nir_channel(b, load_def, swizzle);
276 nir_def_rewrite_uses(&intrin->def, swz);
277 }
278 } else {
279 nir_def *index = nir_iadd_imm(b, old_index.ssa, state->offset);
280 nir_def *swizzle = nir_umod_imm(b, index, 4);
281 index = nir_ishr_imm(b, index, 2); /* index / 4 */
282
283 nir_deref_instr *def_arr_instr =
284 nir_build_deref_array(b, deref_var, index);
285
286 if (intrin->intrinsic == nir_intrinsic_store_deref) {
287 nir_def *value = intrin->src[1].ssa;
288 nir_build_write_masked_stores(b, def_arr_instr, value, swizzle, 0, 4);
289 } else {
290 assert(intrin->intrinsic == nir_intrinsic_load_deref ||
291 intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
292 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
293 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset);
294
295 nir_def *load_def;
296 if (intrin->intrinsic == nir_intrinsic_load_deref)
297 load_def = nir_load_deref(b, def_arr_instr);
298 else
299 load_def = interp_deref(b, intrin, def_arr_instr);
300
301 nir_def *swz = nir_vector_extract(b, load_def, swizzle);
302 nir_def_rewrite_uses(&intrin->def, swz);
303 }
304 }
305
306 nir_deref_path_finish(&path);
307 }
308
309 static bool
replace_with_derefs_to_vec4(nir_builder * b,nir_intrinsic_instr * intr,void * cb_data)310 replace_with_derefs_to_vec4(nir_builder *b, nir_intrinsic_instr *intr,
311 void *cb_data)
312 {
313 struct lower_distance_state *state =
314 (struct lower_distance_state *) cb_data;
315 nir_variable_mode mask = nir_var_shader_in | nir_var_shader_out;
316
317 /* Copy deref lowering is expected to happen before we get here */
318 assert(intr->intrinsic != nir_intrinsic_copy_deref);
319 assert(intr->intrinsic != nir_intrinsic_interp_deref_at_vertex);
320
321 if (intr->intrinsic != nir_intrinsic_load_deref &&
322 intr->intrinsic != nir_intrinsic_store_deref &&
323 intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
324 intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
325 intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
326 return false;
327
328 nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
329 if (!nir_deref_mode_is_one_of(deref, mask))
330 return false;
331
332 nir_variable *var = nir_deref_instr_get_variable(deref);
333
334 /* The var has already been lowered to a temp so the derefs have already
335 * been replaced. We can end up here when a shader has both clip and cull
336 * arrays.
337 */
338 if (var->data.mode != nir_var_shader_in &&
339 var->data.mode != nir_var_shader_out)
340 return false;
341
342 if (var->data.mode == nir_var_shader_out &&
343 var != state->old_distance_out_var)
344 return false;
345
346 if (var->data.mode == nir_var_shader_in &&
347 var != state->old_distance_in_var)
348 return false;
349
350 nir_variable *new_var = var->data.mode == nir_var_shader_in ?
351 state->new_distance_in_var : state->new_distance_out_var;
352
353 lower_distance_deref(state, b, intr, deref, new_var);
354
355 return true;
356 }
357
358 static void
lower_distance_to_vec4(nir_shader * shader,struct lower_distance_state * state)359 lower_distance_to_vec4(nir_shader *shader, struct lower_distance_state *state)
360 {
361 /* Replace declarations */
362 nir_foreach_variable_with_modes_safe(var, shader,
363 nir_var_shader_in | nir_var_shader_out) {
364 replace_var_declaration(state, shader, var, state->in_name);
365 }
366
367 if (!state->old_distance_in_var && !state->old_distance_out_var)
368 return;
369
370 /* Replace derefs, we may have indirect store lowering which will change
371 * control flow of the shader.
372 */
373 nir_shader_intrinsics_pass(shader, replace_with_derefs_to_vec4,
374 nir_metadata_none, state);
375
376 /* Mark now lowered vars as ordinary globals to be dead code eliminated.
377 * Also clear the compact flag to avoid issues with validation.
378 */
379 if (state->old_distance_out_var) {
380 state->old_distance_out_var->data.mode = nir_var_shader_temp;
381 state->old_distance_out_var->data.compact = false;
382 }
383
384 if (state->old_distance_in_var) {
385 state->old_distance_in_var->data.mode = nir_var_shader_temp;
386 state->old_distance_in_var->data.compact = false;
387 }
388 }
389
390 bool
nir_lower_clip_cull_distance_to_vec4s(nir_shader * shader)391 nir_lower_clip_cull_distance_to_vec4s(nir_shader *shader)
392 {
393 int clip_size = 0;
394 int cull_size = 0;
395
396 nir_variable_mode mode = nir_var_shader_in | nir_var_shader_out;
397 nir_foreach_variable_with_modes(var, shader, mode) {
398 if ((var->data.mode == nir_var_shader_in &&
399 shader->info.stage == MESA_SHADER_VERTEX) ||
400 (var->data.mode == nir_var_shader_out &&
401 shader->info.stage == MESA_SHADER_FRAGMENT) ||
402 shader->info.stage == MESA_SHADER_COMPUTE)
403 continue;
404
405
406 if (var->data.location == VARYING_SLOT_CLIP_DIST0)
407 clip_size = MAX2(clip_size, get_unwrapped_array_length(shader, var));
408
409 if (var->data.location == VARYING_SLOT_CULL_DIST0)
410 cull_size = MAX2(cull_size, get_unwrapped_array_length(shader, var));
411 }
412
413 if (clip_size == 0 && cull_size == 0) {
414 nir_shader_preserve_all_metadata(shader);
415 return false;
416 }
417
418 struct lower_distance_state state;
419 state.old_distance_out_var = NULL;
420 state.old_distance_in_var = NULL;
421 state.new_distance_out_var = NULL;
422 state.new_distance_in_var = NULL;
423 state.shader_stage = shader->info.stage;
424 state.in_name = "gl_ClipDistance";
425 state.total_size = clip_size + cull_size;
426 state.offset = 0;
427 lower_distance_to_vec4(shader, &state);
428
429 state.old_distance_out_var = NULL;
430 state.old_distance_in_var = NULL;
431 state.in_name ="gl_CullDistance";
432 state.offset = clip_size;
433 lower_distance_to_vec4(shader, &state);
434
435 nir_fixup_deref_modes(shader);
436
437 /* Assume we made progress */
438 return true;
439 }
440
441 static bool
combine_clip_cull(nir_shader * nir,nir_variable_mode mode,bool store_info)442 combine_clip_cull(nir_shader *nir,
443 nir_variable_mode mode,
444 bool store_info)
445 {
446 nir_variable *cull = NULL;
447 nir_variable *clip = NULL;
448
449 nir_foreach_variable_with_modes(var, nir, mode) {
450 if (var->data.location == VARYING_SLOT_CLIP_DIST0)
451 clip = var;
452
453 if (var->data.location == VARYING_SLOT_CULL_DIST0)
454 cull = var;
455 }
456
457 if (!cull && !clip) {
458 /* If this is run after optimizations and the variables have been
459 * eliminated, we should update the shader info, because no other
460 * place does that.
461 */
462 if (store_info) {
463 nir->info.clip_distance_array_size = 0;
464 nir->info.cull_distance_array_size = 0;
465 }
466 return false;
467 }
468
469 if (!cull && clip) {
470 /* The GLSL IR lowering pass must have converted these to vectors */
471 if (!clip->data.compact)
472 return false;
473
474 /* If this pass has already run, don't repeat. We would think that
475 * the combined clip/cull distance array was clip-only and mess up.
476 */
477 if (clip->data.how_declared == nir_var_hidden)
478 return false;
479 }
480
481 const unsigned clip_array_size = get_unwrapped_array_length(nir, clip);
482 const unsigned cull_array_size = get_unwrapped_array_length(nir, cull);
483
484 if (store_info) {
485 nir->info.clip_distance_array_size = clip_array_size;
486 nir->info.cull_distance_array_size = cull_array_size;
487 }
488
489 if (clip) {
490 assert(clip->data.compact);
491 clip->data.how_declared = nir_var_hidden;
492 }
493
494 if (cull) {
495 assert(cull->data.compact);
496 cull->data.how_declared = nir_var_hidden;
497 cull->data.location = VARYING_SLOT_CLIP_DIST0 + clip_array_size / 4;
498 cull->data.location_frac = clip_array_size % 4;
499 }
500
501 return true;
502 }
503
504 bool
nir_lower_clip_cull_distance_arrays(nir_shader * nir)505 nir_lower_clip_cull_distance_arrays(nir_shader *nir)
506 {
507 bool progress = false;
508
509 if (nir->info.stage <= MESA_SHADER_GEOMETRY ||
510 nir->info.stage == MESA_SHADER_MESH)
511 progress |= combine_clip_cull(nir, nir_var_shader_out, true);
512
513 if (nir->info.stage > MESA_SHADER_VERTEX &&
514 nir->info.stage <= MESA_SHADER_FRAGMENT) {
515 progress |= combine_clip_cull(nir, nir_var_shader_in,
516 nir->info.stage == MESA_SHADER_FRAGMENT);
517 }
518
519 nir_foreach_function_impl(impl, nir) {
520 if (progress) {
521 nir_metadata_preserve(impl,
522 nir_metadata_control_flow |
523 nir_metadata_live_defs |
524 nir_metadata_loop_analysis);
525 } else {
526 nir_metadata_preserve(impl, nir_metadata_all);
527 }
528 }
529
530 return progress;
531 }
532