/*
 * Copyright © 2012 Intel Corporation
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * Linker functions related specifically to linking varyings between shader
 * stages.
 */

#include "main/errors.h"
#include "main/macros.h"
#include "main/menums.h"
#include "main/mtypes.h"
#include "program/symbol_table.h"
#include "util/hash_table.h"
#include "util/u_math.h"
#include "util/perf/cpu_trace.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "gl_nir.h"
#include "gl_nir_link_varyings.h"
#include "gl_nir_linker.h"
#include "linker_util.h"
#include "string_to_uint_map.h"
#define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0 : ((1 << (i)) - 1))

/* Temporary storage for the set of attributes that need locations assigned. */
struct temp_attr {
   unsigned slots;
   unsigned original_idx;
   nir_variable *var;
};

/* Used below in the call to qsort. */
static int
compare_attr(const void *a, const void *b)
{
   const struct temp_attr *const l = (const struct temp_attr *) a;
   const struct temp_attr *const r = (const struct temp_attr *) b;

   /* Reversed because we want a descending order sort below. */
   if (r->slots != l->slots)
      return r->slots - l->slots;

   return l->original_idx - r->original_idx;
}

/**
 * Get the varying type stripped of the outermost array if we're processing
 * a stage whose varyings are arrays indexed by a vertex number (such as
 * geometry shader inputs).
 */
static const struct glsl_type *
get_varying_type(const nir_variable *var, gl_shader_stage stage)
{
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   return type;
}

/**
 * Find a contiguous set of available bits in a bitmask.
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
static int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   unsigned needed_mask = (1 << needed_count) - 1;
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

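   /* Slide the window of needed bits across the mask until it lands on a
    * fully unused region.
    */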
   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}

/* Find deref based on variable name.
 * Note: This function does not support arrays.
 */
static bool
find_deref(nir_shader *shader, const char *name)
{
   nir_foreach_function(func, shader) {
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               nir_deref_instr *deref = nir_instr_as_deref(instr);
               if (deref->deref_type == nir_deref_type_var &&
                   strcmp(deref->var->name, name) == 0)
                  return true;
            }
         }
      }
   }

   return false;
}

/**
 * Validate the types and qualifiers of an output from one stage against the
 * matching input to another stage.
 */
static void
cross_validate_types_and_qualifiers(const struct gl_constants *consts,
                                    struct gl_shader_program *prog,
                                    const nir_variable *input,
                                    const nir_variable *output,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   /* Check that the types match between stages.
    */
   const struct glsl_type *type_to_match = input->type;

   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
                                  consumer_stage == MESA_SHADER_GEOMETRY;
   if (extra_array_level) {
      assert(glsl_type_is_array(type_to_match));
      type_to_match = glsl_get_array_element(type_to_match);
   }

   if (type_to_match != output->type) {
      if (glsl_type_is_struct(output->type)) {
         /* Structures across shader stages can have different names but are
          * considered to match in type if and only if their members match
          * in name, type, qualification, and declaration order. Precision
          * does not need to match.
          */
         if (!glsl_record_compare(output->type, type_to_match,
                                  false, /* match_name */
                                  true, /* match_locations */
                                  false /* match_precision */)) {
            linker_error(prog,
                         "%s shader output `%s' declared as struct `%s', "
                         "doesn't match in type with %s shader input "
                         "declared as struct `%s'\n",
                         _mesa_shader_stage_to_string(producer_stage),
                         output->name,
                         glsl_get_type_name(output->type),
                         _mesa_shader_stage_to_string(consumer_stage),
                         glsl_get_type_name(input->type));
         }
      } else if (!glsl_type_is_array(output->type) ||
                 !is_gl_identifier(output->name)) {
         /* There is a bit of a special case for gl_TexCoord. This built-in
          * is unsized by default. Applications that access it must
          * redeclare it with a size. There is some language in the GLSL
          * spec that implies the fragment shader and vertex shader do not
          * have to agree on this size. Other drivers behave this way, and
          * one or two applications seem to rely on it.
          *
          * Neither declaration needs to be modified here because the array
          * sizes are fixed later when update_array_sizes is called.
          *
          * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
          *
          *     "Unlike user-defined varying variables, the built-in
          *     varying variables don't have a strict one-to-one
          *     correspondence between the vertex language and the
          *     fragment language."
          */
         linker_error(prog,
                      "%s shader output `%s' declared as type `%s', "
                      "but %s shader input declared as type `%s'\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      glsl_get_type_name(output->type),
                      _mesa_shader_stage_to_string(consumer_stage),
                      glsl_get_type_name(input->type));
         return;
      }
   }

   /* Check that all of the qualifiers match between stages.
    */

   /* According to the OpenGL and OpenGL ES GLSL specs, the centroid
    * qualifier must match until OpenGL 4.3 and OpenGL ES 3.1. The
    * OpenGL ES 3.0 conformance test suite does not verify that the
    * qualifiers must match, and the dEQP test suite expects the opposite
    * (OpenGL ES 3.1) behavior for OpenGL ES 3.0 drivers, so we relax the
    * check in all cases.
    */
   if (false /* always skip the centroid check */ &&
       prog->GLSL_Version < (prog->IsES ? 310 : 430) &&
       input->data.centroid != output->data.centroid) {
      linker_error(prog,
                   "%s shader output `%s' %s centroid qualifier, "
                   "but %s shader input %s centroid qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.centroid) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.centroid) ? "has" : "lacks");
      return;
   }

   if (input->data.sample != output->data.sample) {
      linker_error(prog,
                   "%s shader output `%s' %s sample qualifier, "
                   "but %s shader input %s sample qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.sample) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.sample) ? "has" : "lacks");
      return;
   }

   if (input->data.patch != output->data.patch) {
      linker_error(prog,
                   "%s shader output `%s' %s patch qualifier, "
                   "but %s shader input %s patch qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.patch) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.patch) ? "has" : "lacks");
      return;
   }

   /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
    *
    *     "As only outputs need be declared with invariant, an output from
    *     one shader stage will still match an input of a subsequent stage
    *     without the input being declared as invariant."
    *
    * while GLSL 4.10 says:
    *
    *     "For variables leaving one shader and coming into another shader,
    *     the invariant keyword has to be used in both shaders, or a link
    *     error will result."
    *
    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
    *
    *     "The invariance of varyings that are declared in both the vertex
    *     and fragment shaders must match."
    */
   if (input->data.explicit_invariant != output->data.explicit_invariant &&
       prog->GLSL_Version < (prog->IsES ? 300 : 420)) {
      linker_error(prog,
                   "%s shader output `%s' %s invariant qualifier, "
                   "but %s shader input %s invariant qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.explicit_invariant) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.explicit_invariant) ? "has" : "lacks");
      return;
   }

   /* GLSL >= 4.40 removes the text requiring interpolation qualifiers to
    * match across stages; they must only match within the same stage.
    *
    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
    *
    *     "It is a link-time error if, within the same stage, the
    *     interpolation qualifiers of variables of the same name do not
    *     match."
    *
    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
    *
    *     "When no interpolation qualifier is present, smooth interpolation
    *     is used."
    *
    * So we match variables where one is smooth and the other has no
    * explicit qualifier.
    */
   unsigned input_interpolation = input->data.interpolation;
   unsigned output_interpolation = output->data.interpolation;
   if (prog->IsES) {
      if (input_interpolation == INTERP_MODE_NONE)
         input_interpolation = INTERP_MODE_SMOOTH;
      if (output_interpolation == INTERP_MODE_NONE)
         output_interpolation = INTERP_MODE_SMOOTH;
   }
   if (input_interpolation != output_interpolation &&
       prog->GLSL_Version < 440) {
      if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
         linker_error(prog,
                      "%s shader output `%s' specifies %s "
                      "interpolation qualifier, "
                      "but %s shader input specifies %s "
                      "interpolation qualifier\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      interpolation_string(output->data.interpolation),
                      _mesa_shader_stage_to_string(consumer_stage),
                      interpolation_string(input->data.interpolation));
         return;
      } else {
         linker_warning(prog,
                        "%s shader output `%s' specifies %s "
                        "interpolation qualifier, "
                        "but %s shader input specifies %s "
                        "interpolation qualifier\n",
                        _mesa_shader_stage_to_string(producer_stage),
                        output->name,
                        interpolation_string(output->data.interpolation),
                        _mesa_shader_stage_to_string(consumer_stage),
                        interpolation_string(input->data.interpolation));
      }
   }
}

/**
 * Validate front and back color outputs against a single color input.
 */
static void
cross_validate_front_and_back_color(const struct gl_constants *consts,
                                    struct gl_shader_program *prog,
                                    const nir_variable *input,
                                    const nir_variable *front_color,
                                    const nir_variable *back_color,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   if (front_color != NULL && front_color->data.assigned)
      cross_validate_types_and_qualifiers(consts, prog, input, front_color,
                                          consumer_stage, producer_stage);

   if (back_color != NULL && back_color->data.assigned)
      cross_validate_types_and_qualifiers(consts, prog, input, back_color,
                                          consumer_stage, producer_stage);
}

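/**
 * Convert an explicit location into a zero-based slot index by subtracting
 * the first location of the variable's storage class for this stage.
 */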
static unsigned
compute_variable_location_slot(nir_variable *var, gl_shader_stage stage)
{
   unsigned location_start = VARYING_SLOT_VAR0;

   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (var->data.mode == nir_var_shader_in)
         location_start = VERT_ATTRIB_GENERIC0;
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
      if (var->data.patch)
         location_start = VARYING_SLOT_PATCH0;
      break;
   case MESA_SHADER_FRAGMENT:
      if (var->data.mode == nir_var_shader_out)
         location_start = FRAG_RESULT_DATA0;
      break;
   default:
      break;
   }

   return var->data.location - location_start;
}

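/* Per-(location, component) record used to detect illegal aliasing between
 * variables with explicit locations.
 */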
struct explicit_location_info {
   nir_variable *var;
   bool base_type_is_integer;
   unsigned base_type_bit_size;
   unsigned interpolation;
   bool centroid;
   bool sample;
   bool patch;
};

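/**
 * Record the [location][component] slots covered by a variable with an
 * explicit location, reporting a link error if it aliases a previously
 * seen variable in a way the spec forbids.
 */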
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        nir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const struct glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        struct gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const struct glsl_type *type_without_array = glsl_without_array(type);
   const bool base_type_is_integer =
      glsl_base_type_is_integer(glsl_get_base_type(type_without_array));
   const bool is_struct = glsl_type_is_struct(type_without_array);
   if (is_struct) {
      /* Structs don't have a defined underlying base type, so just treat
       * all component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail later anyway.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
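      /* 64-bit types consume two components per element, so they count
       * double when computing the last component used.
       */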
      unsigned dmul = glsl_type_is_64bit(type_without_array) ? 2 : 1;
      last_comp = component + glsl_get_vector_elements(type_without_array) * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(glsl_get_base_type(type_without_array));
   }

   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            if (glsl_type_is_struct(glsl_without_array(info->var->type)) ||
                is_struct) {
               /* Structs cannot share a location since they are
                * incompatible with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *     "Further, when location aliasing, the aliases sharing
                *     the location must have the same underlying numerical
                *     type and bit width (floating-point or integer, 32-bit
                *     versus 64-bit, etc.) and the same auxiliary storage
                *     and interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, it is
                * implicitly float, or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            last_comp = last_comp - 4;
            /* Bump the location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}

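/**
 * Resize per-vertex input arrays to the number of vertices provided by the
 * previous stage, reporting a link error for a mismatched explicit size or
 * an out-of-bounds static access.
 */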
static void
resize_input_array(nir_shader *shader, struct gl_shader_program *prog,
                   unsigned stage, unsigned num_vertices)
{
   nir_foreach_shader_in_variable(var, shader) {
      if (!glsl_type_is_array(var->type) || var->data.patch)
         continue;

      unsigned size = glsl_array_size(var->type);

      if (stage == MESA_SHADER_GEOMETRY) {
         /* Generate a link error if the shader has declared this array with
          * an incorrect size.
          */
         if (!var->data.implicit_sized_array &&
             size != -1 && size != num_vertices) {
            linker_error(prog, "size of array %s declared as %u, "
                         "but number of input vertices is %u\n",
                         var->name, size, num_vertices);
            break;
         }

         /* Generate a link error if the shader attempts to access an input
          * array using an index too large for its actual size assigned at
          * link time.
          */
         if (var->data.max_array_access >= (int)num_vertices) {
            linker_error(prog, "%s shader accesses element %i of "
                         "%s, but only %i input vertices\n",
                         _mesa_shader_stage_to_string(stage),
                         var->data.max_array_access, var->name, num_vertices);
            break;
         }
      }

      var->type = glsl_array_type(var->type->fields.array, num_vertices, 0);
      var->data.max_array_access = num_vertices - 1;
   }

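   /* Deref instructions still reference the old array types; rewrite them
    * to match the resized variables.
    */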
   nir_fixup_deref_types(shader);
}

/**
 * Resize tessellation evaluation per-vertex inputs to the size of
 * tessellation control per-vertex outputs.
 */
void
resize_tes_inputs(const struct gl_constants *consts,
                  struct gl_shader_program *prog)
{
   if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL)
      return;

   struct gl_linked_shader *tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
   struct gl_linked_shader *tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];

   /* If no control shader is present, then the TES inputs are statically
    * sized to MaxPatchVertices; the actual size of the arrays won't be
    * known until draw time.
    */
   const int num_vertices = tcs
      ? tcs->Program->info.tess.tcs_vertices_out
      : consts->MaxPatchVertices;

   resize_input_array(tes->Program->nir, prog, MESA_SHADER_TESS_EVAL,
                      num_vertices);
   if (tcs) {
      /* Convert the gl_PatchVerticesIn system value into a constant, since
       * the value is known at this point.
       */
      nir_variable *var =
         nir_find_variable_with_location(tes->Program->nir,
                                         nir_var_system_value,
                                         SYSTEM_VALUE_VERTICES_IN);
      if (var) {
         var->data.location = 0;
         var->data.explicit_location = false;
         var->data.mode = nir_var_mem_constant;

         nir_constant *val = rzalloc(var, nir_constant);
         val->values[0].i32 = num_vertices;
         var->constant_initializer = val;

         nir_fixup_deref_modes(tes->Program->nir);
      }
   }
}

void
set_geom_shader_input_array_size(struct gl_shader_program *prog)
{
   if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] == NULL)
      return;

   /* Set the size of geometry shader input arrays */
   nir_shader *nir = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->nir;
   unsigned num_vertices =
      mesa_vertices_per_prim(nir->info.gs.input_primitive);
   resize_input_array(nir, prog, MESA_SHADER_GEOMETRY, num_vertices);
}

static bool
validate_explicit_variable_location(const struct gl_constants *consts,
                                    struct explicit_location_info explicit_locations[][4],
                                    nir_variable *var,
                                    struct gl_shader_program *prog,
                                    struct gl_linked_shader *sh)
{
   const struct glsl_type *type = get_varying_type(var, sh->Stage);
   unsigned num_elements = glsl_count_attribute_slots(type, false);
   unsigned idx = compute_variable_location_slot(var, sh->Stage);
   unsigned slot_limit = idx + num_elements;

   /* Vertex shader inputs and fragment shader outputs are validated in
    * assign_attribute_or_color_locations() so we should not attempt to
    * validate them again here.
    */
   unsigned slot_max;
   if (var->data.mode == nir_var_shader_out) {
      assert(sh->Stage != MESA_SHADER_FRAGMENT);
      slot_max = consts->Program[sh->Stage].MaxOutputComponents / 4;
   } else {
      assert(var->data.mode == nir_var_shader_in);
      assert(sh->Stage != MESA_SHADER_VERTEX);
      slot_max = consts->Program[sh->Stage].MaxInputComponents / 4;
   }

   if (slot_limit > slot_max) {
      linker_error(prog,
                   "Invalid location %u in %s shader\n",
                   idx, _mesa_shader_stage_to_string(sh->Stage));
      return false;
   }

   const struct glsl_type *type_without_array = glsl_without_array(type);
   if (glsl_type_is_interface(type_without_array)) {
      for (unsigned i = 0; i < glsl_get_length(type_without_array); i++) {
         const struct glsl_struct_field *field =
            glsl_get_struct_field_data(type_without_array, i);
         unsigned field_location = field->location -
            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
         unsigned field_slots = glsl_count_attribute_slots(field->type, false);
         if (!check_location_aliasing(explicit_locations, var,
                                      field_location,
                                      0,
                                      field_location + field_slots,
                                      field->type,
                                      field->interpolation,
                                      field->centroid,
                                      field->sample,
                                      field->patch,
                                      prog, sh->Stage)) {
            return false;
         }
      }
   } else if (!check_location_aliasing(explicit_locations, var,
                                       idx, var->data.location_frac,
                                       slot_limit, type,
                                       var->data.interpolation,
                                       var->data.centroid,
                                       var->data.sample,
                                       var->data.patch,
                                       prog, sh->Stage)) {
      return false;
   }

   return true;
}

/**
 * Validate explicit locations for the inputs to the first stage and the
 * outputs of the last stage in a program, if those are not the VS and FS
 * shaders.
 */
void
gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
                                                            struct gl_shader_program *prog,
                                                            gl_shader_stage first_stage,
                                                            gl_shader_stage last_stage)
{
   /* VS inputs and FS outputs are validated in
    * assign_attribute_or_color_locations()
    */
   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
   if (!validate_first_stage && !validate_last_stage)
      return;

   struct explicit_location_info explicit_locations[MAX_VARYING][4];

   gl_shader_stage stages[2] = { first_stage, last_stage };
   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
   nir_variable_mode var_mode[2] = { nir_var_shader_in, nir_var_shader_out };

   for (unsigned i = 0; i < 2; i++) {
      if (!validate_stage[i])
         continue;

      gl_shader_stage stage = stages[i];

      struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
      assert(sh);

      memset(explicit_locations, 0, sizeof(explicit_locations));

      nir_foreach_variable_with_modes(var, sh->Program->nir, var_mode[i]) {
         if (!var->data.explicit_location ||
             var->data.location < VARYING_SLOT_VAR0)
            continue;

         if (!validate_explicit_variable_location(consts, explicit_locations,
                                                  var, prog, sh)) {
            return;
         }
      }
   }
}

/**
 * Check if we should force input / output matching between shader
 * interfaces.
 *
 * Section 4.3.4 (Inputs) of the GLSL 4.10 spec says:
 *
 *     "Only the input variables that are actually read need to be
 *     written by the previous stage; it is allowed to have
 *     superfluous declarations of input variables."
 *
 * However, it is not defined anywhere how we should handle inputs that are
 * not written in the previous stage, and it is not clear what "actually
 * read" means.
 *
 * The GLSL 4.20 spec, however, is much clearer:
 *
 *     "Only the input variables that are statically read need to
 *     be written by the previous stage; it is allowed to have
 *     superfluous declarations of input variables."
 *
 * It also has a table stating that it is an error to statically read an
 * input that is not defined in the previous stage, while it is not an
 * error for an output not to be statically written (the output merely
 * needs to be declared).
 *
 * The text in the GLSL 4.20 spec was an attempt to clarify the previous
 * spec iterations. However, given the difference between the specs, and
 * given that some applications seem to depend on not erroring when the
 * input is not actually read in control flow, we only apply this rule to
 * GLSL 4.20 and higher. GLSL 4.10 shaders have been seen in the wild that
 * depend on the less strict interpretation.
 */
static bool
static_input_output_matching(struct gl_shader_program *prog)
{
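   /* Desktop GLSL requires 4.20; ES has always required static matching,
    * so the rule applies to every ES version.
    */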
   return prog->GLSL_Version >= (prog->IsES ? 0 : 420);
}

/**
 * Validate that outputs from one stage match inputs of another.
 */
void
gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
                                        struct gl_shader_program *prog,
                                        struct gl_linked_shader *producer,
                                        struct gl_linked_shader *consumer)
{
   struct _mesa_symbol_table *table = _mesa_symbol_table_ctor();
   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {0};
   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {0};

   /* Find all shader outputs in the "producer" stage.
    */
   nir_foreach_variable_with_modes(var, producer->Program->nir, nir_var_shader_out) {
      if (!var->data.explicit_location
          || var->data.location < VARYING_SLOT_VAR0) {
         /* Interface block validation is handled elsewhere */
         if (!var->interface_type || is_gl_identifier(var->name))
            _mesa_symbol_table_add_symbol(table, var->name, var);

      } else {
         /* User-defined varyings with explicit locations are handled
          * differently because they do not need to have matching names.
          */
         if (!validate_explicit_variable_location(consts,
                                                  output_explicit_locations,
                                                  var, prog, producer)) {
            goto out;
         }
      }
   }

   /* Find all shader inputs in the "consumer" stage. Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    *
    * Exception: if the consumer is the geometry shader, then the inputs
    * should be arrays and the type of the array element should match the type
    * of the corresponding producer output.
    */
   nir_foreach_variable_with_modes(input, consumer->Program->nir, nir_var_shader_in) {
      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
         const nir_variable *front_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontSecondaryColor");

         const nir_variable *back_color =
            (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackSecondaryColor");

         cross_validate_front_and_back_color(consts, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else {
         /* The rules for connecting inputs and outputs change in the presence
          * of explicit locations. In this case, we no longer care about the
          * names of the variables. Instead, we care only about the
          * explicitly assigned location.
          */
         nir_variable *output = NULL;
         if (input->data.explicit_location
             && input->data.location >= VARYING_SLOT_VAR0) {

            const struct glsl_type *type =
               get_varying_type(input, consumer->Stage);
            unsigned num_elements = glsl_count_attribute_slots(type, false);
            unsigned idx =
               compute_variable_location_slot(input, consumer->Stage);
            unsigned slot_limit = idx + num_elements;

            if (!validate_explicit_variable_location(consts,
                                                     input_explicit_locations,
                                                     input, prog, consumer)) {
               goto out;
            }

            while (idx < slot_limit) {
               if (idx >= MAX_VARYING) {
                  linker_error(prog,
                               "Invalid location %u in %s shader\n", idx,
                               _mesa_shader_stage_to_string(consumer->Stage));
                  goto out;
               }

               output = output_explicit_locations[idx][input->data.location_frac].var;

               if (output == NULL) {
                  /* A linker failure should only happen when there is no
                   * output declaration and there is Static Use of the
                   * declared input.
                   */
                  if (input->data.used && static_input_output_matching(prog)) {
                     linker_error(prog,
                                  "%s shader input `%s' with explicit location "
                                  "has no matching output\n",
                                  _mesa_shader_stage_to_string(consumer->Stage),
                                  input->name);
                     break;
                  }
               } else if (input->data.location != output->data.location) {
                  linker_error(prog,
                               "%s shader input `%s' with explicit location "
                               "has no matching output\n",
                               _mesa_shader_stage_to_string(consumer->Stage),
                               input->name);
                  break;
               }
               idx++;
            }
         } else {
            /* Interface block validation is handled elsewhere */
            if (input->interface_type)
               continue;

            output = (nir_variable *)
               _mesa_symbol_table_find_symbol(table, input->name);
         }

         if (output != NULL) {
            /* Interface blocks have their own validation elsewhere so don't
             * try validating them here.
             */
            if (!(input->interface_type && output->interface_type))
               cross_validate_types_and_qualifiers(consts, prog, input, output,
                                                   consumer->Stage,
                                                   producer->Stage);
         } else {
            /* Check for input vars with unmatched output vars in the
             * previous stage, taking into account that interface blocks
             * could have a matching output but with a different name, so
             * we ignore them.
             */
            assert(!input->data.assigned);
            if (input->data.used && !input->interface_type &&
                !input->data.explicit_location &&
                static_input_output_matching(prog))
               linker_error(prog,
                            "%s shader input `%s' "
                            "has no matching output in the previous stage\n",
                            _mesa_shader_stage_to_string(consumer->Stage),
                            input->name);
         }
      }
   }

out:
   _mesa_symbol_table_dtor(table);
}

/**
 * Assign locations for either VS inputs or FS outputs.
 *
 * \param mem_ctx        Temporary ralloc context used for linking.
 * \param prog           Shader program whose variables need locations
 *                       assigned.
 * \param constants      Driver specific constant values for the program.
 * \param target_index   Selector for the program target to receive location
 *                       assignments. Must be either \c MESA_SHADER_VERTEX or
 *                       \c MESA_SHADER_FRAGMENT.
 * \param do_assignment  Whether to actually mark the assignments or just do
 *                       a dry-run check.
 *
 * \return
 * If locations are (or can be, in the case of a dry run) successfully
 * assigned, true is returned. Otherwise an error is emitted to the shader
 * link log and false is returned.
 */
static bool
assign_attribute_or_color_locations(void *mem_ctx,
                                    struct gl_shader_program *prog,
                                    const struct gl_constants *constants,
                                    unsigned target_index,
                                    bool do_assignment)
{
   /* Maximum number of generic locations. This corresponds to either the
    * maximum number of draw buffers or the maximum number of generic
    * attributes.
    */
   unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
      constants->Program[target_index].MaxAttribs :
      MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);

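   /* used_locations below is a 32-bit mask, so more than 32 generic
    * locations cannot be represented; to_assign is sized to match.
    */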
   assert(max_index <= 32);
   struct temp_attr to_assign[32];

   /* Mark invalid locations as being used.
    */
   unsigned used_locations = ~SAFE_MASK_FROM_INDEX(max_index);
   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
          || (target_index == MESA_SHADER_FRAGMENT));

   if (prog->_LinkedShaders[target_index] == NULL)
      return true;

   /* Operate in a total of four passes.
    *
    * 1. Invalidate the location assignments for all vertex shader inputs.
    *
    * 2. Assign locations for inputs that have user-defined (via
    *    glBindVertexAttribLocation) locations and outputs that have
    *    user-defined locations (via glBindFragDataLocation).
    *
    * 3. Sort the attributes without assigned locations by number of slots
    *    required in decreasing order. Fragmentation caused by attribute
    *    locations assigned by the application may prevent large attributes
    *    from having enough contiguous space.
    *
    * 4. Assign locations to any inputs without assigned locations.
    */

   const int generic_base = (target_index == MESA_SHADER_VERTEX)
      ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;

   nir_variable_mode io_mode =
      (target_index == MESA_SHADER_VERTEX)
      ? nir_var_shader_in : nir_var_shader_out;

   /* Temporary array for the set of attributes that have locations assigned,
    * for the purpose of checking overlapping slots/components of (non-ES)
    * fragment shader outputs.
    */
   nir_variable *assigned[FRAG_RESULT_MAX * 4]; /* (max # of FS outputs) * # components */
   unsigned assigned_attr = 0;

   unsigned num_attr = 0;

   nir_shader *shader = prog->_LinkedShaders[target_index]->Program->nir;
   nir_foreach_variable_with_modes(var, shader, io_mode) {

      if (var->data.explicit_location) {
         if ((var->data.location >= (int)(max_index + generic_base))
             || (var->data.location < 0)) {
            linker_error(prog,
                         "invalid explicit location %d specified for `%s'\n",
                         (var->data.location < 0)
                         ? var->data.location
                         : var->data.location - generic_base,
                         var->name);
            return false;
         }
      } else if (target_index == MESA_SHADER_VERTEX) {
         unsigned binding;

         if (string_to_uint_map_get(prog->AttributeBindings, &binding, var->name)) {
            assert(binding >= VERT_ATTRIB_GENERIC0);
            var->data.location = binding;
         }
      } else if (target_index == MESA_SHADER_FRAGMENT) {
         unsigned binding;
         unsigned index;
         const char *name = var->name;
         const struct glsl_type *type = var->type;

         while (type) {
            /* Check if there's a binding for the variable name */
            if (string_to_uint_map_get(prog->FragDataBindings, &binding, name)) {
               assert(binding >= FRAG_RESULT_DATA0);
               var->data.location = binding;

               if (string_to_uint_map_get(prog->FragDataIndexBindings, &index, name)) {
                  var->data.index = index;
               }
               break;
            }

            /* If not, but it's an array type, look for name[0] */
            if (glsl_type_is_array(type)) {
               name = ralloc_asprintf(mem_ctx, "%s[0]", name);
               type = glsl_get_array_element(type);
               continue;
            }

            break;
         }
      }

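      /* gl_LastFragData is the framebuffer-fetch built-in and is handled
       * specially, so it never takes part in output location assignment.
       */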
      if (strcmp(var->name, "gl_LastFragData") == 0)
         continue;

      /* From the GL 4.5 core spec, section 15.2 (Shader Execution):
       *
       *     "Output binding assignments will cause LinkProgram to fail:
       *     ...
       *     If the program has an active output assigned to a location
       *     greater than or equal to the value of
       *     MAX_DUAL_SOURCE_DRAW_BUFFERS and has an active output assigned
       *     an index greater than or equal to one;"
       */
      if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
          var->data.location - generic_base >=
          (int) constants->MaxDualSourceDrawBuffers) {
         linker_error(prog,
                      "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
                      "with index %u for %s\n",
                      var->data.location - generic_base, var->data.index,
                      var->name);
         return false;
      }

      const unsigned slots =
         glsl_count_attribute_slots(var->type,
                                    target_index == MESA_SHADER_VERTEX);

      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations. Otherwise,
       * add it to the list of variables that need linker-assigned locations.
       */
      if (var->data.location != -1) {
         if (var->data.location >= generic_base && var->data.index < 1) {
            /* From page 61 of the OpenGL 4.0 spec:
             *
             *     "LinkProgram will fail if the attribute bindings assigned
             *     by BindAttribLocation do not leave enough space to assign
             *     a location for an active matrix attribute or an active
             *     attribute array, both of which require multiple
             *     contiguous generic attributes."
             *
             * I think the above text prohibits the aliasing of explicit and
             * automatic assignments. However, aliasing is allowed in manual
             * assignments of attribute locations. See the comments below
             * for the details.
             *
             * From OpenGL 4.0 spec, page 61:
             *
             *     "It is possible for an application to bind more than one
             *     attribute name to the same location. This is referred to
             *     as aliasing. This will only work if only one of the
             *     aliased attributes is active in the executable program,
             *     or if no path through the shader consumes more than one
             *     attribute of a set of attributes aliased to the same
             *     location. A link error can occur if the linker determines
             *     that every path through the shader consumes multiple
             *     aliased attributes, but implementations are not required
             *     to generate an error in this case."
             *
             * From GLSL 4.30 spec, page 54:
             *
             *     "A program will fail to link if any two non-vertex shader
             *     input variables are assigned to the same location. For
             *     vertex shaders, multiple input variables may be assigned
             *     to the same location using either layout qualifiers or via
             *     the OpenGL API. However, such aliasing is intended only to
             *     support vertex shaders where each execution path accesses
             *     at most one input per each location. Implementations are
             *     permitted, but not required, to generate link-time errors
             *     if they detect that every path through the vertex shader
             *     executable accesses multiple inputs assigned to any single
             *     location. For all shader types, a program will fail to link
             *     if explicit location assignments leave the linker unable
             *     to find space for other variables without explicit
             *     assignments."
             *
             * From OpenGL ES 3.0 spec, page 56:
             *
             *     "Binding more than one attribute name to the same location
             *     is referred to as aliasing, and is not permitted in OpenGL
             *     ES Shading Language 3.00 vertex shaders. LinkProgram will
             *     fail when this condition exists. However, aliasing is
             *     possible in OpenGL ES Shading Language 1.00 vertex shaders.
             *     This will only work if only one of the aliased attributes
             *     is active in the executable program, or if no path through
             *     the shader consumes more than one attribute of a set of
             *     attributes aliased to the same location. A link error can
             *     occur if the linker determines that every path through the
             *     shader consumes multiple aliased attributes, but implemen-
             *     tations are not required to generate an error in this case."
             *
             * After looking at the above references from the OpenGL,
             * OpenGL ES and GLSL specifications, we allow aliasing of vertex
             * input variables in OpenGL 2.0 (and above) and OpenGL ES 2.0.
             *
             * NOTE: This is not required by the spec, but it's worth
             * mentioning here that we're not doing anything to make sure
             * that no path through the vertex shader executable accesses
             * multiple inputs assigned to any single location.
             */

            /* Mask representing the contiguous slots that will be used by
             * this attribute.
             */
            const unsigned attr = var->data.location - generic_base;
            const unsigned use_mask = (1 << slots) - 1;
            const char *const string = (target_index == MESA_SHADER_VERTEX)
               ? "vertex shader input" : "fragment shader output";

            /* Generate a link error if the requested locations for this
             * attribute exceed the maximum allowed attribute location.
             */
            if (attr + slots > max_index) {
               linker_error(prog,
                            "insufficient contiguous locations "
                            "available for %s `%s' %d %d %d\n", string,
                            var->name, used_locations, use_mask, attr);
               return false;
            }

1230
1231 /* Generate a link error if the set of bits requested for this
1232 * attribute overlaps any previously allocated bits.
1233 */
1234 if ((~(use_mask << attr) & used_locations) != used_locations) {
1235 if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
1236 /* From section 4.4.2 (Output Layout Qualifiers) of the GLSL
1237 * 4.40 spec:
1238 *
1239 * "Additionally, for fragment shader outputs, if two
1240 * variables are placed within the same location, they
1241 * must have the same underlying type (floating-point or
1242 * integer). No component aliasing of output variables or
1243 * members is allowed.
1244 */
1245 for (unsigned i = 0; i < assigned_attr; i++) {
1246 unsigned assigned_slots =
1247 glsl_count_attribute_slots(assigned[i]->type, false);
1248 unsigned assig_attr =
1249 assigned[i]->data.location - generic_base;
1250 unsigned assigned_use_mask = (1 << assigned_slots) - 1;
1251
1252 if ((assigned_use_mask << assig_attr) &
1253 (use_mask << attr)) {
1254
1255 const struct glsl_type *assigned_type =
1256 glsl_without_array(assigned[i]->type);
1257 const struct glsl_type *type =
1258 glsl_without_array(var->type);
1259 if (glsl_get_base_type(assigned_type) !=
1260 glsl_get_base_type(type)) {
1261 linker_error(prog, "types do not match for aliased"
1262 " %ss %s and %s\n", string,
1263 assigned[i]->name, var->name);
1264 return false;
1265 }
1266
1267 unsigned assigned_component_mask =
1268 ((1 << glsl_get_vector_elements(assigned_type)) - 1) <<
1269 assigned[i]->data.location_frac;
1270 unsigned component_mask =
1271 ((1 << glsl_get_vector_elements(type)) - 1) <<
1272 var->data.location_frac;
1273 if (assigned_component_mask & component_mask) {
1274 linker_error(prog, "overlapping component is "
1275 "assigned to %ss %s and %s "
1276 "(component=%d)\n",
1277 string, assigned[i]->name, var->name,
1278 var->data.location_frac);
1279 return false;
1280 }
1281 }
1282 }
1283 } else if (target_index == MESA_SHADER_FRAGMENT ||
1284 (prog->IsES && prog->GLSL_Version >= 300)) {
1285 linker_error(prog, "overlapping location is assigned "
1286 "to %s `%s' %d %d %d\n", string, var->name,
1287 used_locations, use_mask, attr);
1288 return false;
1289 } else {
1290 linker_warning(prog, "overlapping location is assigned "
1291 "to %s `%s' %d %d %d\n", string, var->name,
1292 used_locations, use_mask, attr);
1293 }
1294 }
1295
1296 if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
1297 /* Only track assigned variables for non-ES fragment shaders
1298 * to avoid overflowing the array.
1299 *
1300 * At most one variable per fragment output component should
1301 * reach this.
1302 */
1303 assert(assigned_attr < ARRAY_SIZE(assigned));
1304 assigned[assigned_attr] = var;
1305 assigned_attr++;
1306 }
1307
1308 used_locations |= (use_mask << attr);
1309
1310 /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
1311 *
1312 * "A program with more than the value of MAX_VERTEX_ATTRIBS
1313 * active attribute variables may fail to link, unless
1314 * device-dependent optimizations are able to make the program
1315 * fit within available hardware resources. For the purposes
1316 * of this test, attribute variables of the type dvec3, dvec4,
1317 * dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
1318 * count as consuming twice as many attributes as equivalent
1319 * single-precision types. While these types use the same number
1320 * of generic attributes as their single-precision equivalents,
1321 * implementations are permitted to consume two single-precision
1322 * vectors of internal storage for each three- or four-component
1323 * double-precision vector."
1324 *
1325 * Mark this attribute slot as taking up twice as much space
1326 * so we can count it properly against limits. According to
1327 * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
1328 * is optional behavior, but it seems preferable.
1329 */
1330 if (glsl_type_is_dual_slot(glsl_without_array(var->type)))
1331 double_storage_locations |= (use_mask << attr);
1332 }
1333
1334 continue;
1335 }
1336
1337 if (num_attr >= max_index) {
1338 linker_error(prog, "too many %s (max %u)",
1339 target_index == MESA_SHADER_VERTEX ?
1340 "vertex shader inputs" : "fragment shader outputs",
1341 max_index);
1342 return false;
1343 }
1344 to_assign[num_attr].slots = slots;
1345 to_assign[num_attr].var = var;
1346 to_assign[num_attr].original_idx = num_attr;
1347 num_attr++;
1348 }

   if (!do_assignment)
      return true;

   if (target_index == MESA_SHADER_VERTEX) {
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots, "
                      "only %d available\n",
                      total_attribs_size, max_index);
         return false;
      }
   }

   /* If all of the attributes were assigned locations by the application (or
    * are built-in attributes with fixed locations), return early. This should
    * be the common case.
    */
   if (num_attr == 0)
      return true;

   qsort(to_assign, num_attr, sizeof(to_assign[0]), &compare_attr);

   if (target_index == MESA_SHADER_VERTEX) {
      /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can
       * only be explicitly assigned via glBindAttribLocation. Mark it as
       * reserved to prevent it from being automatically allocated below.
       */
      if (find_deref(shader, "gl_Vertex"))
         used_locations |= (1 << 0);
   }

   for (unsigned i = 0; i < num_attr; i++) {
      /* Mask representing the contiguous slots that will be used by this
       * attribute.
       */
      const unsigned use_mask = (1 << to_assign[i].slots) - 1;

      int location = find_available_slots(used_locations, to_assign[i].slots);

      if (location < 0) {
         const char *const string = (target_index == MESA_SHADER_VERTEX)
            ? "vertex shader input" : "fragment shader output";

         linker_error(prog,
                      "insufficient contiguous locations "
                      "available for %s `%s'\n",
                      string, to_assign[i].var->name);
         return false;
      }

      to_assign[i].var->data.location = generic_base + location;
      used_locations |= (use_mask << location);

      if (glsl_type_is_dual_slot(glsl_without_array(to_assign[i].var->type)))
         double_storage_locations |= (use_mask << location);
   }

   /* Now that we have all the locations, from the GL 4.5 core spec, section
    * 11.1.1 (Vertex Attributes), dvec3, dvec4, dmat2x3, dmat2x4, dmat3,
    * dmat3x4, dmat4x3, and dmat4 count as consuming twice as many attributes
    * as equivalent single-precision types.
    */
   if (target_index == MESA_SHADER_VERTEX) {
      unsigned total_attribs_size =
         util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
         util_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
         linker_error(prog,
                      "attempt to use %d vertex attribute slots, "
                      "only %d available\n",
                      total_attribs_size, max_index);
         return false;
      }
   }

   return true;
}

static bool
varying_has_user_specified_location(const nir_variable *var)
{
   return var->data.explicit_location &&
          var->data.location >= VARYING_SLOT_VAR0;
}

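/**
 * Recursively build the flattened list of transform feedback varying names
 * for a (possibly nested) type, appending struct member and array subscript
 * suffixes while walking the type.
 */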
static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}

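/**
 * Scan the shader outputs for *xfb_* layout qualifiers, count the varyings
 * they capture, and collect their names for later matching.
 */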
1480 static bool
process_xfb_layout_qualifiers(void * mem_ctx,const struct gl_linked_shader * sh,struct gl_shader_program * prog,unsigned * num_xfb_decls,char *** varying_names,bool * compact_arrays)1481 process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
1482 struct gl_shader_program *prog,
1483 unsigned *num_xfb_decls,
1484 char ***varying_names,
1485 bool *compact_arrays)
1486 {
1487 bool has_xfb_qualifiers = false;
1488
1489 /* We still need to enable transform feedback mode even if xfb_stride is
1490 * only applied to a global out. Also we don't bother to propagate
1491 * xfb_stride to interface block members so this will catch that case also.
1492 */
1493 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1494 if (prog->TransformFeedback.BufferStride[j]) {
1495 has_xfb_qualifiers = true;
1496 break;
1497 }
1498 }
1499
1500 *compact_arrays = sh->Program->nir->options->compact_arrays;
1501 nir_foreach_shader_out_variable(var, sh->Program->nir) {
1502 /* From the ARB_enhanced_layouts spec:
1503 *
1504 * "Any shader making any static use (after preprocessing) of any of
1505 * these *xfb_* qualifiers will cause the shader to be in a
1506 * transform feedback capturing mode and hence responsible for
1507 * describing the transform feedback setup. This mode will capture
1508 * any output selected by *xfb_offset*, directly or indirectly, to
1509 * a transform feedback buffer."
1510 */
1511 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
1512 has_xfb_qualifiers = true;
1513 }
1514
1515 if (var->data.explicit_offset) {
1516 *num_xfb_decls += glsl_varying_count(var->type);
1517 has_xfb_qualifiers = true;
1518 }
1519 }
1520
1521 if (*num_xfb_decls == 0)
1522 return has_xfb_qualifiers;
1523
1524
1525 unsigned i = 0;
1526 *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
1527 nir_foreach_shader_out_variable(var, sh->Program->nir) {
1528 if (var->data.explicit_offset) {
1529 char *name;
1530 const struct glsl_type *type, *member_type;
1531
1532 if (var->data.from_named_ifc_block) {
1533 type = var->interface_type;
1534
1535 /* Find the member type before it was altered by lowering */
1536 const struct glsl_type *type_wa = glsl_without_array(type);
1537 member_type =
1538 glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
1539 name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
1540 } else {
1541 type = var->type;
1542 member_type = NULL;
1543 name = ralloc_strdup(NULL, var->name);
1544 }
1545 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
1546 var->name, member_type, varying_names);
1547 ralloc_free(name);
1548 }
1549 }
1550
1551 assert(i == *num_xfb_decls);
1552 return has_xfb_qualifiers;
1553 }
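
/*
 * Example (editor's note, not part of the original source): a shader
 * containing
 *
 *    layout(xfb_buffer = 0, xfb_offset = 0) out vec4 pos;
 *
 * puts the program in transform feedback capturing mode: has_xfb_qualifiers
 * becomes true and one xfb declaration named "pos" is generated, exactly as
 * if "pos" had been passed to glTransformFeedbackVaryings().
 */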

/**
 * Initialize this struct based on a string that was passed to
 * glTransformFeedbackVaryings.
 *
 * If the input is malformed, this call still succeeds, but it sets
 * xfb_decl->var_name to the malformed input, so xfb_decl_find_output_var()
 * will fail to find any matching variable.
 */
static void
xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
              const struct gl_extensions *exts, const void *mem_ctx,
              const char *input, bool compact_arrays)
{
   /* We don't have to be pedantic about what is a valid GLSL variable name,
    * because any variable with an invalid name can't exist in the IR anyway.
    */
   xfb_decl->location = -1;
   xfb_decl->orig_name = input;
   xfb_decl->lowered_builtin_array_variable = none;
   xfb_decl->skip_components = 0;
   xfb_decl->next_buffer_separator = false;
   xfb_decl->matched_candidate = NULL;
   xfb_decl->stream_id = 0;
   xfb_decl->buffer = 0;
   xfb_decl->offset = 0;

   if (exts->ARB_transform_feedback3) {
      /* Parse gl_NextBuffer. */
      if (strcmp(input, "gl_NextBuffer") == 0) {
         xfb_decl->next_buffer_separator = true;
         return;
      }

      /* Parse gl_SkipComponents. */
      if (strcmp(input, "gl_SkipComponents1") == 0)
         xfb_decl->skip_components = 1;
      else if (strcmp(input, "gl_SkipComponents2") == 0)
         xfb_decl->skip_components = 2;
      else if (strcmp(input, "gl_SkipComponents3") == 0)
         xfb_decl->skip_components = 3;
      else if (strcmp(input, "gl_SkipComponents4") == 0)
         xfb_decl->skip_components = 4;

      if (xfb_decl->skip_components)
         return;
   }

   /* Parse a declaration. */
   const char *base_name_end;
   long subscript = link_util_parse_program_resource_name(input, strlen(input),
                                                          &base_name_end);
   xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
   if (xfb_decl->var_name == NULL) {
      _mesa_error_no_memory(__func__);
      return;
   }

   if (subscript >= 0) {
      xfb_decl->array_subscript = subscript;
      xfb_decl->is_subscripted = true;
   } else {
      xfb_decl->is_subscripted = false;
   }

   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
    * code must behave specially to account for the fact that gl_ClipDistance
    * is converted from a float[8] to a vec4[2].
    */
   if (!compact_arrays &&
       strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = clip_distance;
   }
   if (!compact_arrays &&
       strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
      xfb_decl->lowered_builtin_array_variable = cull_distance;
   }
}
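
/*
 * Parsing examples (editor's note, not part of the original source):
 *
 *    "gl_NextBuffer"      -> next_buffer_separator = true
 *    "gl_SkipComponents2" -> skip_components = 2
 *    "colors[3]"          -> var_name = "colors", array_subscript = 3,
 *                            is_subscripted = true
 *    "normal"             -> var_name = "normal", is_subscripted = false
 */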

/**
 * Determine whether two xfb_decl structs refer to the same variable and
 * array index (if applicable).
 */
static bool
xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
{
   assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));

   if (strcmp(x->var_name, y->var_name) != 0)
      return false;
   if (x->is_subscripted != y->is_subscripted)
      return false;
   if (x->is_subscripted && x->array_subscript != y->array_subscript)
      return false;
   return true;
}

/**
 * The total number of varying components taken up by this variable.  Only
 * valid if xfb_decl_assign_location() has been called.
 */
static unsigned
xfb_decl_num_components(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->lowered_builtin_array_variable)
      return xfb_decl->size;
   else
      return xfb_decl->vector_elements * xfb_decl->matrix_columns *
         xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
}
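
/*
 * Example (editor's note, not part of the original source): capturing a
 * dvec3[2] array yields vector_elements = 3, matrix_columns = 1, size = 2
 * and a 64-bit type, so xfb_decl_num_components() returns
 * 3 * 1 * 2 * 2 = 12 float-sized components.
 */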

/**
 * Assign a location and stream ID for this xfb_decl object based on the
 * transform feedback candidate found by find_candidate.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                         const struct gl_constants *consts,
                         struct gl_shader_program *prog,
                         bool disable_varying_packing, bool xfb_enabled)
{
   assert(xfb_decl_is_varying(xfb_decl));

   unsigned fine_location
      = xfb_decl->matched_candidate->toplevel_var->data.location * 4
      + xfb_decl->matched_candidate->toplevel_var->data.location_frac
      + xfb_decl->matched_candidate->struct_offset_floats;
   const unsigned dmul =
      glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

   if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
      /* Array variable */
      const struct glsl_type *element_type =
         glsl_get_array_element(xfb_decl->matched_candidate->type);
      const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
      const unsigned vector_elements = glsl_get_vector_elements(element_type);
      unsigned actual_array_size;
      switch (xfb_decl->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->nir->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->nir->info.cull_distance_array_size : 0;
         break;
      case none:
      default:
         actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
         break;
      }

      if (xfb_decl->is_subscripted) {
         /* Check array bounds. */
         if (xfb_decl->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%i, but the array size is %u.",
                         xfb_decl->orig_name, xfb_decl->array_subscript,
                         actual_array_size);
            return false;
         }

         bool array_will_be_lowered =
            lower_packed_varying_needs_lowering(prog->last_vert_prog->nir,
                                                xfb_decl->matched_candidate->toplevel_var,
                                                nir_var_shader_out,
                                                disable_varying_packing,
                                                xfb_enabled) ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_ClipDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_CullDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelInner") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelOuter") == 0;

         unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
            1 : (array_will_be_lowered ? vector_elements : 4) * matrix_cols * dmul;
         fine_location += array_elem_size * xfb_decl->array_subscript;
         xfb_decl->size = 1;
      } else {
         xfb_decl->size = actual_array_size;
      }
      xfb_decl->vector_elements = vector_elements;
      xfb_decl->matrix_columns = matrix_cols;
      if (xfb_decl->lowered_builtin_array_variable)
         xfb_decl->type = GL_FLOAT;
      else
         xfb_decl->type = glsl_get_gl_type(element_type);
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (xfb_decl->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      xfb_decl->orig_name, xfb_decl->var_name);
         return false;
      }
      xfb_decl->size = 1;
      xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
      xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
      xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
   }
   xfb_decl->location = fine_location / 4;
   xfb_decl->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *
    *    "A program will fail to link if:
    *
    *     * the total number of components to capture in any varying
    *       variable in <varyings> is greater than the constant
    *       MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *       buffer mode is SEPARATE_ATTRIBS_EXT;"
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       xfb_decl_num_components(xfb_decl) >
       consts->MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   xfb_decl->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

   unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
   unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
   xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
   xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
      array_offset + struct_offset;

   return true;
}
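
/*
 * Example (editor's note, not part of the original source): for a candidate
 * declared as `out vec2 v[4]` captured as "v[2]", each array element takes
 * vector_elements (2) floats when the array will be packed by
 * lower_packed_varyings, so fine_location advances by 2 * 2 = 4 floats;
 * if the array will not be lowered, every element occupies a full slot and
 * the subscript advances fine_location by 4 * 2 = 8 floats instead.
 */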

static unsigned
xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
{
   if (!xfb_decl_is_varying(xfb_decl)) {
      return 0;
   }

   if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
      unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
      unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
      return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
   } else {
      return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
   }
}
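
/*
 * Example (editor's note, not part of the original source): with an
 * explicit location, `layout(location = 0) out dvec3 d[2]` gives dmul = 2
 * and rows_per_element = DIV_ROUND_UP(3 * 2, 4) = 2, so the declaration
 * uses 2 (size) * 1 (columns) * 2 = 4 hardware outputs; this matches the
 * dvec3 layout table in xfb_decl_store() below.
 */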

static bool
xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
{
   if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
      return false;

   return xfb_decl->matched_candidate->toplevel_var->data.assigned;
}

/**
 * Update gl_transform_feedback_info to reflect this xfb_decl.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               struct gl_shader_program *prog,
               struct gl_transform_feedback_info *info,
               unsigned buffer, unsigned buffer_index,
               const unsigned max_outputs,
               BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
               bool *explicit_stride, unsigned *max_member_alignment,
               bool has_xfb_qualifiers, const void *mem_ctx)
{
   unsigned xfb_offset = 0;
   unsigned size = xfb_decl->size;
   /* Handle gl_SkipComponents. */
   if (xfb_decl->skip_components) {
      info->Buffers[buffer].Stride += xfb_decl->skip_components;
      size = xfb_decl->skip_components;
      goto store_varying;
   }

   if (xfb_decl->next_buffer_separator) {
      size = 0;
      goto store_varying;
   }

   if (has_xfb_qualifiers) {
      xfb_offset = xfb_decl->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = xfb_decl->location;
      unsigned location_frac = xfb_decl->location_frac;
      unsigned num_components = xfb_decl_num_components(xfb_decl);

      /* From GL_EXT_transform_feedback:
       *
       *    "A program will fail to link if:
       *
       *     * the total number of components to capture is greater than the
       *       constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *    "The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          consts->MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *    "No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       */
      const unsigned max_components =
         consts->MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      for (unsigned word = start_word; word <= end_word; word++) {
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         xfb_decl->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      const unsigned type_num_components =
         xfb_decl->vector_elements *
         (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      while (num_components > 0) {
         unsigned output_size = 0;

         /* From GL_ARB_enhanced_layouts:
          *
          *    "When an attribute variable declared using an array type is
          *     bound to generic attribute index <i>, the active array
          *     elements are assigned to consecutive generic attributes
          *     beginning with generic attribute <i>.  The number of
          *     attributes and components assigned to each element are
          *     determined according to the data type of array elements and
          *     "component" layout qualifier (if any) specified in the
          *     declaration of the array."
          *
          *    "When an attribute variable declared using a matrix type is
          *     bound to a generic attribute index <i>, its values are taken
          *     from consecutive generic attributes beginning with generic
          *     attribute <i>.  Such matrices are treated as an array of
          *     column vectors with values taken from the generic attributes."
          *
          * This means there may be gaps in the varyings we are taking values
          * from.
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |     32b  32b  32b  32b         |    32b  32b  32b  32b         |
          * |  0   X    X    Y    Y          | 4   X    Y    0    0          |
          * |  1   Z    Z    0    0          | 5   X    Y    0    0          |
          * |  2   X    X    Y    Y          | 6   X    Y    0    0          |
          * |  3   Z    Z    0    0          | 7   X    Y    0    0          |
          */
         if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a
          *     shader invocation, the buffer contents at the assigned offset
          *     will be undefined.  Even if there are no static writes to a
          *     variable or member that is assigned a transform feedback
          *     offset, the space is still allocated in the buffer and still
          *     affects the stride."
          */
         if (xfb_decl_is_varying_written(xfb_decl)) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = xfb_decl->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
          info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d: it must be a "
                      "multiple of 8 when applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].name.string =
      ralloc_strdup(prog, xfb_decl->orig_name);
   resource_name_updated(&info->Varyings[info->NumVarying].name);
   info->Varyings[info->NumVarying].Type = xfb_decl->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}
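
/*
 * Stride example (editor's note, not part of the original source): with no
 * explicit xfb_stride, capturing a float followed by a double with xfb
 * qualifiers raises max_member_alignment for that buffer to 2 floats, so
 * the implicit stride is rounded up with ALIGN(xfb_offset, 2) and the next
 * vertex's doubles stay 8-byte aligned.
 */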

static const struct tfeedback_candidate *
xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
                        struct gl_shader_program *prog,
                        struct hash_table *tfeedback_candidates)
{
   const char *name = xfb_decl->var_name;
   switch (xfb_decl->lowered_builtin_array_variable) {
   case none:
      name = xfb_decl->var_name;
      break;
   case clip_distance:
   case cull_distance:
      name = "gl_ClipDistanceMESA";
      break;
   }
   struct hash_entry *entry =
      _mesa_hash_table_search(tfeedback_candidates, name);

   xfb_decl->matched_candidate = entry ?
      (struct tfeedback_candidate *) entry->data : NULL;

   if (!xfb_decl->matched_candidate) {
      /* From GL_EXT_transform_feedback:
       *
       *    "A program will fail to link if:
       *
       *     * any variable name specified in the <varyings> array is not
       *       declared as an output in the geometry shader (if present) or
       *       the vertex shader (if no geometry shader is present);"
       */
      linker_error(prog, "Transform feedback varying %s undeclared.",
                   xfb_decl->orig_name);
   }

   return xfb_decl->matched_candidate;
}

/**
 * Force a candidate over the previously matched one.  This happens when a
 * new varying needs to be created to match the xfb declaration, for example
 * to fulfill an alignment criterion.
 */
static void
xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
                               struct tfeedback_candidate *candidate)
{
   xfb_decl->matched_candidate = candidate;

   /* The subscript part is no longer relevant */
   xfb_decl->is_subscripted = false;
   xfb_decl->array_subscript = 0;
}

/**
 * Parse all the transform feedback declarations that were passed to
 * glTransformFeedbackVaryings() and store them in xfb_decl objects.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
parse_xfb_decls(const struct gl_constants *consts,
                const struct gl_extensions *exts,
                struct gl_shader_program *prog,
                const void *mem_ctx, unsigned num_names,
                char **varying_names, struct xfb_decl *decls, bool compact_arrays)
{
   for (unsigned i = 0; i < num_names; ++i) {
      xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i], compact_arrays);

      if (!xfb_decl_is_varying(&decls[i]))
         continue;

      /* From GL_EXT_transform_feedback:
       *
       *    "A program will fail to link if:
       *
       *     * any two entries in the <varyings> array specify the same
       *       varying variable;"
       *
       * We interpret this to mean "any two entries in the <varyings> array
       * specify the same varying variable and array index", since transform
       * feedback of arrays would be useless otherwise.
       */
      for (unsigned j = 0; j < i; ++j) {
         if (xfb_decl_is_varying(&decls[j])) {
            if (xfb_decl_is_same(&decls[i], &decls[j])) {
               linker_error(prog, "Transform feedback varying %s specified "
                            "more than once.", varying_names[i]);
               return false;
            }
         }
      }
   }
   return true;
}
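
/*
 * Example (editor's note, not part of the original source): the list
 * {"a[0]", "a[1]", "b"} parses cleanly because the two subscripted entries
 * name different array indices, while {"b", "b"} or {"a[1]", "a[1]"}
 * triggers the "specified more than once" link error above.
 */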

static int
cmp_xfb_offset(const void *x_generic, const void *y_generic)
{
   struct xfb_decl *x = (struct xfb_decl *) x_generic;
   struct xfb_decl *y = (struct xfb_decl *) y_generic;

   if (x->buffer != y->buffer)
      return x->buffer - y->buffer;
   return x->offset - y->offset;
}

/**
 * Store transform feedback location assignments into
 * prog->sh.LinkedTransformFeedback based on the data stored in
 * xfb_decls.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
store_tfeedback_info(const struct gl_constants *consts,
                     struct gl_shader_program *prog, unsigned num_xfb_decls,
                     struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask
    * for tracking the number of buffers doesn't overflow.
    */
   assert(consts->MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order;
    * however, some drivers expect to receive the list of transform feedback
    * declarations in order, so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_xfb_decls);

   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying_written(&xfb_decls[i]))
         num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   unsigned buffers = 0;
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS */
      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             num_buffers, num_buffers, num_outputs,
                             used_components, NULL, NULL, has_xfb_qualifiers,
                             mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   } else {
      /* GL_INTERLEAVED_ATTRIBS */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_xfb_decls ? xfb_decls[0].buffer : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != xfb_decls[i].buffer) {
            /* we have moved to the next buffer so reset stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         if (xfb_decls[i].next_buffer_separator) {
            if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                xfb_prog->sh.LinkedTransformFeedback,
                                buffer, num_buffers, num_outputs,
                                used_components, explicit_stride,
                                max_member_alignment, has_xfb_qualifiers,
                                mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = xfb_decls[i].buffer;
         } else {
            buffer = num_buffers;
         }

         if (xfb_decl_is_varying(&xfb_decls[i])) {
            if (buffer_stream_id == -1) {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) xfb_decls[i].stream_id;

               /* Only mark a buffer as active when there is a varying
                * attached to it.  This behaviour is based on a revised
                * version of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) xfb_decls[i].stream_id) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            xfb_decls[i].orig_name,
                            xfb_decls[i].stream_id,
                            buffer_stream_id);
               return false;
            }
         }

         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             buffer, num_buffers, num_outputs, used_components,
                             explicit_stride, max_member_alignment,
                             has_xfb_qualifiers, mem_ctx))
            return false;
      }
   }
   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}

/**
 * Enum representing the order in which varyings are packed within a
 * packing class.
 *
 * Currently we pack vec4's first, then vec2's, then scalar values, then
 * vec3's.  This order ensures that the only vectors that are at risk of
 * having to be "double parked" (split between two adjacent varying slots)
 * are the vec3's.
 */
enum packing_order_enum {
   PACKING_ORDER_VEC4,
   PACKING_ORDER_VEC2,
   PACKING_ORDER_SCALAR,
   PACKING_ORDER_VEC3,
};
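
/*
 * Example (editor's note, not part of the original source): packing a vec4,
 * a vec2, two floats and a vec3 in this order fills slot 0 with the vec4
 * and slot 1 with the vec2 plus the two floats, leaving only the trailing
 * vec3 at any risk of straddling a slot boundary.
 */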

/**
 * Structure recording the relationship between a single producer output
 * and a single consumer input.
 */
struct match {
   /**
    * Packing class for this varying, computed by compute_packing_class().
    */
   unsigned packing_class;

   /**
    * Packing order for this varying, computed by compute_packing_order().
    */
   enum packing_order_enum packing_order;

   /**
    * The output variable in the producer stage.
    */
   nir_variable *producer_var;

   /**
    * The input variable in the consumer stage.
    */
   nir_variable *consumer_var;

   /**
    * The location which has been assigned for this varying.  This is
    * expressed in multiples of a float, with the first generic varying
    * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
    * value 0.
    */
   unsigned generic_location;

   /**
    * Original index, used as a fallback sorting key to ensure
    * a stable sort.
    */
   unsigned original_index;
};

/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 */
struct varying_matches
{
   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled.  The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled; fortunately, where transform feedback
    * requires packing it's safe to override the disabled setting.  See
    * is_varying_packing_safe().
    */
   bool xfb_enabled;

   bool enhanced_layouts_enabled;

   /**
    * If true, this driver prefers varyings to be aligned to a power of two
    * within a slot.
    */
   bool prefer_pot_aligned_varyings;

   struct match *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array
    * when it was allocated.
    */
   unsigned matches_capacity;

   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};

/**
 * Comparison function passed to qsort() to sort varyings by packing_class and
 * then by packing_order.
 */
static int
varying_matches_match_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;
   const struct match *y = (const struct match *) y_generic;

   if (x->packing_class != y->packing_class)
      return x->packing_class - y->packing_class;
   if (x->packing_order != y->packing_order)
      return x->packing_order - y->packing_order;
   return x->original_index - y->original_index;
}

/**
 * Comparison function passed to qsort() to sort varyings used only by
 * transform feedback when packing of other varyings is disabled.
 */
static int
varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;
   const struct match *y = (const struct match *) y_generic;

   /* if both varyings are xfb-only, treat them as equivalent */
   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) {
      if (y->producer_var != NULL && y->producer_var->data.is_xfb_only)
         return 0;
      /* if x is xfb-only and y is not, put y first */
      return +1;
   } else if (y->producer_var != NULL && y->producer_var->data.is_xfb_only) {
      /* if y is xfb-only and x is not, leave x first */
      return -1;
   }

   /* otherwise leave the order alone */
   return x->original_index - y->original_index;
}

/**
 * Comparison function passed to qsort() to sort varyings NOT used by
 * transform feedback when packing of xfb varyings is disabled.
 */
static int
varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const struct match *x = (const struct match *) x_generic;
   const struct match *y = (const struct match *) y_generic;

   if ((x->producer_var != NULL && !x->producer_var->data.is_xfb) &&
       (y->producer_var != NULL && !y->producer_var->data.is_xfb))
      /* if both are non-xfb, then sort them */
      return varying_matches_match_comparator(x_generic, y_generic);

   /* otherwise, leave the order alone */
   return x->original_index - y->original_index;
}

static bool
is_unpackable_tess(gl_shader_stage producer_stage,
                   gl_shader_stage consumer_stage)
{
   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
       consumer_stage == MESA_SHADER_TESS_CTRL ||
       producer_stage == MESA_SHADER_TESS_CTRL)
      return true;

   return false;
}

static void
init_varying_matches(void *mem_ctx, struct varying_matches *vm,
                     const struct gl_constants *consts,
                     const struct gl_extensions *exts,
                     gl_shader_stage producer_stage,
                     gl_shader_stage consumer_stage,
                     bool sso)
{
   /* Tessellation shaders treat inputs and outputs as shared memory and can
    * access inputs and outputs of other invocations.
    * Therefore, they can't be lowered to temps easily (and definitely not
    * efficiently).
    */
   bool unpackable_tess =
      is_unpackable_tess(producer_stage, consumer_stage);

   /* Transform feedback code assumes varying arrays are packed, so if the
    * driver has disabled varying packing, make sure to at least enable
    * packing required by transform feedback.  See below for an exception.
    */
   bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;

   /* Some drivers actually require packing to be explicitly disabled
    * for varyings used by transform feedback.
    */
   bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;

   /* Disable packing on outward facing interfaces for SSO because in ES we
    * need to retain the unpacked varying information for draw time
    * validation.
    *
    * Packing is still enabled on individual arrays, structs, and matrices as
    * these are required by the transform feedback code and it is still safe
    * to do so.  We also enable packing when a varying is only used for
    * transform feedback and it's not an SSO.
    */
   bool disable_varying_packing =
      consts->DisableVaryingPacking || unpackable_tess;
   if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
      disable_varying_packing = true;

   /* Note: this initial capacity is rather arbitrarily chosen to be large
    * enough for many cases without wasting an unreasonable amount of space.
    * varying_matches_record() will resize the array if there are more than
    * this number of varyings.
    */
   vm->matches_capacity = 8;
   vm->matches = (struct match *)
      ralloc_array(mem_ctx, struct match, vm->matches_capacity);
   vm->num_matches = 0;

   vm->disable_varying_packing = disable_varying_packing;
   vm->disable_xfb_packing = disable_xfb_packing;
   vm->xfb_enabled = xfb_enabled;
   vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
   vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
   vm->producer_stage = producer_stage;
   vm->consumer_stage = consumer_stage;
}
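
/*
 * Example (editor's note, not part of the original source): linking only
 * the vertex stage of a separable (SSO) pipeline gives consumer_stage ==
 * MESA_SHADER_NONE, so disable_varying_packing is forced on and the
 * outward-facing interface keeps its unpacked layout for draw-time
 * validation; a TCS/TES pair additionally sets unpackable_tess, which also
 * clears xfb_enabled.
 */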

/**
 * Packing is always safe on individual arrays, structures, and matrices.  It
 * is also safe if the varying is only used for transform feedback.
 */
static bool
is_varying_packing_safe(struct varying_matches *vm,
                        const struct glsl_type *type, const nir_variable *var)
{
   if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
      return false;

   return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
                              glsl_type_is_struct(type) ||
                              var->data.is_xfb_only);
}

static bool
is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
                    const nir_variable *var)
{
   return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
      (vm->disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
         glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
}

/**
 * Compute the "packing class" of the given varying.  This is an unsigned
 * integer with the property that two variables in the same packing class can
 * be safely packed into the same vec4.
 */
static unsigned
varying_matches_compute_packing_class(const nir_variable *var)
{
   /* Without help from the back-end, there is no way to pack together
    * variables with different interpolation types, because
    * lower_packed_varyings must choose exactly one interpolation type for
    * each packed varying it creates.
    *
    * However, we can safely pack together floats, ints, and uints, because:
    *
    * - varyings of base type "int" and "uint" must use the "flat"
    *   interpolation type, which can only occur in GLSL 1.30 and above.
    *
    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
    *   can store flat floats as ints without losing any information (using
    *   the ir_unop_bitcast_* opcodes).
    *
    * Therefore, the packing class depends only on the interpolation type.
    */
   bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
      glsl_contains_integer(var->type) || glsl_contains_double(var->type);

   const unsigned interp = is_interpolation_flat
      ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;

   assert(interp < (1 << 3));

   const unsigned packing_class = (interp << 0) |
                                  (var->data.centroid << 3) |
                                  (var->data.sample << 4) |
                                  (var->data.patch << 5) |
                                  (var->data.must_be_shader_input << 6);

   return packing_class;
}
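
/*
 * Example (editor's note, not part of the original source): a flat float
 * and an ivec2 (integers are treated as flat here) end up in the same
 * packing class and may share a vec4, while a smooth float and a centroid
 * float share the interpolation bits but differ in the centroid bit
 * (bit 3), so they are kept in separate classes.
 */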

/**
 * Compute the "packing order" of the given varying.  This is a sort key we
 * use to determine when to attempt to pack the given varying relative to
 * other varyings in the same packing class.
 */
static enum packing_order_enum
varying_matches_compute_packing_order(const nir_variable *var)
{
   const struct glsl_type *element_type = glsl_without_array(var->type);

   switch (glsl_get_component_slots(element_type) % 4) {
   case 1: return PACKING_ORDER_SCALAR;
   case 2: return PACKING_ORDER_VEC2;
   case 3: return PACKING_ORDER_VEC3;
   case 0: return PACKING_ORDER_VEC4;
   default:
      assert(!"Unexpected value of vector_elements");
      return PACKING_ORDER_VEC4;
   }
}
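
/*
 * Examples (editor's note, not part of the original source): a float[3]
 * array strips to float (1 component -> PACKING_ORDER_SCALAR); a vec3 maps
 * to PACKING_ORDER_VEC3; a mat2x3 has 6 component slots, and 6 % 4 == 2
 * classifies it with the vec2s.
 */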

/**
 * Record the given producer/consumer variable pair in the list of variables
 * that should later be assigned locations.
 *
 * It is permissible for \c consumer_var to be NULL (this happens if a
 * variable is output by the producer and consumed by transform feedback, but
 * not consumed by the consumer).
 *
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
 *
 * Note: as a side effect this function may change the interpolation type of
 * \c producer_var, but only when the change couldn't possibly affect
 * rendering.
 */
static void
varying_matches_record(void *mem_ctx, struct varying_matches *vm,
                       nir_variable *producer_var, nir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var &&
        (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
       (consumer_var &&
        (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been assigned explicitly.
       */
      return;
   }

   /* The varyings should not have been matched and assigned previously */
   assert((producer_var == NULL || producer_var->data.location == -1) &&
          (consumer_var == NULL || consumer_var->data.location == -1));

   bool needs_flat_qualifier = consumer_var == NULL &&
      (glsl_contains_integer(producer_var->type) ||
       glsl_contains_double(producer_var->type));

   if (!vm->disable_varying_packing &&
       (!vm->disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type cannot possibly affect rendering.
       * Also, this variable is non-flat and is (or contains) an integer
       * or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type, as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to be flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   if (vm->num_matches == vm->matches_capacity) {
      vm->matches_capacity *= 2;
      vm->matches = (struct match *)
         reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
   }

   /* We must use the consumer to compute the packing class because in GL 4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *     the same name declared in all linked shaders for the same
    *     cross-stage interface must match, otherwise the link command will
    *     fail.
    *
    *     When comparing an output from one stage to an input of a subsequent
    *     stage, the input and output don't match if their interpolation
    *     qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revision 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const nir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   vm->matches[vm->num_matches].packing_class
      = varying_matches_compute_packing_class(var);
   vm->matches[vm->num_matches].packing_order
      = varying_matches_compute_packing_order(var);

   vm->matches[vm->num_matches].producer_var = producer_var;
   vm->matches[vm->num_matches].consumer_var = consumer_var;
   vm->num_matches++;
}
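
/*
 * Example (editor's note, not part of the original source): an
 * `out ivec4 id;` written by a vertex shader and captured only by transform
 * feedback (consumer_var == NULL) needs the flat qualifier, so its
 * interpolation is rewritten to INTERP_MODE_FLAT before
 * lower_packed_varyings runs; no fragment shader ever reads it, so the
 * change cannot affect rendering.
 */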

/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches_record().
 * \param components returns array[slot] of number of components used
 *                   per slot (1, 2, 3 or 4)
 * \param reserved_slots bitmask indicating which varying slots are already
 *                       allocated
 * \return number of slots (4-element vectors) allocated
 */
static unsigned
varying_matches_assign_locations(struct varying_matches *vm,
                                 struct gl_shader_program *prog,
                                 uint8_t components[], uint64_t reserved_slots)
{
   /* Establish the original order of the varying_matches array; our
    * sorts will use this for sorting when the varyings do not have
    * xfb qualifiers.
    */
   for (unsigned i = 0; i < vm->num_matches; i++)
      vm->matches[i].original_index = i;

   /* If packing has been disabled then we cannot safely sort the varyings by
    * class, as it may mean we are using a version of OpenGL where
    * interpolation qualifiers are not guaranteed to match across shaders;
    * sorting in this case could result in mismatching shader interfaces.
    * So we sort only the varyings used by transform feedback.
    *
    * If packing is only disabled for xfb varyings (mutually exclusive with
    * disable_varying_packing), we then group varyings depending on if they
    * are captured for transform feedback.
    */
   if (vm->disable_varying_packing) {
      /* Only sort varyings that are only used by transform feedback. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_xfb_comparator);
   } else if (vm->disable_xfb_packing) {
      /* Only sort varyings that are NOT used by transform feedback. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_not_xfb_comparator);
   } else {
      /* Sort varying matches into an order that makes them easy to pack. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_match_comparator);
   }

   unsigned generic_location = 0;
   unsigned generic_patch_location = MAX_VARYING * 4;
   bool previous_var_xfb = false;
   bool previous_var_xfb_only = false;
   unsigned previous_packing_class = ~0u;

   /* For transform feedback separate mode, we know the number of attributes
    * is <= the number of buffers.  So packing isn't critical.  In fact,
    * packing vec3 attributes can cause trouble because splitting a vec3
    * effectively creates an additional transform feedback output.  The
    * extra TFB output may exceed device driver limits.
    *
    * Also don't pack vec3s if the driver prefers power-of-two aligned
    * varyings.  Packing order guarantees that vec4, vec2 and scalar
    * varyings will be pot-aligned; we only need to take care of vec3s.
    */
   const bool dont_pack_vec3 =
      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       prog->TransformFeedback.NumVarying > 0) ||
      vm->prefer_pot_aligned_varyings;

   for (unsigned i = 0; i < vm->num_matches; i++) {
      unsigned *location = &generic_location;
      const nir_variable *var;
      const struct glsl_type *type;
      bool is_vertex_input = false;

      if (vm->matches[i].consumer_var) {
         var = vm->matches[i].consumer_var;
         type = get_varying_type(var, vm->consumer_stage);
         if (vm->consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         if (!vm->matches[i].producer_var)
            continue; /* The varying was optimised away */

         var = vm->matches[i].producer_var;
         type = get_varying_type(var, vm->producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance if varying packing is disabled for transform feedback,
       * and the previous or current varying is used for transform feedback.
       *
       * Also advance to the next slot if packing is disabled.  This makes
       * sure we don't assign varyings the same locations, which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled.  Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (vm->disable_xfb_packing &&
           (previous_var_xfb || var->data.is_xfb)) ||
          (vm->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != vm->matches[i].packing_class) ||
          (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb = var->data.is_xfb;
      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = vm->matches[i].packing_class;

      /* The number of components taken up by this variable.  For vertex
       * shader inputs, we use the number of slots * 4, as they have
       * different counting rules.
       */
      unsigned num_components = 0;
      if (is_vertex_input) {
         num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
      } else {
         if (is_packing_disabled(vm, type, var)) {
            num_components = glsl_count_attribute_slots(type, false) * 4;
         } else {
            num_components = glsl_get_component_slots_aligned(type, *location);
         }
      }

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the code below and loop back over
       * locations that we skipped because we couldn't pack the varying
       * around varyings with explicit locations.  For now just let the user
       * hit the linking error if we run out of room and suggest they use
       * explicit locations.
       */
      while (slot_end < MAX_VARYING * 4u) {
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

         assert(slots > 0);

         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s; it is possible an array or struct could not be "
                      "packed between varyings with explicit locations.  Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      vm->matches[i].generic_location = *location;

      *location = slot_end + 1;
   }

   return (generic_location + 3) / 4;
}
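
/*
 * Example (editor's note, not part of the original source): with packing
 * enabled and an empty reserved_slots mask, a vec2 and two floats in the
 * same packing class share slot 0 (generic_locations 0, 2 and 3), while a
 * following varying in a different packing class is aligned up to
 * generic_location 4, i.e. the start of slot 1.
 */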
2914
2915 static void
varying_matches_assign_temp_locations(struct varying_matches * vm,struct gl_shader_program * prog,uint64_t reserved_slots)2916 varying_matches_assign_temp_locations(struct varying_matches *vm,
2917 struct gl_shader_program *prog,
2918 uint64_t reserved_slots)
2919 {
2920 unsigned tmp_loc = 0;
2921 for (unsigned i = 0; i < vm->num_matches; i++) {
2922 nir_variable *producer_var = vm->matches[i].producer_var;
2923 nir_variable *consumer_var = vm->matches[i].consumer_var;
2924
2925 while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
2926 if (reserved_slots & (UINT64_C(1) << tmp_loc))
2927 tmp_loc++;
2928 else
2929 break;
2930 }
2931
2932 if (producer_var) {
2933 assert(producer_var->data.location == -1);
2934 producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2935 }
2936
2937 if (consumer_var) {
2938 assert(consumer_var->data.location == -1);
2939 consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2940 }
2941
2942 tmp_loc++;
2943 }
2944 }
2945
2946 /**
2947 * Update the producer and consumer shaders to reflect the locations
2948 * assignments that were made by varying_matches_assign_locations().
2949 */
2950 static void
varying_matches_store_locations(struct varying_matches * vm)2951 varying_matches_store_locations(struct varying_matches *vm)
2952 {
2953 /* Check is location needs to be packed with lower_packed_varyings() or if
2954 * we can just use ARB_enhanced_layouts packing.
2955 */
2956 bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
2957 const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
2958
2959 for (unsigned i = 0; i < vm->num_matches; i++) {
2960 nir_variable *producer_var = vm->matches[i].producer_var;
2961 nir_variable *consumer_var = vm->matches[i].consumer_var;
2962 unsigned generic_location = vm->matches[i].generic_location;
2963 unsigned slot = generic_location / 4;
2964 unsigned offset = generic_location % 4;
2965
2966 if (producer_var) {
2967 producer_var->data.location = VARYING_SLOT_VAR0 + slot;
2968 producer_var->data.location_frac = offset;
2969 }
2970
2971 if (consumer_var) {
2972 consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
2973 consumer_var->data.location_frac = offset;
2974 }
2975
2976 /* Find locations suitable for native packing via
2977 * ARB_enhanced_layouts.
2978 */
2979 if (vm->enhanced_layouts_enabled) {
2980 nir_variable *var = producer_var ? producer_var : consumer_var;
2981 unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
2982 const struct glsl_type *type =
2983 get_varying_type(var, stage);
2984 unsigned comp_slots = glsl_get_component_slots(type) + offset;
2985 unsigned slots = comp_slots / 4;
2986 if (comp_slots % 4)
2987 slots += 1;
2988
2989 if (producer_var && consumer_var) {
2990 if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
2991 glsl_type_is_64bit(type)) {
2992 for (unsigned j = 0; j < slots; j++) {
2993 pack_loc[slot + j] = true;
2994 }
2995 } else if (offset + glsl_get_vector_elements(type) > 4) {
2996 pack_loc[slot] = true;
2997 pack_loc[slot + 1] = true;
2998 } else {
2999 loc_type[slot][offset] = type;
3000 }
3001 } else {
3002 for (unsigned j = 0; j < slots; j++) {
3003 pack_loc[slot + j] = true;
3004 }
3005 }
3006 }
3007 }
3008
3009 /* Attempt to use ARB_enhanced_layouts for more efficient packing if
3010 * suitable.
3011 */
3012 if (vm->enhanced_layouts_enabled) {
3013 for (unsigned i = 0; i < vm->num_matches; i++) {
3014 nir_variable *producer_var = vm->matches[i].producer_var;
3015 nir_variable *consumer_var = vm->matches[i].consumer_var;
3016 if (!producer_var || !consumer_var)
3017 continue;
3018
3019 unsigned generic_location = vm->matches[i].generic_location;
3020 unsigned slot = generic_location / 4;
3021 if (pack_loc[slot])
3022 continue;
3023
3024 const struct glsl_type *type =
3025 get_varying_type(producer_var, vm->producer_stage);
3026 bool type_match = true;
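/* A packed slot can only keep enhanced-layouts style packing if every
 * variable sharing the slot has the same base type.
 */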
3027 for (unsigned j = 0; j < 4; j++) {
3028 if (loc_type[slot][j]) {
3029 if (glsl_get_base_type(type) !=
3030 glsl_get_base_type(loc_type[slot][j]))
3031 type_match = false;
3032 }
3033 }
3034
3035 if (type_match) {
3036 producer_var->data.explicit_location = 1;
3037 consumer_var->data.explicit_location = 1;
3038 }
3039 }
3040 }
3041 }
3042
3043 /**
3044 * Is the given variable a varying variable to be counted against the
3045 * limit in ctx->Const.MaxVarying?
3046 * This includes variables such as texcoords, colors and generic
3047 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
3048 */
3049 static bool
3050 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
3051 {
3052 /* Only fragment shaders will take a varying variable as an input */
3053 if (stage == MESA_SHADER_FRAGMENT &&
3054 var->data.mode == nir_var_shader_in) {
3055 switch (var->data.location) {
3056 case VARYING_SLOT_POS:
3057 case VARYING_SLOT_FACE:
3058 case VARYING_SLOT_PNTC:
3059 return false;
3060 default:
3061 return true;
3062 }
3063 }
3064 return false;
3065 }
3066
3067 struct tfeedback_candidate_generator_state {
3068 /**
3069 * Memory context used to allocate hash table keys and values.
3070 */
3071 void *mem_ctx;
3072
3073 /**
3074 * Hash table in which tfeedback_candidate objects should be stored.
3075 */
3076 struct hash_table *tfeedback_candidates;
3077
3078 gl_shader_stage stage;
3079
3080 /**
3081 * Pointer to the toplevel variable that is being traversed.
3082 */
3083 nir_variable *toplevel_var;
3084
3085 /**
3086 * Total number of varying floats that have been visited so far. This is
3087 * used to determine the offset to each varying within the toplevel
3088 * variable.
3089 */
3090 unsigned varying_floats;
3091
3092 /**
3093 * Offset within the xfb. Counted in floats.
3094 */
3095 unsigned xfb_offset_floats;
3096 };
3097
3098 /**
3099 * Generates tfeedback_candidate structs describing all possible targets of
3100 * transform feedback.
3101 *
3102 * tfeedback_candidate structs are stored in the hash table
3103 * tfeedback_candidates. This hash table maps varying names to instances of the
3104 * tfeedback_candidate struct.
3105 */
3106 static void
3107 tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
3108 char **name, size_t name_length,
3109 const struct glsl_type *type,
3110 const struct glsl_struct_field *named_ifc_member)
3111 {
3112 switch (glsl_get_base_type(type)) {
3113 case GLSL_TYPE_INTERFACE:
3114 if (named_ifc_member) {
3115 ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
3116 named_ifc_member->name);
3117 tfeedback_candidate_generator(state, name, name_length,
3118 named_ifc_member->type, NULL);
3119 return;
3120 }
3121 FALLTHROUGH;
3122 case GLSL_TYPE_STRUCT:
3123 for (unsigned i = 0; i < glsl_get_length(type); i++) {
3124 size_t new_length = name_length;
3125
3126 /* Append '.field' to the current variable name. */
3127 if (name) {
3128 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
3129 glsl_get_struct_elem_name(type, i));
3130 }
3131
3132 tfeedback_candidate_generator(state, name, new_length,
3133 glsl_get_struct_field(type, i), NULL);
3134 }
3135
3136 return;
3137 case GLSL_TYPE_ARRAY:
3138 if (glsl_type_is_struct(glsl_without_array(type)) ||
3139 glsl_type_is_interface(glsl_without_array(type)) ||
3140 glsl_type_is_array(glsl_get_array_element(type))) {
3141
3142 for (unsigned i = 0; i < glsl_get_length(type); i++) {
3143 size_t new_length = name_length;
3144
3145 /* Append the subscript to the current variable name */
3146 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
3147
3148 tfeedback_candidate_generator(state, name, new_length,
3149 glsl_get_array_element(type),
3150 named_ifc_member);
3151 }
3152
3153 return;
3154 }
3155 FALLTHROUGH;
3156 default:
3157 assert(!glsl_type_is_struct(glsl_without_array(type)));
3158 assert(!glsl_type_is_interface(glsl_without_array(type)));
3159
3160 struct tfeedback_candidate *candidate
3161 = rzalloc(state->mem_ctx, struct tfeedback_candidate);
3162 candidate->toplevel_var = state->toplevel_var;
3163 candidate->type = type;
3164
3165 if (glsl_type_is_64bit(glsl_without_array(type))) {
3166 /* From ARB_gpu_shader_fp64:
3167 *
3168 * If any variable captured in transform feedback has double-precision
3169 * components, the practical requirements for defined behavior are:
3170 * ...
3171 * (c) each double-precision variable captured must be aligned to a
3172 * multiple of eight bytes relative to the beginning of a vertex.
3173 */
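/* For example, a double at a float offset of 3 is pushed to offset 4 so
 * that it lands on an 8-byte boundary.
 */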
3174 state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
3175 /* 64-bit members of structs are also aligned. */
3176 state->varying_floats = ALIGN(state->varying_floats, 2);
3177 }
3178
3179 candidate->xfb_offset_floats = state->xfb_offset_floats;
3180 candidate->struct_offset_floats = state->varying_floats;
3181
3182 _mesa_hash_table_insert(state->tfeedback_candidates,
3183 ralloc_strdup(state->mem_ctx, *name),
3184 candidate);
3185
3186 const unsigned component_slots = glsl_get_component_slots(type);
3187
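/* With a user-specified location every member starts on a fresh vec4
 * slot, so advance by four floats per attribute slot; otherwise members
 * are packed tightly by component.
 */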
3188 if (varying_has_user_specified_location(state->toplevel_var)) {
3189 state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
3190 } else {
3191 state->varying_floats += component_slots;
3192 }
3193
3194 state->xfb_offset_floats += component_slots;
3195 }
3196 }
3197
3198 static void
3199 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
3200 struct hash_table *consumer_inputs,
3201 struct hash_table *consumer_interface_inputs,
3202 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3203 {
3204 memset(consumer_inputs_with_locations, 0,
3205 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
3206
3207 nir_foreach_shader_in_variable(input_var, nir) {
3208 /* All interface blocks should have been lowered by this point */
3209 assert(!glsl_type_is_interface(input_var->type));
3210
3211 if (input_var->data.explicit_location) {
3212 /* assign_varying_locations only cares about finding the
3213 * nir_variable at the start of a contiguous location block.
3214 *
3215 * - For !producer, consumer_inputs_with_locations isn't used.
3216 *
3217 * - For !consumer, consumer_inputs_with_locations is empty.
3218 *
3219 * For consumer && producer, if you were trying to set some
3220 * nir_variable to the middle of a location block on the other side
3221 * of producer/consumer, cross_validate_outputs_to_inputs() should
3222 * be link-erroring due to either type mismatch or location
3223 * overlaps. If the variables do match up, then they've got a
3224 * matching data.location and you only looked at
3225 * consumer_inputs_with_locations[var->data.location], not any
3226 * following entries for the array/structure.
3227 */
3228 consumer_inputs_with_locations[input_var->data.location] =
3229 input_var;
3230 } else if (input_var->interface_type != NULL) {
3231 char *const iface_field_name =
3232 ralloc_asprintf(mem_ctx, "%s.%s",
3233 glsl_get_type_name(glsl_without_array(input_var->interface_type)),
3234 input_var->name);
3235 _mesa_hash_table_insert(consumer_interface_inputs,
3236 iface_field_name, input_var);
3237 } else {
3238 _mesa_hash_table_insert(consumer_inputs,
3239 ralloc_strdup(mem_ctx, input_var->name),
3240 input_var);
3241 }
3242 }
3243 }
3244
3245 /**
3246 * Find a variable from the consumer that "matches" the specified variable
3247 *
3248 * This function only finds inputs with names that match. There is no
3249 * validation (here) that the types, etc. are compatible.
3250 */
3251 static nir_variable *
3252 get_matching_input(void *mem_ctx,
3253 const nir_variable *output_var,
3254 struct hash_table *consumer_inputs,
3255 struct hash_table *consumer_interface_inputs,
3256 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3257 {
3258 nir_variable *input_var;
3259
3260 if (output_var->data.explicit_location) {
3261 input_var = consumer_inputs_with_locations[output_var->data.location];
3262 } else if (output_var->interface_type != NULL) {
3263 char *const iface_field_name =
3264 ralloc_asprintf(mem_ctx, "%s.%s",
3265 glsl_get_type_name(glsl_without_array(output_var->interface_type)),
3266 output_var->name);
3267 struct hash_entry *entry =
3268 _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
3269 input_var = entry ? (nir_variable *) entry->data : NULL;
3270 } else {
3271 struct hash_entry *entry =
3272 _mesa_hash_table_search(consumer_inputs, output_var->name);
3273 input_var = entry ? (nir_variable *) entry->data : NULL;
3274 }
3275
3276 return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
3277 ? NULL : input_var;
3278 }
3279
3280 static int
3281 io_variable_cmp(const void *_a, const void *_b)
3282 {
3283 const nir_variable *const a = *(const nir_variable **) _a;
3284 const nir_variable *const b = *(const nir_variable **) _b;
3285
3286 if (a->data.explicit_location && b->data.explicit_location)
3287 return b->data.location - a->data.location;
3288
3289 if (a->data.explicit_location && !b->data.explicit_location)
3290 return 1;
3291
3292 if (!a->data.explicit_location && b->data.explicit_location)
3293 return -1;
3294
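/* Fall back to reverse alphabetical order; the caller pushes entries onto
 * the head of the variable list, which restores canonical order.
 */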
3295 return -strcmp(a->name, b->name);
3296 }
3297
3298 /**
3299 * Sort the shader IO variables into canonical order
3300 */
3301 static void
3302 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
3303 {
3304 nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
3305 unsigned num_variables = 0;
3306
3307 nir_foreach_variable_with_modes(var, nir, io_mode) {
3308 /* If we have already encountered more I/O variables than could
3309 * successfully link, bail.
3310 */
3311 if (num_variables == ARRAY_SIZE(var_table))
3312 return;
3313
3314 var_table[num_variables++] = var;
3315 }
3316
3317 if (num_variables == 0)
3318 return;
3319
3320 /* Sort the list in reverse order (io_variable_cmp handles this). Later
3321 * we're going to push the variables on to the IR list as a stack, so we
3322 * want the last variable (in canonical order) to be first in the list.
3323 */
3324 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
3325
3326 /* Remove the variable from its current location in the variable list, and
3327 * put it at the front.
3328 */
3329 for (unsigned i = 0; i < num_variables; i++) {
3330 exec_node_remove(&var_table[i]->node);
3331 exec_list_push_head(&nir->variables, &var_table[i]->node);
3332 }
3333 }
3334
3335 /**
3336 * Generate a bitfield map of the explicit locations for shader varyings.
3337 *
3338 * Note: For Tessellation shaders we are sitting right on the limits of the
3339 * 64 bit map. Per-vertex and per-patch both have separate location domains
3340 * with a max of MAX_VARYING.
3341 */
3342 static uint64_t
3343 reserved_varying_slot(struct gl_linked_shader *sh,
3344 nir_variable_mode io_mode)
3345 {
3346 assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
3347 /* Avoid an overflow of the returned value */
3348 assert(MAX_VARYINGS_INCL_PATCH <= 64);
3349
3350 uint64_t slots = 0;
3351 int var_slot;
3352
3353 if (!sh)
3354 return slots;
3355
3356 nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
3357 if (!var->data.explicit_location ||
3358 var->data.location < VARYING_SLOT_VAR0)
3359 continue;
3360
3361 var_slot = var->data.location - VARYING_SLOT_VAR0;
3362
3363 bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
3364 sh->Stage == MESA_SHADER_VERTEX;
3365 unsigned num_elements =
3366 glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
3367 is_gl_vertex_input);
3368 for (unsigned i = 0; i < num_elements; i++) {
3369 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
3370 slots |= UINT64_C(1) << var_slot;
3371 var_slot += 1;
3372 }
3373 }
3374
3375 return slots;
3376 }
3377
3378 /**
3379 * Sets the bits in the inputs_read, or outputs_written
3380 * bitfield corresponding to this variable.
3381 */
3382 static void
3383 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
3384 {
3385 assert(var->data.mode == nir_var_shader_in ||
3386 var->data.mode == nir_var_shader_out);
3387 assert(var->data.location >= VARYING_SLOT_VAR0);
3388
3389 const struct glsl_type *type = var->type;
3390 if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
3391 assert(glsl_type_is_array(type));
3392 type = glsl_get_array_element(type);
3393 }
3394
3395 unsigned location = var->data.location - VARYING_SLOT_VAR0;
3396 unsigned slots = glsl_count_attribute_slots(type, false);
3397 for (unsigned i = 0; i < slots; i++) {
3398 BITSET_SET(bits, location + i);
3399 }
3400 }
3401
3402 static uint8_t
3403 get_num_components(nir_variable *var)
3404 {
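/* Structs and interface blocks are handled conservatively: assume all
 * four components of each slot are used.
 */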
3405 if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
3406 return 4;
3407
3408 return glsl_get_vector_elements(glsl_without_array(var->type));
3409 }
3410
3411 static void
3412 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
3413 {
3414 nir_foreach_function_impl(impl, shader) {
3415 nir_foreach_block(block, impl) {
3416 nir_foreach_instr(instr, block) {
3417 if (instr->type != nir_instr_type_intrinsic)
3418 continue;
3419
3420 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3421 if (intrin->intrinsic != nir_intrinsic_load_deref)
3422 continue;
3423
3424 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3425 if (!nir_deref_mode_is(deref, nir_var_shader_out))
3426 continue;
3427
3428 nir_variable *var = nir_deref_instr_get_variable(deref);
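/* Mark every component the output covers as read, one bitset per
 * component.
 */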
3429 for (unsigned i = 0; i < get_num_components(var); i++) {
3430 if (var->data.location < VARYING_SLOT_VAR0)
3431 continue;
3432
3433 unsigned comp = var->data.location_frac;
3434 set_variable_io_mask(read[comp + i], var, shader->info.stage);
3435 }
3436 }
3437 }
3438 }
3439 }
3440
3441 /* We need to replace any interpolate intrinsics whose variable was lowered
3442 * to an undefined (shader_temp) input, as no later NIR pass expects to see them.
3443 */
3444 static bool
3445 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
3446 void *data)
3447 {
3448 if (instr->type == nir_instr_type_intrinsic) {
3449 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3450
3451 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
3452 intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
3453 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
3454 nir_variable *var = nir_intrinsic_get_var(intrin, 0);
3455 if (var->data.mode == nir_var_shader_temp) {
3456 /* Create undef and rewrite the interp uses */
3457 nir_def *undef =
3458 nir_undef(b, intrin->def.num_components,
3459 intrin->def.bit_size);
3460 nir_def_replace(&intrin->def, undef);
3461 return true;
3462 }
3463 }
3464 }
3465
3466 return false;
3467 }
3468
3469 static void
3470 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
3471 {
3472 /* Remove all interpolate uses of the unset varying and replace with undef. */
3473 if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
3474 (void) nir_shader_instructions_pass(shader,
3475 replace_unused_interpolate_at_with_undef,
3476 nir_metadata_control_flow,
3477 NULL);
3478 }
3479
3480 nir_lower_global_vars_to_local(shader);
3481 nir_fixup_deref_modes(shader);
3482 }
3483
3484 /**
3485 * Helper for removing unused shader I/O variables, by demoting them to global
3486 * variables (which may then be dead code eliminated).
3487 *
3488 * Example usage is:
3489 *
3490 * progress = remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out,
3491 * read) ||
3492 * progress;
3493 *
3494 * The "used" should be an array of 4 BITSET_WORDs representing each
3495 * .location_frac used. Note that for vector variables, only the first channel
3496 * (.location_frac) is examined for deciding if the variable is used!
3497 */
3498 static bool
3499 remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
3500 struct gl_shader_program *prog,
3501 nir_variable_mode mode,
3502 BITSET_WORD **used_by_other_stage)
3503 {
3504 assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
3505
3506 bool progress = false;
3507 nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;
3508
3509 BITSET_WORD **used = used_by_other_stage;
3510
3511 nir_foreach_variable_with_modes_safe(var, shader, mode) {
3512
3513 /* Skip builtins; dead builtins are removed elsewhere. */
3514 if (is_gl_identifier(var->name))
3515 continue;
3516
3517 if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
3518 continue;
3519
3520 /* Skip xfb varyings and any other type we cannot remove */
3521 if (var->data.always_active_io)
3522 continue;
3523
3524 if (var->data.explicit_xfb_buffer)
3525 continue;
3526
3527 BITSET_WORD *other_stage = used[var->data.location_frac];
3528
3529 /* If location == -1, lower the varying to a global, as it has no match
3530 * and is not an xfb varying. This must be done after skipping builtins,
3531 * as builtins could be assigned a location of -1.
3532 * We also lower unused varyings with explicit locations.
3533 */
3534 bool use_found = false;
3535 if (var->data.location >= 0) {
3536 unsigned location = var->data.location - VARYING_SLOT_VAR0;
3537
3538 const struct glsl_type *type = var->type;
3539 if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
3540 assert(glsl_type_is_array(type));
3541 type = glsl_get_array_element(type);
3542 }
3543
3544 unsigned slots = glsl_count_attribute_slots(type, false);
3545 for (unsigned i = 0; i < slots; i++) {
3546 if (BITSET_TEST(other_stage, location + i)) {
3547 use_found = true;
3548 break;
3549 }
3550 }
3551 }
3552
3553 if (!use_found) {
3554 /* This one is invalid, make it a global variable instead */
3555 var->data.location = 0;
3556 var->data.mode = nir_var_shader_temp;
3557
3558 progress = true;
3559
3560 if (mode == nir_var_shader_in) {
3561 if (!prog->IsES && prog->GLSL_Version <= 120) {
3562 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
3563 *
3564 * Only those varying variables used (i.e. read) in
3565 * the fragment shader executable must be written to
3566 * by the vertex shader executable; declaring
3567 * superfluous varying variables in a vertex shader is
3568 * permissible.
3569 *
3570 * We interpret this text as meaning that the VS must
3571 * write the variable for the FS to read it. See
3572 * "glsl1-varying read but not written" in piglit.
3573 */
3574 linker_error(prog, "%s shader varying %s not written "
3575 "by %s shader\n.",
3576 _mesa_shader_stage_to_string(consumer->info.stage),
3577 var->name,
3578 _mesa_shader_stage_to_string(producer->info.stage));
3579 } else {
3580 linker_warning(prog, "%s shader varying %s not written "
3581 "by %s shader\n.",
3582 _mesa_shader_stage_to_string(consumer->info.stage),
3583 var->name,
3584 _mesa_shader_stage_to_string(producer->info.stage));
3585 }
3586 }
3587 }
3588 }
3589
3590 if (progress)
3591 fixup_vars_lowered_to_temp(shader, mode);
3592
3593 return progress;
3594 }
3595
3596 static bool
3597 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
3598 struct gl_shader_program *prog, void *mem_ctx)
3599 {
3600 assert(producer->info.stage != MESA_SHADER_FRAGMENT);
3601 assert(consumer->info.stage != MESA_SHADER_VERTEX);
3602
3603 int max_loc_out = 0;
3604 nir_foreach_shader_out_variable(var, producer) {
3605 if (var->data.location < VARYING_SLOT_VAR0)
3606 continue;
3607
3608 const struct glsl_type *type = var->type;
3609 if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
3610 assert(glsl_type_is_array(type));
3611 type = glsl_get_array_element(type);
3612 }
3613 unsigned slots = glsl_count_attribute_slots(type, false);
3614
3615 max_loc_out = MAX2(max_loc_out,
3616 (var->data.location - VARYING_SLOT_VAR0) + slots);
3617 }
3618
3619 int max_loc_in = 0;
3620 nir_foreach_shader_in_variable(var, consumer) {
3621 if (var->data.location < VARYING_SLOT_VAR0)
3622 continue;
3623
3624 const struct glsl_type *type = var->type;
3625 if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
3626 assert(glsl_type_is_array(type));
3627 type = glsl_get_array_element(type);
3628 }
3629 unsigned slots = glsl_count_attribute_slots(type, false);
3630
3631 max_loc_in = MAX2(max_loc_in,
3632 (var->data.location - VARYING_SLOT_VAR0) + slots);
3633 }
3634
3635 /* Old glsl shaders that don't use explicit locations can contain more than
3636 * 64 varyings before unused varyings are removed, so we must count them
3637 * and make use of the BITSET macros to keep track of used slots. Once we
3638 * have removed these excess varyings we can make use of further nir
3639 * varying linking optimisation passes.
3640 */
3641 BITSET_WORD *read[4];
3642 BITSET_WORD *written[4];
3643 int max_loc = MAX2(max_loc_in, max_loc_out);
3644 for (unsigned i = 0; i < 4; i++) {
3645 read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3646 written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3647 }
3648
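/* Record which location/component pairs the producer writes and the
 * consumer reads.
 */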
3649 nir_foreach_shader_out_variable(var, producer) {
3650 if (var->data.location < VARYING_SLOT_VAR0)
3651 continue;
3652
3653 for (unsigned i = 0; i < get_num_components(var); i++) {
3654 unsigned comp = var->data.location_frac;
3655 set_variable_io_mask(written[comp + i], var, producer->info.stage);
3656 }
3657 }
3658
3659 nir_foreach_shader_in_variable(var, consumer) {
3660 if (var->data.location < VARYING_SLOT_VAR0)
3661 continue;
3662
3663 for (unsigned i = 0; i < get_num_components(var); i++) {
3664 unsigned comp = var->data.location_frac;
3665 set_variable_io_mask(read[comp + i], var, consumer->info.stage);
3666 }
3667 }
3668
3669 /* Each TCS invocation can read data written by other TCS invocations,
3670 * so even if the outputs are not used by the TES we must also make
3671 * sure they are not read by the TCS before demoting them to globals.
3672 */
3673 if (producer->info.stage == MESA_SHADER_TESS_CTRL)
3674 tcs_add_output_reads(producer, read);
3675
3676 bool progress = false;
3677 progress =
3678 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
3679 progress =
3680 remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;
3681
3682 return progress;
3683 }
3684
3685 static bool
3686 should_add_varying_match_record(nir_variable *const input_var,
3687 struct gl_shader_program *prog,
3688 struct gl_linked_shader *producer,
3689 struct gl_linked_shader *consumer)
3690 {
3691 /* If a matching input variable was found, add this output (and the input) to
3692 * the set. If this is a separable program and there is no consumer stage,
3693 * add the output.
3694 *
3695 * Always add TCS outputs. They are shared by all invocations
3696 * within a patch and can be used as shared memory.
3697 */
3698 return input_var || (prog->SeparateShader && consumer == NULL) ||
3699 producer->Stage == MESA_SHADER_TESS_CTRL;
3700 }
3701
3702 /* This assigns some initial unoptimised varying locations so that our nir
3703 * optimisations can perform their matching, and also does initial
3704 * processing of the transform feedback declarations.
3705 */
3706 static bool
3707 assign_initial_varying_locations(const struct gl_constants *consts,
3708 const struct gl_extensions *exts,
3709 void *mem_ctx,
3710 struct gl_shader_program *prog,
3711 struct gl_linked_shader *producer,
3712 struct gl_linked_shader *consumer,
3713 unsigned num_xfb_decls,
3714 struct xfb_decl *xfb_decls,
3715 struct varying_matches *vm)
3716 {
3717 init_varying_matches(mem_ctx, vm, consts, exts,
3718 producer ? producer->Stage : MESA_SHADER_NONE,
3719 consumer ? consumer->Stage : MESA_SHADER_NONE,
3720 prog->SeparateShader);
3721
3722 struct hash_table *tfeedback_candidates =
3723 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3724 _mesa_key_string_equal);
3725 struct hash_table *consumer_inputs =
3726 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3727 _mesa_key_string_equal);
3728 struct hash_table *consumer_interface_inputs =
3729 _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
3730 _mesa_key_string_equal);
3731 nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
3732 NULL,
3733 };
3734
3735 if (consumer)
3736 populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
3737 consumer_inputs, consumer_interface_inputs,
3738 consumer_inputs_with_locations);
3739
3740 if (producer) {
3741 nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
3742 /* Only geometry shaders can use non-zero streams */
3743 assert(output_var->data.stream == 0 ||
3744 (output_var->data.stream < MAX_VERTEX_STREAMS &&
3745 producer->Stage == MESA_SHADER_GEOMETRY));
3746
3747 if (num_xfb_decls > 0) {
3748 /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
3749 * ("Vertex Shader Variables / Output Variables")
3750 *
3751 * "Each program object can specify a set of output variables from
3752 * one shader to be recorded in transform feedback mode (see
3753 * section 13.3). The variables that can be recorded are those
3754 * emitted by the first active shader, in order, from the
3755 * following list:
3756 *
3757 * * geometry shader
3758 * * tessellation evaluation shader
3759 * * tessellation control shader
3760 * * vertex shader"
3761 *
3762 * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
3763 * Variables / Output Variables") tessellation control shader is
3764 * not included in the stages list.
3765 */
3766 if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
3767
3768 const struct glsl_type *type = output_var->data.from_named_ifc_block ?
3769 output_var->interface_type : output_var->type;
3770 if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
3771 assert(glsl_type_is_array(type));
3772 type = glsl_get_array_element(type);
3773 }
3774
3775 const struct glsl_struct_field *ifc_member = NULL;
3776 if (output_var->data.from_named_ifc_block) {
3777 ifc_member =
3778 glsl_get_struct_field_data(glsl_without_array(type),
3779 glsl_get_field_index(glsl_without_array(type), output_var->name));
3780 }
3781
3782 char *name;
3783 if (glsl_type_is_struct(glsl_without_array(type)) ||
3784 (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
3785 type = output_var->type;
3786 name = ralloc_strdup(NULL, output_var->name);
3787 } else if (glsl_type_is_interface(glsl_without_array(type))) {
3788 name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
3789 } else {
3790 name = ralloc_strdup(NULL, output_var->name);
3791 }
3792
3793 struct tfeedback_candidate_generator_state state;
3794 state.mem_ctx = mem_ctx;
3795 state.tfeedback_candidates = tfeedback_candidates;
3796 state.stage = producer->Stage;
3797 state.toplevel_var = output_var;
3798 state.varying_floats = 0;
3799 state.xfb_offset_floats = 0;
3800
3801 tfeedback_candidate_generator(&state, &name, strlen(name), type,
3802 ifc_member);
3803 ralloc_free(name);
3804 }
3805 }
3806
3807 nir_variable *const input_var =
3808 get_matching_input(mem_ctx, output_var, consumer_inputs,
3809 consumer_interface_inputs,
3810 consumer_inputs_with_locations);
3811
3812 if (should_add_varying_match_record(input_var, prog, producer,
3813 consumer)) {
3814 varying_matches_record(mem_ctx, vm, output_var, input_var);
3815 }
3816
3817 /* Only stream 0 outputs can be consumed in the next stage */
3818 if (input_var && output_var->data.stream != 0) {
3819 linker_error(prog, "output %s is assigned to stream=%d but "
3820 "is linked to an input, which requires stream=0",
3821 output_var->name, output_var->data.stream);
3822 return false;
3823 }
3824 }
3825 } else {
3826 /* If there's no producer stage, then this must be a separable program.
3827 * For example, we may have a program that has just a fragment shader.
3828 * Later this program will be used with some arbitrary vertex (or
3829 * geometry) shader program. This means that locations must be assigned
3830 * for all the inputs.
3831 */
3832 nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
3833 varying_matches_record(mem_ctx, vm, NULL, input_var);
3834 }
3835 }
3836
3837 for (unsigned i = 0; i < num_xfb_decls; ++i) {
3838 if (!xfb_decl_is_varying(&xfb_decls[i]))
3839 continue;
3840
3841 const struct tfeedback_candidate *matched_candidate
3842 = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);
3843
3844 if (matched_candidate == NULL)
3845 return false;
3846
3847 /* There are two situations where a new output varying is needed:
3848 *
3849 * - If varying packing is disabled for xfb and the current declaration
3850 * is subscripting an array, whether the subscript is aligned or not.
3851 * to preserve the rest of the array for the consumer.
3852 *
3853 * - If a builtin variable needs to be copied to a new variable
3854 * before its content is modified by another lowering pass (e.g.
3855 * \c gl_Position is transformed by \c nir_lower_viewport_transform).
3856 */
3857 const bool lowered =
3858 (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
3859 (matched_candidate->toplevel_var->data.explicit_location &&
3860 matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
3861 (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
3862 (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
3863 BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
3864
3865 if (lowered) {
3866 nir_variable *new_var;
3867 struct tfeedback_candidate *new_candidate = NULL;
3868
3869 new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
3870 xfb_decls[i].orig_name,
3871 matched_candidate->toplevel_var);
3872 if (new_var == NULL)
3873 return false;
3874
3875 /* Create new candidate and replace matched_candidate */
3876 new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
3877 new_candidate->toplevel_var = new_var;
3878 new_candidate->type = new_var->type;
3879 new_candidate->struct_offset_floats = 0;
3880 new_candidate->xfb_offset_floats = 0;
3881 _mesa_hash_table_insert(tfeedback_candidates,
3882 ralloc_strdup(mem_ctx, new_var->name),
3883 new_candidate);
3884
3885 xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
3886 matched_candidate = new_candidate;
3887 }
3888
3889 /* Mark as xfb varying */
3890 matched_candidate->toplevel_var->data.is_xfb = 1;
3891
3892 /* Mark xfb varyings as always active */
3893 matched_candidate->toplevel_var->data.always_active_io = 1;
3894
3895 /* Mark any corresponding inputs as always active also. We must do this
3896 * because we have a NIR pass that lowers vectors to scalars and another
3897 * that removes unused varyings.
3898 * We don't split varyings marked as always active because there is no
3899 * point in doing so. This means we need to mark both sides of the
3900 * interface as always active otherwise we will have a mismatch and
3901 * start removing things we shouldn't.
3902 */
3903 nir_variable *const input_var =
3904 get_matching_input(mem_ctx, matched_candidate->toplevel_var,
3905 consumer_inputs, consumer_interface_inputs,
3906 consumer_inputs_with_locations);
3907 if (input_var) {
3908 input_var->data.is_xfb = 1;
3909 input_var->data.always_active_io = 1;
3910 }
3911
3912 /* Add the xfb varying to varying matches if it wasn't already added */
3913 if ((!should_add_varying_match_record(input_var, prog, producer,
3914 consumer) &&
3915 !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
3916 matched_candidate->toplevel_var->data.is_xfb_only = 1;
3917 varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
3918 NULL);
3919 }
3920 }
3921
3922 uint64_t reserved_out_slots = 0;
3923 if (producer)
3924 reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);
3925
3926 uint64_t reserved_in_slots = 0;
3927 if (consumer)
3928 reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);
3929
3930 /* Assign temporary user varying locations. This is required for our NIR
3931 * varying optimisations to do their matching.
3932 */
3933 const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
3934 varying_matches_assign_temp_locations(vm, prog, reserved_slots);
3935
3936 for (unsigned i = 0; i < num_xfb_decls; ++i) {
3937 if (!xfb_decl_is_varying(&xfb_decls[i]))
3938 continue;
3939
3940 xfb_decls[i].matched_candidate->initial_location =
3941 xfb_decls[i].matched_candidate->toplevel_var->data.location;
3942 xfb_decls[i].matched_candidate->initial_location_frac =
3943 xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
3944 }
3945
3946 return true;
3947 }
3948
3949 static void
3950 link_shader_opts(struct varying_matches *vm,
3951 nir_shader *producer, nir_shader *consumer,
3952 struct gl_shader_program *prog, void *mem_ctx)
3953 {
3954 /* If we can't pack the stage using this pass then we can't lower io to
3955 * scalar just yet. Instead we leave it to a later NIR linking pass that uses
3956 * ARB_enhanced_layout style packing to pack things further.
3957 *
3958 * Otherwise we might end up causing linking errors and perf regressions
3959 * because the new scalars will be assigned individual slots and can overflow
3960 * the available slots.
3961 */
3962 if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
3963 !vm->disable_xfb_packing) {
3964 NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
3965 NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
3966 }
3967
3968 gl_nir_opts(producer);
3969 gl_nir_opts(consumer);
3970
3971 if (nir_link_opt_varyings(producer, consumer))
3972 gl_nir_opts(consumer);
3973
3974 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
3975 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
3976
3977 if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
3978 NIR_PASS(_, producer, nir_lower_global_vars_to_local);
3979 NIR_PASS(_, consumer, nir_lower_global_vars_to_local);
3980
3981 gl_nir_opts(producer);
3982 gl_nir_opts(consumer);
3983
3984 /* Optimizations can cause varyings to become unused.
3985 * nir_compact_varyings() depends on all dead varyings being removed so
3986 * we need to call nir_remove_dead_variables() again here.
3987 */
3988 NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
3989 NULL);
3990 NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
3991 NULL);
3992 }
3993
3994 nir_link_varying_precision(producer, consumer);
3995 }
3996
3997 /**
3998 * Assign locations for all variables that are produced in one pipeline stage
3999 * (the "producer") and consumed in the next stage (the "consumer").
4000 *
4001 * Variables produced by the producer may also be consumed by transform
4002 * feedback.
4003 *
4004 * \param num_xfb_decls is the number of declarations indicating
4005 * variables that may be consumed by transform feedback.
4006 *
4007 * \param xfb_decls is a pointer to an array of xfb_decl objects
4008 * representing the result of parsing the strings passed to
4009 * glTransformFeedbackVaryings(). assign_location() will be called for
4010 * each of these objects that matches one of the outputs of the
4011 * producer.
4012 *
4013 * When num_xfb_decls is nonzero, it is permissible for the consumer to
4014 * be NULL. In this case, varying locations are assigned solely based on the
4015 * requirements of transform feedback.
4016 */
4017 static bool
4018 assign_final_varying_locations(const struct gl_constants *consts,
4019 const struct gl_extensions *exts,
4020 void *mem_ctx,
4021 struct gl_shader_program *prog,
4022 struct gl_linked_shader *producer,
4023 struct gl_linked_shader *consumer,
4024 unsigned num_xfb_decls,
4025 struct xfb_decl *xfb_decls,
4026 const uint64_t reserved_slots,
4027 struct varying_matches *vm)
4028 {
4029 init_varying_matches(mem_ctx, vm, consts, exts,
4030 producer ? producer->Stage : MESA_SHADER_NONE,
4031 consumer ? consumer->Stage : MESA_SHADER_NONE,
4032 prog->SeparateShader);
4033
4034 /* Regather varying matches as we ran optimisations and the previous pointers
4035 * are no longer valid.
4036 */
4037 if (producer) {
4038 nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
4039 if (var_out->data.location < VARYING_SLOT_VAR0 ||
4040 var_out->data.explicit_location)
4041 continue;
4042
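/* Grow the matches array if necessary. */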
4043 if (vm->num_matches == vm->matches_capacity) {
4044 vm->matches_capacity *= 2;
4045 vm->matches = (struct match *)
4046 reralloc(mem_ctx, vm->matches, struct match,
4047 vm->matches_capacity);
4048 }
4049
4050 vm->matches[vm->num_matches].packing_class
4051 = varying_matches_compute_packing_class(var_out);
4052 vm->matches[vm->num_matches].packing_order
4053 = varying_matches_compute_packing_order(var_out);
4054
4055 vm->matches[vm->num_matches].producer_var = var_out;
4056 vm->matches[vm->num_matches].consumer_var = NULL;
4057 vm->num_matches++;
4058 }
4059
4060 /* Regather xfb varyings too */
4061 for (unsigned i = 0; i < num_xfb_decls; i++) {
4062 if (!xfb_decl_is_varying(&xfb_decls[i]))
4063 continue;
4064
4065 /* Varying pointer was already reset */
4066 if (xfb_decls[i].matched_candidate->initial_location == -1)
4067 continue;
4068
4069 bool UNUSED is_reset = false;
4070 bool UNUSED no_outputs = true;
4071 nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
4072 no_outputs = false;
4073 assert(var_out->data.location != -1);
4074 if (var_out->data.location ==
4075 xfb_decls[i].matched_candidate->initial_location &&
4076 var_out->data.location_frac ==
4077 xfb_decls[i].matched_candidate->initial_location_frac) {
4078 xfb_decls[i].matched_candidate->toplevel_var = var_out;
4079 xfb_decls[i].matched_candidate->initial_location = -1;
4080 is_reset = true;
4081 break;
4082 }
4083 }
4084 assert(is_reset || no_outputs);
4085 }
4086 }
4087
4088 bool found_match = false;
4089 if (consumer) {
4090 nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
4091 if (var_in->data.location < VARYING_SLOT_VAR0 ||
4092 var_in->data.explicit_location)
4093 continue;
4094
4095 found_match = false;
4096 for (unsigned i = 0; i < vm->num_matches; i++) {
4097 if (vm->matches[i].producer_var &&
4098 (vm->matches[i].producer_var->data.location == var_in->data.location &&
4099 vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {
4100
4101 vm->matches[i].consumer_var = var_in;
4102 found_match = true;
4103 break;
4104 }
4105 }
4106 if (!found_match) {
4107 if (vm->num_matches == vm->matches_capacity) {
4108 vm->matches_capacity *= 2;
4109 vm->matches = (struct match *)
4110 reralloc(mem_ctx, vm->matches, struct match,
4111 vm->matches_capacity);
4112 }
4113
4114 vm->matches[vm->num_matches].packing_class
4115 = varying_matches_compute_packing_class(var_in);
4116 vm->matches[vm->num_matches].packing_order
4117 = varying_matches_compute_packing_order(var_in);
4118
4119 vm->matches[vm->num_matches].producer_var = NULL;
4120 vm->matches[vm->num_matches].consumer_var = var_in;
4121 vm->num_matches++;
4122 }
4123 }
4124 }
4125
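/* components[] records how many components of each packed slot are in
 * use; it is filled by varying_matches_assign_locations() and consumed
 * by the packing passes below.
 */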
4126 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
4127 const unsigned slots_used =
4128 varying_matches_assign_locations(vm, prog, components, reserved_slots);
4129 varying_matches_store_locations(vm);
4130
4131 for (unsigned i = 0; i < num_xfb_decls; ++i) {
4132 if (xfb_decl_is_varying(&xfb_decls[i])) {
4133 if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog,
4134 vm->disable_varying_packing, vm->xfb_enabled))
4135 return false;
4136 }
4137 }
4138
4139 if (producer) {
4140 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4141 nir_var_shader_out, 0, producer,
4142 vm->disable_varying_packing,
4143 vm->disable_xfb_packing, vm->xfb_enabled);
4144 nir_lower_pack(producer->Program->nir);
4145 }
4146
4147 if (consumer) {
4148 unsigned consumer_vertices = 0;
4149 if (consumer->Stage == MESA_SHADER_GEOMETRY)
4150 consumer_vertices = consumer->Program->nir->info.gs.vertices_in;
4151
4152 gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4153 nir_var_shader_in, consumer_vertices,
4154 consumer, vm->disable_varying_packing,
4155 vm->disable_xfb_packing, vm->xfb_enabled);
4156 nir_lower_pack(consumer->Program->nir);
4157 }
4158
4159 return true;
4160 }
4161
4162 static bool
4163 check_against_output_limit(const struct gl_constants *consts, gl_api api,
4164 struct gl_shader_program *prog,
4165 struct gl_linked_shader *producer,
4166 unsigned num_explicit_locations)
4167 {
4168 unsigned output_vectors = num_explicit_locations;
4169 nir_foreach_shader_out_variable(var, producer->Program->nir) {
4170 if (!var->data.explicit_location &&
4171 var_counts_against_varying_limit(producer->Stage, var)) {
4172 /* Outputs consumed by the fragment shader can't be doubles. */
4173 output_vectors += glsl_count_attribute_slots(var->type, false);
4174 }
4175 }
4176
4177 assert(producer->Stage != MESA_SHADER_FRAGMENT);
4178 unsigned max_output_components =
4179 consts->Program[producer->Stage].MaxOutputComponents;
4180
4181 const unsigned output_components = output_vectors * 4;
4182 if (output_components > max_output_components) {
4183 if (api == API_OPENGLES2 || prog->IsES)
4184 linker_error(prog, "%s shader uses too many output vectors "
4185 "(%u > %u)\n",
4186 _mesa_shader_stage_to_string(producer->Stage),
4187 output_vectors,
4188 max_output_components / 4);
4189 else
4190 linker_error(prog, "%s shader uses too many output components "
4191 "(%u > %u)\n",
4192 _mesa_shader_stage_to_string(producer->Stage),
4193 output_components,
4194 max_output_components);
4195
4196 return false;
4197 }
4198
4199 return true;
4200 }
4201
4202 static bool
4203 check_against_input_limit(const struct gl_constants *consts, gl_api api,
4204 struct gl_shader_program *prog,
4205 struct gl_linked_shader *consumer,
4206 unsigned num_explicit_locations)
4207 {
4208 unsigned input_vectors = num_explicit_locations;
4209
4210 nir_foreach_shader_in_variable(var, consumer->Program->nir) {
4211 if (!var->data.explicit_location &&
4212 var_counts_against_varying_limit(consumer->Stage, var)) {
4213 /* These are varyings, not vertex shader inputs, so is_gl_vertex_input is false. */
4214 input_vectors += glsl_count_attribute_slots(var->type, false);
4215 }
4216 }
4217
4218 assert(consumer->Stage != MESA_SHADER_VERTEX);
4219 unsigned max_input_components =
4220 consts->Program[consumer->Stage].MaxInputComponents;
4221
4222 const unsigned input_components = input_vectors * 4;
4223 if (input_components > max_input_components) {
4224 if (api == API_OPENGLES2 || prog->IsES)
4225 linker_error(prog, "%s shader uses too many input vectors "
4226 "(%u > %u)\n",
4227 _mesa_shader_stage_to_string(consumer->Stage),
4228 input_vectors,
4229 max_input_components / 4);
4230 else
4231 linker_error(prog, "%s shader uses too many input components "
4232 "(%u > %u)\n",
4233 _mesa_shader_stage_to_string(consumer->Stage),
4234 input_components,
4235 max_input_components);
4236
4237 return false;
4238 }
4239
4240 return true;
4241 }
4242
4243 /* Lower unset/unused inputs/outputs */
4244 static void
4245 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
4246 unsigned stage, nir_variable_mode mode)
4247 {
4248 bool progress = false;
4249 nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;
4250
4251 nir_foreach_variable_with_modes_safe(var, shader, mode) {
4252 if (!var->data.is_xfb_only && var->data.location == -1) {
4253 var->data.location = 0;
4254 var->data.mode = nir_var_shader_temp;
4255 progress = true;
4256 }
4257 }
4258
4259 if (progress)
4260 fixup_vars_lowered_to_temp(shader, mode);
4261 }
4262
4263 static bool
4264 link_varyings(struct gl_shader_program *prog, unsigned first,
4265 unsigned last, const struct gl_constants *consts,
4266 const struct gl_extensions *exts, gl_api api, void *mem_ctx)
4267 {
4268 bool has_xfb_qualifiers = false;
4269 unsigned num_xfb_decls = 0;
4270 char **varying_names = NULL;
4271 bool compact_arrays = false;
4272 struct xfb_decl *xfb_decls = NULL;
4273
4274 if (last > MESA_SHADER_FRAGMENT)
4275 return true;
4276
4277 /* From the ARB_enhanced_layouts spec:
4278 *
4279 * "If the shader used to record output variables for transform feedback
4280 * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
4281 * qualifiers, the values specified by TransformFeedbackVaryings are
4282 * ignored, and the set of variables captured for transform feedback is
4283 * instead derived from the specified layout qualifiers."
4284 */
4285 for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
4286 /* Find last stage before fragment shader */
4287 if (prog->_LinkedShaders[i]) {
4288 has_xfb_qualifiers =
4289 process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
4290 prog, &num_xfb_decls,
4291 &varying_names,
4292 &compact_arrays);
4293 break;
4294 }
4295 }
4296
4297 if (!has_xfb_qualifiers) {
4298 num_xfb_decls = prog->TransformFeedback.NumVarying;
4299 varying_names = prog->TransformFeedback.VaryingNames;
4300 }
4301
4302 if (num_xfb_decls != 0) {
4303 /* From GL_EXT_transform_feedback:
4304 * A program will fail to link if:
4305 *
4306 * * the <count> specified by TransformFeedbackVaryingsEXT is
4307 * non-zero, but the program object has no vertex or geometry
4308 * shader;
4309 */
4310 if (first >= MESA_SHADER_FRAGMENT) {
4311 linker_error(prog, "Transform feedback varyings specified, but "
4312 "no vertex, tessellation, or geometry shader is "
4313 "present.\n");
4314 return false;
4315 }
4316
4317 xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
4318 num_xfb_decls);
4319 if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
4320 varying_names, xfb_decls, compact_arrays))
4321 return false;
4322 }
4323
4324 struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
4325 unsigned num_shaders = 0;
4326
4327 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4328 if (prog->_LinkedShaders[i])
4329 linked_shader[num_shaders++] = prog->_LinkedShaders[i];
4330 }
4331
4332 struct varying_matches vm;
4333 if (last < MESA_SHADER_FRAGMENT &&
4334 (num_xfb_decls != 0 || prog->SeparateShader)) {
4335 struct gl_linked_shader *producer = prog->_LinkedShaders[last];
4336 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
4337 producer, NULL, num_xfb_decls,
4338 xfb_decls, &vm))
4339 return false;
4340 }
4341
4342 if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
4343 remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
4344 remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
4345 }
4346
4347 if (prog->SeparateShader) {
4348 struct gl_linked_shader *consumer = linked_shader[0];
4349 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
4350 consumer, 0, NULL, &vm))
4351 return false;
4352 }
4353
4354 if (num_shaders == 1) {
4355 /* Linking shaders also optimizes them. Separate shaders, compute shaders
4356 * and shaders with a fixed-func VS or FS that don't need linking are
4357 * optimized here.
4358 */
4359 gl_nir_opts(linked_shader[0]->Program->nir);
4360 } else {
4361 /* Linking the stages in the opposite order (from fragment to vertex)
4362 * ensures that inter-shader outputs written to in an earlier stage
4363 * are eliminated if they are (transitively) not used in a later
4364 * stage.
4365 */
4366 for (int i = num_shaders - 2; i >= 0; i--) {
4367 unsigned stage_num_xfb_decls =
4368 linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
4369 num_xfb_decls : 0;
4370
4371 if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
4372 linked_shader[i],
4373 linked_shader[i + 1],
4374 stage_num_xfb_decls, xfb_decls,
4375 &vm))
4376 return false;
4377
4378 /* Now that validation is done it's safe to remove unused varyings. As
4379 * we have both a producer and consumer it's safe to remove unused
4380 * varyings even if the program is a SSO because the stages are being
4381 * linked together i.e. we have a multi-stage SSO.
4382 */
4383 link_shader_opts(&vm, linked_shader[i]->Program->nir,
4384 linked_shader[i + 1]->Program->nir,
4385 prog, mem_ctx);
4386
4387 remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
4388 nir_var_shader_out);
4389 remove_unused_shader_inputs_and_outputs(prog,
4390 linked_shader[i + 1]->Stage,
4391 nir_var_shader_in);
4392 }
4393 }
4394
4395 if (!prog->SeparateShader) {
4396 /* If not SSO remove unused varyings from the first/last stage */
4397 NIR_PASS(_, prog->_LinkedShaders[first]->Program->nir,
4398 nir_remove_dead_variables, nir_var_shader_in, NULL);
4399 NIR_PASS(_, prog->_LinkedShaders[last]->Program->nir,
4400 nir_remove_dead_variables, nir_var_shader_out, NULL);
4401 } else {
4402 /* Sort inputs / outputs into a canonical order. This is necessary so
4403 * that inputs / outputs of separable shaders will be assigned
4404 * predictable locations regardless of the order in which declarations
4405 * appeared in the shader source.
4406 */
4407 if (first != MESA_SHADER_VERTEX) {
4408 canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
4409 nir_var_shader_in);
4410 }
4411
4412 if (last != MESA_SHADER_FRAGMENT) {
4413 canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
4414 nir_var_shader_out);
4415 }
4416 }
4417
4418 /* If there is no fragment shader we still need to set up transform feedback.
4419 *
4420 * For SSO we also need to assign output locations. We assign them here
4421 * because we need to do it for both single stage programs and multi stage
4422 * programs.
4423 */
4424 if (last < MESA_SHADER_FRAGMENT &&
4425 (num_xfb_decls != 0 || prog->SeparateShader)) {
4426 const uint64_t reserved_out_slots =
4427 reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
4428 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
4429 prog->_LinkedShaders[last], NULL,
4430 num_xfb_decls, xfb_decls,
4431 reserved_out_slots, &vm))
4432 return false;
4433 }
4434
4435 if (prog->SeparateShader) {
4436 struct gl_linked_shader *const sh = prog->_LinkedShaders[first];
4437
4438 const uint64_t reserved_slots =
4439 reserved_varying_slot(sh, nir_var_shader_in);
4440
4441 /* Assign input locations for SSO, output locations are already
4442 * assigned.
4443 */
4444 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
4445 NULL /* producer */,
4446 sh /* consumer */,
4447 0 /* num_xfb_decls */,
4448 NULL /* xfb_decls */,
4449 reserved_slots, &vm))
4450 return false;
4451 }
4452
4453 if (num_shaders == 1) {
4454 gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
4455 0, NULL);
4456 gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
4457 num_xfb_decls, xfb_decls);
4458 } else {
4459 /* Linking the stages in the opposite order (from fragment to vertex)
4460 * ensures that inter-shader outputs written to in an earlier stage
4461 * are eliminated if they are (transitively) not used in a later
4462 * stage.
4463 */
4464 int next = last;
4465 for (int i = next - 1; i >= 0; i--) {
4466 if (prog->_LinkedShaders[i] == NULL && i != 0)
4467 continue;
4468
4469 struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
4470 struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
4471
4472 gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
4473 next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
4474 xfb_decls);
4475
4476 const uint64_t reserved_out_slots =
4477 reserved_varying_slot(sh_i, nir_var_shader_out);
4478 const uint64_t reserved_in_slots =
4479 reserved_varying_slot(sh_next, nir_var_shader_in);
4480
4481 if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
4482 sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
4483 xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
4484 return false;
4485
4486 /* This must be done after all dead varyings are eliminated. */
4487 if (sh_i != NULL) {
4488 unsigned slots_used = util_bitcount64(reserved_out_slots);
4489 if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
4490 return false;
4491 }
4492
4493 unsigned slots_used = util_bitcount64(reserved_in_slots);
4494 if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
4495 return false;
4496
4497 next = i;
4498 }
4499 }
4500
4501 if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
4502 has_xfb_qualifiers, mem_ctx))
4503 return false;
4504
4505 return prog->data->LinkStatus != LINKING_FAILURE;
4506 }
4507
4508 bool
4509 gl_assign_attribute_or_color_locations(const struct gl_constants *consts,
4510 struct gl_shader_program *prog)
4511 {
4512 void *mem_ctx = ralloc_context(NULL);
4513
4514 if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4515 MESA_SHADER_VERTEX, true)) {
4516 ralloc_free(mem_ctx);
4517 return false;
4518 }
4519
4520 if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4521 MESA_SHADER_FRAGMENT, true)) {
4522 ralloc_free(mem_ctx);
4523 return false;
4524 }
4525
4526 ralloc_free(mem_ctx);
4527 return true;
4528 }
4529
4530 bool
4531 gl_nir_link_varyings(const struct gl_constants *consts,
4532 const struct gl_extensions *exts,
4533 gl_api api, struct gl_shader_program *prog)
4534 {
4535 void *mem_ctx = ralloc_context(NULL);
4536
4537 unsigned first, last;
4538
4539 MESA_TRACE_FUNC();
4540
4541 first = MESA_SHADER_STAGES;
4542 last = 0;
4543
4544 /* We need to initialise the program resource list because the varying
4545 * packing pass may start inserting varyings onto the list.
4546 */
4547 init_program_resource_list(prog);
4548
4549 /* Determine first and last stage. */
4550 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4551 if (!prog->_LinkedShaders[i])
4552 continue;
4553 if (first == MESA_SHADER_STAGES)
4554 first = i;
4555 last = i;
4556 }
4557
4558 bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
4559 if (r) {
4560 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4561 if (!prog->_LinkedShaders[i])
4562 continue;
4563
4564 /* Check for transform feedback varyings specified via the API */
4565 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
4566 prog->TransformFeedback.NumVarying > 0;
4567
4568 /* Check for transform feedback varyings specified in the Shader */
4569 if (prog->last_vert_prog) {
4570 prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
4571 prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
4572 }
4573 }
4574
4575 /* Assign NIR XFB info to the last stage before the fragment shader */
4576 for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
4577 struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
4578 if (sh && stage != MESA_SHADER_TESS_CTRL) {
4579 sh->Program->nir->xfb_info =
4580 gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
4581 sh->Program->nir);
4582 break;
4583 }
4584 }
4585
4586 /* Lower IO and thoroughly optimize and compact varyings. */
4587 gl_nir_lower_optimize_varyings(consts, prog, false);
4588 }
4589
4590 ralloc_free(mem_ctx);
4591 return r;
4592 }
4593