xref: /aosp_15_r20/external/mesa3d/src/compiler/glsl/gl_nir_link_varyings.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2021 Valve Corporation
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 /**
26  * Linker functions related specifically to linking varyings between shader
27  * stages.
28  */
29 
30 #include "main/errors.h"
31 #include "main/macros.h"
32 #include "main/menums.h"
33 #include "main/mtypes.h"
34 #include "program/symbol_table.h"
35 #include "util/hash_table.h"
36 #include "util/u_math.h"
37 #include "util/perf/cpu_trace.h"
38 
39 #include "nir.h"
40 #include "nir_builder.h"
41 #include "nir_deref.h"
42 #include "gl_nir.h"
43 #include "gl_nir_link_varyings.h"
44 #include "gl_nir_linker.h"
45 #include "linker_util.h"
46 #include "string_to_uint_map.h"
47 
/* Mask of the low (i) bits.  Uses unsigned literals so that (1u << 31) is
 * well defined (a signed 1 << 31 overflows int, which is UB), and returns an
 * all-ones mask for any index >= 32 where the shift itself would be UB.
 */
#define SAFE_MASK_FROM_INDEX(i) (((i) >= 32) ? ~0u : ((1u << (i)) - 1))
49 
/* Temporary storage for the set of attributes that need locations assigned. */
struct temp_attr {
   unsigned slots;        /* number of location slots the attribute consumes */
   unsigned original_idx; /* index in the original declaration order; used as
                           * a tie-breaker so sorting is stable */
   nir_variable *var;     /* the attribute variable itself */
};
56 
57 /* Used below in the call to qsort. */
58 static int
compare_attr(const void * a,const void * b)59 compare_attr(const void *a, const void *b)
60 {
61    const struct temp_attr *const l = (const struct temp_attr *) a;
62    const struct temp_attr *const r = (const struct temp_attr *) b;
63 
64    /* Reversed because we want a descending order sort below. */
65    if (r->slots != l->slots)
66       return r->slots - l->slots;
67 
68    return l->original_idx - r->original_idx;
69 }
70 
71 /**
72  * Get the varying type stripped of the outermost array if we're processing
73  * a stage whose varyings are arrays indexed by a vertex number (such as
74  * geometry shader inputs).
75  */
76 static const struct glsl_type *
get_varying_type(const nir_variable * var,gl_shader_stage stage)77 get_varying_type(const nir_variable *var, gl_shader_stage stage)
78 {
79    const struct glsl_type *type = var->type;
80    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
81       assert(glsl_type_is_array(type));
82       type = glsl_get_array_element(type);
83    }
84 
85    return type;
86 }
87 
/**
 * Find a contiguous set of available bits in a bitmask.
 *
 * \param used_mask     Bits representing used (1) and unused (0) locations
 * \param needed_count  Number of contiguous bits needed.
 *
 * \return
 * Base location of the available bits on success or -1 on failure.
 */
static int
find_available_slots(unsigned used_mask, unsigned needed_count)
{
   const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;

   /* Reject degenerate requests before computing the mask so we never shift
    * by the full width of the type (undefined behavior).
    *
    * The comparison to 32 is redundant, but without it GCC emits "warning:
    * cannot optimize possibly infinite loops" for the loop below.
    */
   if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
      return -1;

   /* Use an unsigned literal so the shift cannot overflow a signed int, and
    * special-case needed_count == 32 where "1u << 32" would be UB.
    */
   unsigned needed_mask =
      (needed_count >= 32) ? ~0u : (1u << needed_count) - 1;

   for (int i = 0; i <= max_bit_to_test; i++) {
      if ((needed_mask & ~used_mask) == needed_mask)
         return i;

      needed_mask <<= 1;
   }

   return -1;
}
118 
119 /* Find deref based on variable name.
120  * Note: This function does not support arrays.
121  */
122 static bool
find_deref(nir_shader * shader,const char * name)123 find_deref(nir_shader *shader, const char *name)
124 {
125    nir_foreach_function(func, shader) {
126       nir_foreach_block(block, func->impl) {
127          nir_foreach_instr(instr, block) {
128             if (instr->type == nir_instr_type_deref) {
129                nir_deref_instr *deref = nir_instr_as_deref(instr);
130                if (deref->deref_type == nir_deref_type_var &&
131                    strcmp(deref->var->name, name) == 0)
132                   return true;
133             }
134          }
135       }
136    }
137 
138    return false;
139 }
140 
141 /**
142  * Validate the types and qualifiers of an output from one stage against the
143  * matching input to another stage.
144  */
145 static void
cross_validate_types_and_qualifiers(const struct gl_constants * consts,struct gl_shader_program * prog,const nir_variable * input,const nir_variable * output,gl_shader_stage consumer_stage,gl_shader_stage producer_stage)146 cross_validate_types_and_qualifiers(const struct gl_constants *consts,
147                                     struct gl_shader_program *prog,
148                                     const nir_variable *input,
149                                     const nir_variable *output,
150                                     gl_shader_stage consumer_stage,
151                                     gl_shader_stage producer_stage)
152 {
153    /* Check that the types match between stages.
154     */
155    const struct glsl_type *type_to_match = input->type;
156 
157    /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
158    const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
159                                    consumer_stage != MESA_SHADER_FRAGMENT) ||
160                                   consumer_stage == MESA_SHADER_GEOMETRY;
161    if (extra_array_level) {
162       assert(glsl_type_is_array(type_to_match));
163       type_to_match = glsl_get_array_element(type_to_match);
164    }
165 
166    if (type_to_match != output->type) {
167       if (glsl_type_is_struct(output->type)) {
168          /* Structures across shader stages can have different name
169           * and considered to match in type if and only if structure
170           * members match in name, type, qualification, and declaration
171           * order. The precision doesn’t need to match.
172           */
173          if (!glsl_record_compare(output->type, type_to_match,
174                                   false, /* match_name */
175                                   true, /* match_locations */
176                                   false /* match_precision */)) {
177             linker_error(prog,
178                   "%s shader output `%s' declared as struct `%s', "
179                   "doesn't match in type with %s shader input "
180                   "declared as struct `%s'\n",
181                   _mesa_shader_stage_to_string(producer_stage),
182                   output->name,
183                   glsl_get_type_name(output->type),
184                   _mesa_shader_stage_to_string(consumer_stage),
185                   glsl_get_type_name(input->type));
186          }
187       } else if (!glsl_type_is_array(output->type) ||
188                  !is_gl_identifier(output->name)) {
189          /* There is a bit of a special case for gl_TexCoord.  This
190           * built-in is unsized by default.  Applications that variable
191           * access it must redeclare it with a size.  There is some
192           * language in the GLSL spec that implies the fragment shader
193           * and vertex shader do not have to agree on this size.  Other
194           * driver behave this way, and one or two applications seem to
195           * rely on it.
196           *
197           * Neither declaration needs to be modified here because the array
198           * sizes are fixed later when update_array_sizes is called.
199           *
200           * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
201           *
202           *     "Unlike user-defined varying variables, the built-in
203           *     varying variables don't have a strict one-to-one
204           *     correspondence between the vertex language and the
205           *     fragment language."
206           */
207          linker_error(prog,
208                       "%s shader output `%s' declared as type `%s', "
209                       "but %s shader input declared as type `%s'\n",
210                       _mesa_shader_stage_to_string(producer_stage),
211                       output->name,
212                       glsl_get_type_name(output->type),
213                       _mesa_shader_stage_to_string(consumer_stage),
214                       glsl_get_type_name(input->type));
215          return;
216       }
217    }
218 
219    /* Check that all of the qualifiers match between stages.
220     */
221 
222    /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
223     * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
224     * conformance test suite does not verify that the qualifiers must match.
225     * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
226     * OpenGLES 3.0 drivers, so we relax the checking in all cases.
227     */
228    if (false /* always skip the centroid check */ &&
229        prog->GLSL_Version < (prog->IsES ? 310 : 430) &&
230        input->data.centroid != output->data.centroid) {
231       linker_error(prog,
232                    "%s shader output `%s' %s centroid qualifier, "
233                    "but %s shader input %s centroid qualifier\n",
234                    _mesa_shader_stage_to_string(producer_stage),
235                    output->name,
236                    (output->data.centroid) ? "has" : "lacks",
237                    _mesa_shader_stage_to_string(consumer_stage),
238                    (input->data.centroid) ? "has" : "lacks");
239       return;
240    }
241 
242    if (input->data.sample != output->data.sample) {
243       linker_error(prog,
244                    "%s shader output `%s' %s sample qualifier, "
245                    "but %s shader input %s sample qualifier\n",
246                    _mesa_shader_stage_to_string(producer_stage),
247                    output->name,
248                    (output->data.sample) ? "has" : "lacks",
249                    _mesa_shader_stage_to_string(consumer_stage),
250                    (input->data.sample) ? "has" : "lacks");
251       return;
252    }
253 
254    if (input->data.patch != output->data.patch) {
255       linker_error(prog,
256                    "%s shader output `%s' %s patch qualifier, "
257                    "but %s shader input %s patch qualifier\n",
258                    _mesa_shader_stage_to_string(producer_stage),
259                    output->name,
260                    (output->data.patch) ? "has" : "lacks",
261                    _mesa_shader_stage_to_string(consumer_stage),
262                    (input->data.patch) ? "has" : "lacks");
263       return;
264    }
265 
266    /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
267     *
268     *    "As only outputs need be declared with invariant, an output from
269     *     one shader stage will still match an input of a subsequent stage
270     *     without the input being declared as invariant."
271     *
272     * while GLSL 4.10 says:
273     *
274     *    "For variables leaving one shader and coming into another shader,
275     *     the invariant keyword has to be used in both shaders, or a link
276     *     error will result."
277     *
278     * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
279     *
280     *    "The invariance of varyings that are declared in both the vertex
281     *     and fragment shaders must match."
282     */
283    if (input->data.explicit_invariant != output->data.explicit_invariant &&
284        prog->GLSL_Version < (prog->IsES ? 300 : 420)) {
285       linker_error(prog,
286                    "%s shader output `%s' %s invariant qualifier, "
287                    "but %s shader input %s invariant qualifier\n",
288                    _mesa_shader_stage_to_string(producer_stage),
289                    output->name,
290                    (output->data.explicit_invariant) ? "has" : "lacks",
291                    _mesa_shader_stage_to_string(consumer_stage),
292                    (input->data.explicit_invariant) ? "has" : "lacks");
293       return;
294    }
295 
296    /* GLSL >= 4.40 removes text requiring interpolation qualifiers
297     * to match cross stage, they must only match within the same stage.
298     *
299     * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
300     *
301     *     "It is a link-time error if, within the same stage, the interpolation
302     *     qualifiers of variables of the same name do not match.
303     *
304     * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
305     *
306     *    "When no interpolation qualifier is present, smooth interpolation
307     *    is used."
308     *
309     * So we match variables where one is smooth and the other has no explicit
310     * qualifier.
311     */
312    unsigned input_interpolation = input->data.interpolation;
313    unsigned output_interpolation = output->data.interpolation;
314    if (prog->IsES) {
315       if (input_interpolation == INTERP_MODE_NONE)
316          input_interpolation = INTERP_MODE_SMOOTH;
317       if (output_interpolation == INTERP_MODE_NONE)
318          output_interpolation = INTERP_MODE_SMOOTH;
319    }
320    if (input_interpolation != output_interpolation &&
321        prog->GLSL_Version < 440) {
322       if (!consts->AllowGLSLCrossStageInterpolationMismatch) {
323          linker_error(prog,
324                       "%s shader output `%s' specifies %s "
325                       "interpolation qualifier, "
326                       "but %s shader input specifies %s "
327                       "interpolation qualifier\n",
328                       _mesa_shader_stage_to_string(producer_stage),
329                       output->name,
330                       interpolation_string(output->data.interpolation),
331                       _mesa_shader_stage_to_string(consumer_stage),
332                       interpolation_string(input->data.interpolation));
333          return;
334       } else {
335          linker_warning(prog,
336                         "%s shader output `%s' specifies %s "
337                         "interpolation qualifier, "
338                         "but %s shader input specifies %s "
339                         "interpolation qualifier\n",
340                         _mesa_shader_stage_to_string(producer_stage),
341                         output->name,
342                         interpolation_string(output->data.interpolation),
343                         _mesa_shader_stage_to_string(consumer_stage),
344                         interpolation_string(input->data.interpolation));
345       }
346    }
347 }
348 
349 /**
350  * Validate front and back color outputs against single color input
351  */
352 static void
cross_validate_front_and_back_color(const struct gl_constants * consts,struct gl_shader_program * prog,const nir_variable * input,const nir_variable * front_color,const nir_variable * back_color,gl_shader_stage consumer_stage,gl_shader_stage producer_stage)353 cross_validate_front_and_back_color(const struct gl_constants *consts,
354                                     struct gl_shader_program *prog,
355                                     const nir_variable *input,
356                                     const nir_variable *front_color,
357                                     const nir_variable *back_color,
358                                     gl_shader_stage consumer_stage,
359                                     gl_shader_stage producer_stage)
360 {
361    if (front_color != NULL && front_color->data.assigned)
362       cross_validate_types_and_qualifiers(consts, prog, input, front_color,
363                                           consumer_stage, producer_stage);
364 
365    if (back_color != NULL && back_color->data.assigned)
366       cross_validate_types_and_qualifiers(consts, prog, input, back_color,
367                                           consumer_stage, producer_stage);
368 }
369 
370 static unsigned
compute_variable_location_slot(nir_variable * var,gl_shader_stage stage)371 compute_variable_location_slot(nir_variable *var, gl_shader_stage stage)
372 {
373    unsigned location_start = VARYING_SLOT_VAR0;
374 
375    switch (stage) {
376       case MESA_SHADER_VERTEX:
377          if (var->data.mode == nir_var_shader_in)
378             location_start = VERT_ATTRIB_GENERIC0;
379          break;
380       case MESA_SHADER_TESS_CTRL:
381       case MESA_SHADER_TESS_EVAL:
382          if (var->data.patch)
383             location_start = VARYING_SLOT_PATCH0;
384          break;
385       case MESA_SHADER_FRAGMENT:
386          if (var->data.mode == nir_var_shader_out)
387             location_start = FRAG_RESULT_DATA0;
388          break;
389       default:
390          break;
391    }
392 
393    return var->data.location - location_start;
394 }
395 
396 
/* Per-location, per-component record of which variable has claimed a slot,
 * used when validating explicitly-assigned varying locations for illegal
 * aliasing.  Variables sharing a location must agree on the fields below.
 */
struct explicit_location_info {
   nir_variable *var;           /* variable occupying this location/component */
   bool base_type_is_integer;   /* underlying base type is integer (vs float) */
   unsigned base_type_bit_size; /* bit width of the base type; 0 for structs */
   unsigned interpolation;      /* interpolation qualifier */
   bool centroid;               /* auxiliary storage: centroid qualifier */
   bool sample;                 /* auxiliary storage: sample qualifier */
   bool patch;                  /* per-patch qualifier */
};
406 
/**
 * Validate that \p var may occupy locations [\p location, \p location_limit)
 * starting at \p component without illegally aliasing a previously recorded
 * variable, per the location-aliasing rules of GLSL (aliases must share the
 * same underlying numerical type, bit width, interpolation, and auxiliary
 * storage qualification; structs may not alias at all).
 *
 * On success the claimed components are recorded in \p explicit_locations
 * and true is returned; on failure a linker error is emitted on \p prog and
 * false is returned.
 */
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        nir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const struct glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        struct gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const struct glsl_type *type_without_array = glsl_without_array(type);
   const bool base_type_is_integer =
      glsl_base_type_is_integer(glsl_get_base_type(type_without_array));
   const bool is_struct = glsl_type_is_struct(type_without_array);
   if (is_struct) {
      /* structs don't have a defined underlying base type so just treat all
       * component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail anyway later.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
      /* 64-bit types consume two components per vector element. */
      unsigned dmul = glsl_type_is_64bit(type_without_array) ? 2 : 1;
      last_comp = component + glsl_get_vector_elements(type_without_array) * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(glsl_get_base_type(type_without_array));
   }

   /* Walk every location slot the variable covers and, within each, every
    * component, checking compatibility against prior claims.
    */
   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            if (glsl_type_is_struct(glsl_without_array(info->var->type)) ||
                is_struct) {
               /* Structs cannot share location since they are incompatible
                * with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == nir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *   " Further, when location aliasing, the aliases sharing the
                *     location must have the same underlying numerical type
                *     and bit width (floating-point or integer, 32-bit versus
                *     64-bit, etc.) and the same auxiliary storage and
                *     interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, implicitly
                * it will be float or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == nir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            /* Free component within this variable's span: record the claim. */
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            last_comp = last_comp - 4;
            /* Bump location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}
567 
/* Resize all non-patch per-vertex array inputs of \p shader to
 * \p num_vertices elements and clamp their recorded max array access.
 *
 * For geometry shaders this additionally emits link errors when an
 * explicitly declared array size disagrees with the input primitive's
 * vertex count, or when the shader indexes beyond that count.  Deref
 * instruction types are fixed up afterwards to match the new array types.
 */
static void
resize_input_array(nir_shader *shader, struct gl_shader_program *prog,
                   unsigned stage, unsigned num_vertices)
{
   nir_foreach_shader_in_variable(var, shader) {
      /* Patch inputs are not per-vertex and non-arrays have nothing to
       * resize.
       */
      if (!glsl_type_is_array(var->type) || var->data.patch)
         continue;

      /* NOTE(review): glsl_array_size() presumably returns -1 for unsized
       * arrays; stored into an unsigned it wraps to UINT_MAX, which the
       * "size != -1" comparison below still matches after the usual
       * conversions — confirm this is intentional.
       */
      unsigned size = glsl_array_size(var->type);

      if (stage == MESA_SHADER_GEOMETRY) {
         /* Generate a link error if the shader has declared this array with
          * an incorrect size.
          */
         if (!var->data.implicit_sized_array &&
             size != -1 && size != num_vertices) {
            linker_error(prog, "size of array %s declared as %u, "
                         "but number of input vertices is %u\n",
                         var->name, size, num_vertices);
            break;
         }

         /* Generate a link error if the shader attempts to access an input
          * array using an index too large for its actual size assigned at
          * link time.
          */
         if (var->data.max_array_access >= (int)num_vertices) {
            linker_error(prog, "%s shader accesses element %i of "
                         "%s, but only %i input vertices\n",
                         _mesa_shader_stage_to_string(stage),
                         var->data.max_array_access, var->name, num_vertices);
            break;
         }
      }

      /* Rebuild the type with the link-time vertex count as the outer
       * array size.
       */
      var->type = glsl_array_type(var->type->fields.array, num_vertices, 0);
      var->data.max_array_access = num_vertices - 1;
   }

   /* Deref instructions still reference the old array types; repair them. */
   nir_fixup_deref_types(shader);
}
609 
/**
 * Resize tessellation evaluation per-vertex inputs to the size of
 * tessellation control per-vertex outputs.
 *
 * When a TCS is linked, the patch vertex count is known here, so the
 * gl_PatchVerticesIn system value is also lowered to a constant.
 * No-op when the program has no tessellation evaluation shader.
 */
void
resize_tes_inputs(const struct gl_constants *consts,
                  struct gl_shader_program *prog)
{
   if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL)
      return;

   struct gl_linked_shader *tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
   struct gl_linked_shader *tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];

   /* If no control shader is present, then the TES inputs are statically
    * sized to MaxPatchVertices; the actual size of the arrays won't be
    * known until draw time.
    */
   const int num_vertices = tcs
      ? tcs->Program->info.tess.tcs_vertices_out
      : consts->MaxPatchVertices;

   resize_input_array(tes->Program->nir, prog, MESA_SHADER_TESS_EVAL,
                      num_vertices);
   if (tcs) {
      /* Convert the gl_PatchVerticesIn system value into a constant, since
       * the value is known at this point.
       */
      nir_variable *var =
         nir_find_variable_with_location(tes->Program->nir,
                                         nir_var_system_value,
                                         SYSTEM_VALUE_VERTICES_IN);
      if (var) {
         /* Retarget the variable as constant memory holding num_vertices. */
         var->data.location = 0;
         var->data.explicit_location = false;
         var->data.mode = nir_var_mem_constant;

         nir_constant *val = rzalloc(var, nir_constant);
         val->values[0].i32 = num_vertices;
         var->constant_initializer = val;

         /* Derefs of the variable still carry the old mode; repair them. */
         nir_fixup_deref_modes(tes->Program->nir);
      }
   }
}
655 
656 void
set_geom_shader_input_array_size(struct gl_shader_program * prog)657 set_geom_shader_input_array_size(struct gl_shader_program *prog)
658 {
659    if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] == NULL)
660       return;
661 
662    /* Set the size of geometry shader input arrays */
663    nir_shader *nir = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program->nir;
664    unsigned num_vertices =
665       mesa_vertices_per_prim(nir->info.gs.input_primitive);
666    resize_input_array(nir, prog, MESA_SHADER_GEOMETRY, num_vertices);
667 }
668 
669 static bool
validate_explicit_variable_location(const struct gl_constants * consts,struct explicit_location_info explicit_locations[][4],nir_variable * var,struct gl_shader_program * prog,struct gl_linked_shader * sh)670 validate_explicit_variable_location(const struct gl_constants *consts,
671                                     struct explicit_location_info explicit_locations[][4],
672                                     nir_variable *var,
673                                     struct gl_shader_program *prog,
674                                     struct gl_linked_shader *sh)
675 {
676    const struct glsl_type *type = get_varying_type(var, sh->Stage);
677    unsigned num_elements = glsl_count_attribute_slots(type, false);
678    unsigned idx = compute_variable_location_slot(var, sh->Stage);
679    unsigned slot_limit = idx + num_elements;
680 
681    /* Vertex shader inputs and fragment shader outputs are validated in
682     * assign_attribute_or_color_locations() so we should not attempt to
683     * validate them again here.
684     */
685    unsigned slot_max;
686    if (var->data.mode == nir_var_shader_out) {
687       assert(sh->Stage != MESA_SHADER_FRAGMENT);
688       slot_max = consts->Program[sh->Stage].MaxOutputComponents / 4;
689    } else {
690       assert(var->data.mode == nir_var_shader_in);
691       assert(sh->Stage != MESA_SHADER_VERTEX);
692       slot_max = consts->Program[sh->Stage].MaxInputComponents / 4;
693    }
694 
695    if (slot_limit > slot_max) {
696       linker_error(prog,
697                    "Invalid location %u in %s shader\n",
698                    idx, _mesa_shader_stage_to_string(sh->Stage));
699       return false;
700    }
701 
702    const struct glsl_type *type_without_array = glsl_without_array(type);
703    if (glsl_type_is_interface(type_without_array)) {
704       for (unsigned i = 0; i < glsl_get_length(type_without_array); i++) {
705          const struct glsl_struct_field *field =
706             glsl_get_struct_field_data(type_without_array, i);
707          unsigned field_location = field->location -
708             (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
709          unsigned field_slots = glsl_count_attribute_slots(field->type, false);
710          if (!check_location_aliasing(explicit_locations, var,
711                                       field_location,
712                                       0,
713                                       field_location + field_slots,
714                                       field->type,
715                                       field->interpolation,
716                                       field->centroid,
717                                       field->sample,
718                                       field->patch,
719                                       prog, sh->Stage)) {
720             return false;
721          }
722       }
723    } else if (!check_location_aliasing(explicit_locations, var,
724                                        idx, var->data.location_frac,
725                                        slot_limit, type,
726                                        var->data.interpolation,
727                                        var->data.centroid,
728                                        var->data.sample,
729                                        var->data.patch,
730                                        prog, sh->Stage)) {
731       return false;
732    }
733 
734    return true;
735 }
736 
737 /**
738  * Validate explicit locations for the inputs to the first stage and the
739  * outputs of the last stage in a program, if those are not the VS and FS
740  * shaders.
741  */
742 void
gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants * consts,struct gl_shader_program * prog,gl_shader_stage first_stage,gl_shader_stage last_stage)743 gl_nir_validate_first_and_last_interface_explicit_locations(const struct gl_constants *consts,
744                                                             struct gl_shader_program *prog,
745                                                             gl_shader_stage first_stage,
746                                                             gl_shader_stage last_stage)
747 {
748    /* VS inputs and FS outputs are validated in
749     * assign_attribute_or_color_locations()
750     */
751    bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
752    bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
753    if (!validate_first_stage && !validate_last_stage)
754       return;
755 
756    struct explicit_location_info explicit_locations[MAX_VARYING][4];
757 
758    gl_shader_stage stages[2] = { first_stage, last_stage };
759    bool validate_stage[2] = { validate_first_stage, validate_last_stage };
760    nir_variable_mode var_mode[2] = { nir_var_shader_in, nir_var_shader_out };
761 
762    for (unsigned i = 0; i < 2; i++) {
763       if (!validate_stage[i])
764          continue;
765 
766       gl_shader_stage stage = stages[i];
767 
768       struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
769       assert(sh);
770 
771       memset(explicit_locations, 0, sizeof(explicit_locations));
772 
773       nir_foreach_variable_with_modes(var, sh->Program->nir, var_mode[i]) {
774          if (!var->data.explicit_location ||
775              var->data.location < VARYING_SLOT_VAR0)
776             continue;
777 
778          if (!validate_explicit_variable_location(consts, explicit_locations,
779                                                   var, prog, sh)) {
780             return;
781          }
782       }
783    }
784 }
785 
786 /**
787  * Check if we should force input / output matching between shader
788  * interfaces.
789  *
790  * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say:
791  *
792  *   "Only the input variables that are actually read need to be
793  *    written by the previous stage; it is allowed to have
794  *    superfluous declarations of input variables."
795  *
796  * However it's not defined anywhere as to how we should handle
797  * inputs that are not written in the previous stage and it's not
798  * clear what "actually read" means.
799  *
800  * The GLSL 4.20 spec however is much clearer:
801  *
802  *    "Only the input variables that are statically read need to
803  *     be written by the previous stage; it is allowed to have
804  *     superfluous declarations of input variables."
805  *
806  * It also has a table that states it is an error to statically
807  * read an input that is not defined in the previous stage. While
808  * it is not an error to not statically write to the output (it
809  * just needs to be defined to not be an error).
810  *
811  * The text in the GLSL 4.20 spec was an attempt to clarify the
812  * previous spec iterations. However given the difference in spec
813  * and that some applications seem to depend on not erroring when
814  * the input is not actually read in control flow we only apply
815  * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
816  * seen in the wild that depend on the less strict interpretation.
817  */
818 static bool
static_input_output_matching(struct gl_shader_program * prog)819 static_input_output_matching(struct gl_shader_program *prog)
820 {
821    return prog->GLSL_Version >= (prog->IsES ? 0 : 420);
822 }
823 
824 /**
825  * Validate that outputs from one stage match inputs of another
826  */
827 void
gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants * consts,struct gl_shader_program * prog,struct gl_linked_shader * producer,struct gl_linked_shader * consumer)828 gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
829                                         struct gl_shader_program *prog,
830                                         struct gl_linked_shader *producer,
831                                         struct gl_linked_shader *consumer)
832 {
833    struct _mesa_symbol_table *table = _mesa_symbol_table_ctor();
834    struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {0};
835    struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {0};
836 
837    /* Find all shader outputs in the "producer" stage.
838     */
839    nir_foreach_variable_with_modes(var, producer->Program->nir, nir_var_shader_out) {
840       if (!var->data.explicit_location
841           || var->data.location < VARYING_SLOT_VAR0) {
842          /* Interface block validation is handled elsewhere */
843          if (!var->interface_type || is_gl_identifier(var->name))
844             _mesa_symbol_table_add_symbol(table, var->name, var);
845 
846       } else {
847          /* User-defined varyings with explicit locations are handled
848           * differently because they do not need to have matching names.
849           */
850          if (!validate_explicit_variable_location(consts,
851                                                   output_explicit_locations,
852                                                   var, prog, producer)) {
853             goto out;
854          }
855       }
856    }
857 
858    /* Find all shader inputs in the "consumer" stage.  Any variables that have
859     * matching outputs already in the symbol table must have the same type and
860     * qualifiers.
861     *
862     * Exception: if the consumer is the geometry shader, then the inputs
863     * should be arrays and the type of the array element should match the type
864     * of the corresponding producer output.
865     */
866    nir_foreach_variable_with_modes(input, consumer->Program->nir, nir_var_shader_in) {
867       if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
868          const nir_variable *front_color =
869             (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontColor");
870 
871          const nir_variable *back_color =
872             (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackColor");
873 
874          cross_validate_front_and_back_color(consts, prog, input,
875                                              front_color, back_color,
876                                              consumer->Stage, producer->Stage);
877       } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
878          const nir_variable *front_color =
879             (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_FrontSecondaryColor");
880 
881          const nir_variable *back_color =
882             (nir_variable *) _mesa_symbol_table_find_symbol(table, "gl_BackSecondaryColor");
883 
884          cross_validate_front_and_back_color(consts, prog, input,
885                                              front_color, back_color,
886                                              consumer->Stage, producer->Stage);
887       } else {
888          /* The rules for connecting inputs and outputs change in the presence
889           * of explicit locations.  In this case, we no longer care about the
890           * names of the variables.  Instead, we care only about the
891           * explicitly assigned location.
892           */
893          nir_variable *output = NULL;
894          if (input->data.explicit_location
895              && input->data.location >= VARYING_SLOT_VAR0) {
896 
897             const struct glsl_type *type =
898                get_varying_type(input, consumer->Stage);
899             unsigned num_elements = glsl_count_attribute_slots(type, false);
900             unsigned idx =
901                compute_variable_location_slot(input, consumer->Stage);
902             unsigned slot_limit = idx + num_elements;
903 
904             if (!validate_explicit_variable_location(consts,
905                                                      input_explicit_locations,
906                                                      input, prog, consumer)) {
907                goto out;
908             }
909 
910             while (idx < slot_limit) {
911                if (idx >= MAX_VARYING) {
912                   linker_error(prog,
913                                "Invalid location %u in %s shader\n", idx,
914                                _mesa_shader_stage_to_string(consumer->Stage));
915                   goto out;
916                }
917 
918                output = output_explicit_locations[idx][input->data.location_frac].var;
919 
920                if (output == NULL) {
921                   /* A linker failure should only happen when there is no
922                    * output declaration and there is Static Use of the
923                    * declared input.
924                    */
925                   if (input->data.used && static_input_output_matching(prog)) {
926                      linker_error(prog,
927                                   "%s shader input `%s' with explicit location "
928                                   "has no matching output\n",
929                                   _mesa_shader_stage_to_string(consumer->Stage),
930                                   input->name);
931                      break;
932                   }
933                } else if (input->data.location != output->data.location) {
934                   linker_error(prog,
935                                "%s shader input `%s' with explicit location "
936                                "has no matching output\n",
937                                _mesa_shader_stage_to_string(consumer->Stage),
938                                input->name);
939                   break;
940                }
941                idx++;
942             }
943          } else {
944             /* Interface block validation is handled elsewhere */
945             if (input->interface_type)
946                continue;
947 
948             output = (nir_variable *)
949                _mesa_symbol_table_find_symbol(table, input->name);
950          }
951 
952          if (output != NULL) {
953             /* Interface blocks have their own validation elsewhere so don't
954              * try validating them here.
955              */
956             if (!(input->interface_type && output->interface_type))
957                cross_validate_types_and_qualifiers(consts, prog, input, output,
958                                                    consumer->Stage,
959                                                    producer->Stage);
960          } else {
961             /* Check for input vars with unmatched output vars in prev stage
962              * taking into account that interface blocks could have a matching
963              * output but with different name, so we ignore them.
964              */
965             assert(!input->data.assigned);
966             if (input->data.used && !input->interface_type &&
967                 !input->data.explicit_location &&
968                 static_input_output_matching(prog))
969                linker_error(prog,
970                             "%s shader input `%s' "
971                             "has no matching output in the previous stage\n",
972                             _mesa_shader_stage_to_string(consumer->Stage),
973                             input->name);
974          }
975       }
976    }
977 
978  out:
979    _mesa_symbol_table_dtor(table);
980 }
981 
982 /**
983  * Assign locations for either VS inputs or FS outputs.
984  *
985  * \param mem_ctx        Temporary ralloc context used for linking.
986  * \param prog           Shader program whose variables need locations
987  *                       assigned.
988  * \param constants      Driver specific constant values for the program.
989  * \param target_index   Selector for the program target to receive location
990  *                       assignmnets.  Must be either \c MESA_SHADER_VERTEX or
991  *                       \c MESA_SHADER_FRAGMENT.
992  * \param do_assignment  Whether we are actually marking the assignment or we
993  *                       are just doing a dry-run checking.
994  *
995  * \return
996  * If locations are (or can be, in case of dry-running) successfully assigned,
997  * true is returned.  Otherwise an error is emitted to the shader link log and
998  * false is returned.
999  */
1000 static bool
assign_attribute_or_color_locations(void * mem_ctx,struct gl_shader_program * prog,const struct gl_constants * constants,unsigned target_index,bool do_assignment)1001 assign_attribute_or_color_locations(void *mem_ctx,
1002                                     struct gl_shader_program *prog,
1003                                     const struct gl_constants *constants,
1004                                     unsigned target_index,
1005                                     bool do_assignment)
1006 {
1007    /* Maximum number of generic locations.  This corresponds to either the
1008     * maximum number of draw buffers or the maximum number of generic
1009     * attributes.
1010     */
1011    unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
1012       constants->Program[target_index].MaxAttribs :
1013       MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);
1014 
1015    assert(max_index <= 32);
1016    struct temp_attr to_assign[32];
1017 
1018    /* Mark invalid locations as being used.
1019     */
1020    unsigned used_locations = ~SAFE_MASK_FROM_INDEX(max_index);
1021    unsigned double_storage_locations = 0;
1022 
1023    assert((target_index == MESA_SHADER_VERTEX)
1024           || (target_index == MESA_SHADER_FRAGMENT));
1025 
1026    if (prog->_LinkedShaders[target_index] == NULL)
1027       return true;
1028 
1029    /* Operate in a total of four passes.
1030     *
1031     * 1. Invalidate the location assignments for all vertex shader inputs.
1032     *
1033     * 2. Assign locations for inputs that have user-defined (via
1034     *    glBindVertexAttribLocation) locations and outputs that have
1035     *    user-defined locations (via glBindFragDataLocation).
1036     *
1037     * 3. Sort the attributes without assigned locations by number of slots
1038     *    required in decreasing order.  Fragmentation caused by attribute
1039     *    locations assigned by the application may prevent large attributes
1040     *    from having enough contiguous space.
1041     *
1042     * 4. Assign locations to any inputs without assigned locations.
1043     */
1044 
1045    const int generic_base = (target_index == MESA_SHADER_VERTEX)
1046       ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;
1047 
1048    nir_variable_mode io_mode =
1049       (target_index == MESA_SHADER_VERTEX)
1050       ? nir_var_shader_in : nir_var_shader_out;
1051 
1052    /* Temporary array for the set of attributes that have locations assigned,
1053     * for the purpose of checking overlapping slots/components of (non-ES)
1054     * fragment shader outputs.
1055     */
1056    nir_variable *assigned[FRAG_RESULT_MAX * 4]; /* (max # of FS outputs) * # components */
1057    unsigned assigned_attr = 0;
1058 
1059    unsigned num_attr = 0;
1060 
1061    nir_shader *shader = prog->_LinkedShaders[target_index]->Program->nir;
1062    nir_foreach_variable_with_modes(var, shader, io_mode) {
1063 
1064       if (var->data.explicit_location) {
1065          if ((var->data.location >= (int)(max_index + generic_base))
1066              || (var->data.location < 0)) {
1067             linker_error(prog,
1068                          "invalid explicit location %d specified for `%s'\n",
1069                          (var->data.location < 0)
1070                          ? var->data.location
1071                          : var->data.location - generic_base,
1072                          var->name);
1073             return false;
1074          }
1075       } else if (target_index == MESA_SHADER_VERTEX) {
1076          unsigned binding;
1077 
1078          if (string_to_uint_map_get(prog->AttributeBindings, &binding, var->name)) {
1079             assert(binding >= VERT_ATTRIB_GENERIC0);
1080             var->data.location = binding;
1081          }
1082       } else if (target_index == MESA_SHADER_FRAGMENT) {
1083          unsigned binding;
1084          unsigned index;
1085          const char *name = var->name;
1086          const struct glsl_type *type = var->type;
1087 
1088          while (type) {
1089             /* Check if there's a binding for the variable name */
1090             if (string_to_uint_map_get(prog->FragDataBindings, &binding, name)) {
1091                assert(binding >= FRAG_RESULT_DATA0);
1092                var->data.location = binding;
1093 
1094                if (string_to_uint_map_get(prog->FragDataIndexBindings, &index, name)) {
1095                   var->data.index = index;
1096                }
1097                break;
1098             }
1099 
1100             /* If not, but it's an array type, look for name[0] */
1101             if (glsl_type_is_array(type)) {
1102                name = ralloc_asprintf(mem_ctx, "%s[0]", name);
1103                type = glsl_get_array_element(type);
1104                continue;
1105             }
1106 
1107             break;
1108          }
1109       }
1110 
1111       if (strcmp(var->name, "gl_LastFragData") == 0)
1112          continue;
1113 
1114       /* From GL4.5 core spec, section 15.2 (Shader Execution):
1115        *
1116        *     "Output binding assignments will cause LinkProgram to fail:
1117        *     ...
1118        *     If the program has an active output assigned to a location greater
1119        *     than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
1120        *     an active output assigned an index greater than or equal to one;"
1121        */
1122       if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
1123           var->data.location - generic_base >=
1124           (int) constants->MaxDualSourceDrawBuffers) {
1125          linker_error(prog,
1126                       "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
1127                       "with index %u for %s\n",
1128                       var->data.location - generic_base, var->data.index,
1129                       var->name);
1130          return false;
1131       }
1132 
1133       const unsigned slots =
1134          glsl_count_attribute_slots(var->type,
1135                                     target_index == MESA_SHADER_VERTEX);
1136 
1137       /* If the variable is not a built-in and has a location statically
1138        * assigned in the shader (presumably via a layout qualifier), make sure
1139        * that it doesn't collide with other assigned locations.  Otherwise,
1140        * add it to the list of variables that need linker-assigned locations.
1141        */
1142       if (var->data.location != -1) {
1143          if (var->data.location >= generic_base && var->data.index < 1) {
1144             /* From page 61 of the OpenGL 4.0 spec:
1145              *
1146              *     "LinkProgram will fail if the attribute bindings assigned
1147              *     by BindAttribLocation do not leave not enough space to
1148              *     assign a location for an active matrix attribute or an
1149              *     active attribute array, both of which require multiple
1150              *     contiguous generic attributes."
1151              *
1152              * I think above text prohibits the aliasing of explicit and
1153              * automatic assignments. But, aliasing is allowed in manual
1154              * assignments of attribute locations. See below comments for
1155              * the details.
1156              *
1157              * From OpenGL 4.0 spec, page 61:
1158              *
1159              *     "It is possible for an application to bind more than one
1160              *     attribute name to the same location. This is referred to as
1161              *     aliasing. This will only work if only one of the aliased
1162              *     attributes is active in the executable program, or if no
1163              *     path through the shader consumes more than one attribute of
1164              *     a set of attributes aliased to the same location. A link
1165              *     error can occur if the linker determines that every path
1166              *     through the shader consumes multiple aliased attributes,
1167              *     but implementations are not required to generate an error
1168              *     in this case."
1169              *
1170              * From GLSL 4.30 spec, page 54:
1171              *
1172              *    "A program will fail to link if any two non-vertex shader
1173              *     input variables are assigned to the same location. For
1174              *     vertex shaders, multiple input variables may be assigned
1175              *     to the same location using either layout qualifiers or via
1176              *     the OpenGL API. However, such aliasing is intended only to
1177              *     support vertex shaders where each execution path accesses
1178              *     at most one input per each location. Implementations are
1179              *     permitted, but not required, to generate link-time errors
1180              *     if they detect that every path through the vertex shader
1181              *     executable accesses multiple inputs assigned to any single
1182              *     location. For all shader types, a program will fail to link
1183              *     if explicit location assignments leave the linker unable
1184              *     to find space for other variables without explicit
1185              *     assignments."
1186              *
1187              * From OpenGL ES 3.0 spec, page 56:
1188              *
1189              *    "Binding more than one attribute name to the same location
1190              *     is referred to as aliasing, and is not permitted in OpenGL
1191              *     ES Shading Language 3.00 vertex shaders. LinkProgram will
1192              *     fail when this condition exists. However, aliasing is
1193              *     possible in OpenGL ES Shading Language 1.00 vertex shaders.
1194              *     This will only work if only one of the aliased attributes
1195              *     is active in the executable program, or if no path through
1196              *     the shader consumes more than one attribute of a set of
1197              *     attributes aliased to the same location. A link error can
1198              *     occur if the linker determines that every path through the
1199              *     shader consumes multiple aliased attributes, but implemen-
1200              *     tations are not required to generate an error in this case."
1201              *
1202              * After looking at above references from OpenGL, OpenGL ES and
1203              * GLSL specifications, we allow aliasing of vertex input variables
1204              * in: OpenGL 2.0 (and above) and OpenGL ES 2.0.
1205              *
1206              * NOTE: This is not required by the spec but its worth mentioning
1207              * here that we're not doing anything to make sure that no path
1208              * through the vertex shader executable accesses multiple inputs
1209              * assigned to any single location.
1210              */
1211 
1212             /* Mask representing the contiguous slots that will be used by
1213              * this attribute.
1214              */
1215             const unsigned attr = var->data.location - generic_base;
1216             const unsigned use_mask = (1 << slots) - 1;
1217             const char *const string = (target_index == MESA_SHADER_VERTEX)
1218                ? "vertex shader input" : "fragment shader output";
1219 
1220             /* Generate a link error if the requested locations for this
1221              * attribute exceed the maximum allowed attribute location.
1222              */
1223             if (attr + slots > max_index) {
1224                linker_error(prog,
1225                            "insufficient contiguous locations "
1226                            "available for %s `%s' %d %d %d\n", string,
1227                            var->name, used_locations, use_mask, attr);
1228                return false;
1229             }
1230 
1231             /* Generate a link error if the set of bits requested for this
1232              * attribute overlaps any previously allocated bits.
1233              */
1234             if ((~(use_mask << attr) & used_locations) != used_locations) {
1235                if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
1236                   /* From section 4.4.2 (Output Layout Qualifiers) of the GLSL
1237                    * 4.40 spec:
1238                    *
1239                    *    "Additionally, for fragment shader outputs, if two
1240                    *    variables are placed within the same location, they
1241                    *    must have the same underlying type (floating-point or
1242                    *    integer). No component aliasing of output variables or
1243                    *    members is allowed.
1244                    */
1245                   for (unsigned i = 0; i < assigned_attr; i++) {
1246                      unsigned assigned_slots =
1247                         glsl_count_attribute_slots(assigned[i]->type, false);
1248                      unsigned assig_attr =
1249                         assigned[i]->data.location - generic_base;
1250                      unsigned assigned_use_mask = (1 << assigned_slots) - 1;
1251 
1252                      if ((assigned_use_mask << assig_attr) &
1253                          (use_mask << attr)) {
1254 
1255                         const struct glsl_type *assigned_type =
1256                            glsl_without_array(assigned[i]->type);
1257                         const struct glsl_type *type =
1258                            glsl_without_array(var->type);
1259                         if (glsl_get_base_type(assigned_type) !=
1260                             glsl_get_base_type(type)) {
1261                            linker_error(prog, "types do not match for aliased"
1262                                         " %ss %s and %s\n", string,
1263                                         assigned[i]->name, var->name);
1264                            return false;
1265                         }
1266 
1267                         unsigned assigned_component_mask =
1268                            ((1 << glsl_get_vector_elements(assigned_type)) - 1) <<
1269                            assigned[i]->data.location_frac;
1270                         unsigned component_mask =
1271                            ((1 << glsl_get_vector_elements(type)) - 1) <<
1272                            var->data.location_frac;
1273                         if (assigned_component_mask & component_mask) {
1274                            linker_error(prog, "overlapping component is "
1275                                         "assigned to %ss %s and %s "
1276                                         "(component=%d)\n",
1277                                         string, assigned[i]->name, var->name,
1278                                         var->data.location_frac);
1279                            return false;
1280                         }
1281                      }
1282                   }
1283                } else if (target_index == MESA_SHADER_FRAGMENT ||
1284                           (prog->IsES && prog->GLSL_Version >= 300)) {
1285                   linker_error(prog, "overlapping location is assigned "
1286                                "to %s `%s' %d %d %d\n", string, var->name,
1287                                used_locations, use_mask, attr);
1288                   return false;
1289                } else {
1290                   linker_warning(prog, "overlapping location is assigned "
1291                                  "to %s `%s' %d %d %d\n", string, var->name,
1292                                  used_locations, use_mask, attr);
1293                }
1294             }
1295 
1296             if (target_index == MESA_SHADER_FRAGMENT && !prog->IsES) {
1297                /* Only track assigned variables for non-ES fragment shaders
1298                 * to avoid overflowing the array.
1299                 *
1300                 * At most one variable per fragment output component should
1301                 * reach this.
1302                 */
1303                assert(assigned_attr < ARRAY_SIZE(assigned));
1304                assigned[assigned_attr] = var;
1305                assigned_attr++;
1306             }
1307 
1308             used_locations |= (use_mask << attr);
1309 
1310             /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
1311              *
1312              * "A program with more than the value of MAX_VERTEX_ATTRIBS
1313              *  active attribute variables may fail to link, unless
1314              *  device-dependent optimizations are able to make the program
1315              *  fit within available hardware resources. For the purposes
1316              *  of this test, attribute variables of the type dvec3, dvec4,
1317              *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
1318              *  count as consuming twice as many attributes as equivalent
1319              *  single-precision types. While these types use the same number
1320              *  of generic attributes as their single-precision equivalents,
1321              *  implementations are permitted to consume two single-precision
1322              *  vectors of internal storage for each three- or four-component
1323              *  double-precision vector."
1324              *
1325              * Mark this attribute slot as taking up twice as much space
1326              * so we can count it properly against limits.  According to
1327              * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
1328              * is optional behavior, but it seems preferable.
1329              */
1330             if (glsl_type_is_dual_slot(glsl_without_array(var->type)))
1331                double_storage_locations |= (use_mask << attr);
1332          }
1333 
1334          continue;
1335       }
1336 
1337       if (num_attr >= max_index) {
1338          linker_error(prog, "too many %s (max %u)",
1339                       target_index == MESA_SHADER_VERTEX ?
1340                       "vertex shader inputs" : "fragment shader outputs",
1341                       max_index);
1342          return false;
1343       }
1344       to_assign[num_attr].slots = slots;
1345       to_assign[num_attr].var = var;
1346       to_assign[num_attr].original_idx = num_attr;
1347       num_attr++;
1348    }
1349 
1350    if (!do_assignment)
1351       return true;
1352 
1353    if (target_index == MESA_SHADER_VERTEX) {
1354       unsigned total_attribs_size =
1355          util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
1356          util_bitcount(double_storage_locations);
1357       if (total_attribs_size > max_index) {
1358          linker_error(prog,
1359                       "attempt to use %d vertex attribute slots only %d available ",
1360                       total_attribs_size, max_index);
1361          return false;
1362       }
1363    }
1364 
1365    /* If all of the attributes were assigned locations by the application (or
1366     * are built-in attributes with fixed locations), return early.  This should
1367     * be the common case.
1368     */
1369    if (num_attr == 0)
1370       return true;
1371 
1372    qsort(to_assign, num_attr, sizeof(to_assign[0]), &compare_attr);
1373 
1374    if (target_index == MESA_SHADER_VERTEX) {
1375       /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS.  It can
1376        * only be explicitly assigned by via glBindAttribLocation.  Mark it as
1377        * reserved to prevent it from being automatically allocated below.
1378        */
1379       if (find_deref(shader, "gl_Vertex"))
1380          used_locations |= (1 << 0);
1381    }
1382 
1383    for (unsigned i = 0; i < num_attr; i++) {
1384       /* Mask representing the contiguous slots that will be used by this
1385        * attribute.
1386        */
1387       const unsigned use_mask = (1 << to_assign[i].slots) - 1;
1388 
1389       int location = find_available_slots(used_locations, to_assign[i].slots);
1390 
1391       if (location < 0) {
1392          const char *const string = (target_index == MESA_SHADER_VERTEX)
1393             ? "vertex shader input" : "fragment shader output";
1394 
1395          linker_error(prog,
1396                       "insufficient contiguous locations "
1397                       "available for %s `%s'\n",
1398                       string, to_assign[i].var->name);
1399          return false;
1400       }
1401 
1402       to_assign[i].var->data.location = generic_base + location;
1403       used_locations |= (use_mask << location);
1404 
1405       if (glsl_type_is_dual_slot(glsl_without_array(to_assign[i].var->type)))
1406          double_storage_locations |= (use_mask << location);
1407    }
1408 
1409    /* Now that we have all the locations, from the GL 4.5 core spec, section
1410     * 11.1.1 (Vertex Attributes), dvec3, dvec4, dmat2x3, dmat2x4, dmat3,
1411     * dmat3x4, dmat4x3, and dmat4 count as consuming twice as many attributes
1412     * as equivalent single-precision types.
1413     */
1414    if (target_index == MESA_SHADER_VERTEX) {
1415       unsigned total_attribs_size =
1416          util_bitcount(used_locations & SAFE_MASK_FROM_INDEX(max_index)) +
1417          util_bitcount(double_storage_locations);
1418       if (total_attribs_size > max_index) {
1419          linker_error(prog,
1420                       "attempt to use %d vertex attribute slots only %d available ",
1421                       total_attribs_size, max_index);
1422          return false;
1423       }
1424    }
1425 
1426    return true;
1427 }
1428 
1429 static bool
varying_has_user_specified_location(const nir_variable * var)1430 varying_has_user_specified_location(const nir_variable *var)
1431 {
1432    return var->data.explicit_location &&
1433       var->data.location >= VARYING_SLOT_VAR0;
1434 }
1435 
/**
 * Recursively walk the type tree of an xfb-captured output variable and
 * append one fully-qualified name string (e.g. "Block.member[2].field")
 * per captured leaf varying to *varying_names.
 *
 * \param mem_ctx          ralloc context that owns the produced name strings
 * \param t                type currently being decomposed
 * \param name             in/out growable name buffer, rewritten in place via
 *                         ralloc_asprintf_rewrite_tail()
 * \param name_length      number of valid characters currently in *name
 * \param count            in/out index of the next free slot in *varying_names
 * \param ifc_member_name  when t is an interface block: the member to visit
 * \param ifc_member_t     when t is an interface block: that member's type
 * \param varying_names    output array of names, pre-sized by the caller
 */
static void
create_xfb_varying_names(void *mem_ctx, const struct glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const struct glsl_type *ifc_member_t,
                         char ***varying_names)
{
   if (glsl_type_is_interface(t)) {
      /* Named interface block: descend into the single member identified by
       * the caller, prefixing the member name onto the block name.
       */
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (glsl_type_is_struct(t)) {
      /* A struct contributes one name per field: "<name>.<field>". */
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         const char *field = glsl_get_struct_elem_name(t, i);
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, glsl_get_struct_field(t, i), name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (glsl_type_is_struct(glsl_without_array(t)) ||
              glsl_type_is_interface(glsl_without_array(t)) ||
              (glsl_type_is_array(t) && glsl_type_is_array(glsl_get_array_element(t)))) {
      /* Arrays of aggregates and arrays-of-arrays are enumerated element by
       * element ("<name>[i]"); arrays of basic types do NOT match this
       * branch and fall through to the leaf case with a single name.
       */
      for (unsigned i = 0; i < glsl_get_length(t); i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, glsl_get_array_element(t), name,
                                  new_length, count, ifc_member_name,
                                  ifc_member_t, varying_names);
      }
   } else {
      /* Leaf (scalar/vector/matrix, or an array of basic type): record the
       * accumulated name.
       */
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}
1479 
/**
 * Scan a stage's output variables for ARB_enhanced_layouts xfb_* qualifiers.
 *
 * Returns true if the stage is in "transform feedback capturing mode", i.e.
 * it makes static use of any xfb_* qualifier.  For outputs carrying explicit
 * xfb_offsets this also counts the captured leaf varyings (*num_xfb_decls)
 * and builds the flattened list of their names (*varying_names), so that the
 * rest of the linker can process them like glTransformFeedbackVaryings input.
 */
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const struct gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_xfb_decls,
                              char ***varying_names,
                              bool *compact_arrays)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also we don't bother to propagate
    * xfb_stride to interface block members so this will catch that case also.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   *compact_arrays = sh->Program->nir->options->compact_arrays;
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup.  This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_offset) {
         /* Aggregates are captured per leaf varying, so count each one. */
         *num_xfb_decls += glsl_varying_count(var->type);
         has_xfb_qualifiers = true;
      }
   }

   /* Nothing with an explicit offset: no name list to build. */
   if (*num_xfb_decls == 0)
      return has_xfb_qualifiers;

   /* Second pass: generate one flattened name per captured leaf varying. */
   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_xfb_decls);
   nir_foreach_shader_out_variable(var, sh->Program->nir) {
      if (var->data.explicit_offset) {
         char *name;
         const struct glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            /* Name by the block type; lowering split the block into one
             * variable per member, so recover that member's original type.
             */
            type = var->interface_type;

            /* Find the member type before it was altered by lowering */
            const struct glsl_type *type_wa = glsl_without_array(type);
            member_type =
               glsl_get_struct_field(type_wa, glsl_get_field_index(type_wa, var->name));
            name = ralloc_strdup(NULL, glsl_get_type_name(type_wa));
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         /* The scratch buffer is ralloc'd on the NULL context; the final
          * names were duplicated onto mem_ctx, so free the scratch here.
          */
         ralloc_free(name);
      }
   }

   assert(i == *num_xfb_decls);
   return has_xfb_qualifiers;
}
1554 
1555 /**
1556  * Initialize this struct based on a string that was passed to
1557  * glTransformFeedbackVaryings.
1558  *
1559  * If the input is mal-formed, this call still succeeds, but it sets
1560  * this->var_name to a mal-formed input, so xfb_decl_find_output_var()
1561  * will fail to find any matching variable.
1562  */
1563 static void
xfb_decl_init(struct xfb_decl * xfb_decl,const struct gl_constants * consts,const struct gl_extensions * exts,const void * mem_ctx,const char * input,bool compact_arrays)1564 xfb_decl_init(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
1565               const struct gl_extensions *exts, const void *mem_ctx,
1566               const char *input, bool compact_arrays)
1567 {
1568    /* We don't have to be pedantic about what is a valid GLSL variable name,
1569     * because any variable with an invalid name can't exist in the IR anyway.
1570     */
1571    xfb_decl->location = -1;
1572    xfb_decl->orig_name = input;
1573    xfb_decl->lowered_builtin_array_variable = none;
1574    xfb_decl->skip_components = 0;
1575    xfb_decl->next_buffer_separator = false;
1576    xfb_decl->matched_candidate = NULL;
1577    xfb_decl->stream_id = 0;
1578    xfb_decl->buffer = 0;
1579    xfb_decl->offset = 0;
1580 
1581    if (exts->ARB_transform_feedback3) {
1582       /* Parse gl_NextBuffer. */
1583       if (strcmp(input, "gl_NextBuffer") == 0) {
1584          xfb_decl->next_buffer_separator = true;
1585          return;
1586       }
1587 
1588       /* Parse gl_SkipComponents. */
1589       if (strcmp(input, "gl_SkipComponents1") == 0)
1590          xfb_decl->skip_components = 1;
1591       else if (strcmp(input, "gl_SkipComponents2") == 0)
1592          xfb_decl->skip_components = 2;
1593       else if (strcmp(input, "gl_SkipComponents3") == 0)
1594          xfb_decl->skip_components = 3;
1595       else if (strcmp(input, "gl_SkipComponents4") == 0)
1596          xfb_decl->skip_components = 4;
1597 
1598       if (xfb_decl->skip_components)
1599          return;
1600    }
1601 
1602    /* Parse a declaration. */
1603    const char *base_name_end;
1604    long subscript = link_util_parse_program_resource_name(input, strlen(input),
1605                                                           &base_name_end);
1606    xfb_decl->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
1607    if (xfb_decl->var_name == NULL) {
1608       _mesa_error_no_memory(__func__);
1609       return;
1610    }
1611 
1612    if (subscript >= 0) {
1613       xfb_decl->array_subscript = subscript;
1614       xfb_decl->is_subscripted = true;
1615    } else {
1616       xfb_decl->is_subscripted = false;
1617    }
1618 
1619    /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
1620     * class must behave specially to account for the fact that gl_ClipDistance
1621     * is converted from a float[8] to a vec4[2].
1622     */
1623    if (!compact_arrays &&
1624        strcmp(xfb_decl->var_name, "gl_ClipDistance") == 0) {
1625       xfb_decl->lowered_builtin_array_variable = clip_distance;
1626    }
1627    if (!compact_arrays &&
1628        strcmp(xfb_decl->var_name, "gl_CullDistance") == 0) {
1629       xfb_decl->lowered_builtin_array_variable = cull_distance;
1630    }
1631 }
1632 
1633 /**
1634  * Determine whether two xfb_decl structs refer to the same variable and
1635  * array index (if applicable).
1636  */
1637 static bool
xfb_decl_is_same(const struct xfb_decl * x,const struct xfb_decl * y)1638 xfb_decl_is_same(const struct xfb_decl *x, const struct xfb_decl *y)
1639 {
1640    assert(xfb_decl_is_varying(x) && xfb_decl_is_varying(y));
1641 
1642    if (strcmp(x->var_name, y->var_name) != 0)
1643       return false;
1644    if (x->is_subscripted != y->is_subscripted)
1645       return false;
1646    if (x->is_subscripted && x->array_subscript != y->array_subscript)
1647       return false;
1648    return true;
1649 }
1650 
1651 /**
1652  * The total number of varying components taken up by this variable.  Only
1653  * valid if assign_location() has been called.
1654  */
1655 static unsigned
xfb_decl_num_components(struct xfb_decl * xfb_decl)1656 xfb_decl_num_components(struct xfb_decl *xfb_decl)
1657 {
1658    if (xfb_decl->lowered_builtin_array_variable)
1659       return xfb_decl->size;
1660    else
1661       return xfb_decl->vector_elements * xfb_decl->matrix_columns *
1662          xfb_decl->size * (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
1663 }
1664 
/**
 * Assign a location and stream ID for this xfb_decl object based on the
 * transform feedback candidate found by find_candidate.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_assign_location(struct xfb_decl *xfb_decl,
                         const struct gl_constants *consts,
                         struct gl_shader_program *prog,
                         bool disable_varying_packing, bool xfb_enabled)
{
   assert(xfb_decl_is_varying(xfb_decl));

   /* Position of the candidate in units of float components: 4 components
    * per varying slot, plus the component offset within the slot, plus the
    * member's offset within its enclosing struct (tracked in floats).
    */
   unsigned fine_location
      = xfb_decl->matched_candidate->toplevel_var->data.location * 4
      + xfb_decl->matched_candidate->toplevel_var->data.location_frac
      + xfb_decl->matched_candidate->struct_offset_floats;
   /* 64-bit types occupy two float components each. */
   const unsigned dmul =
      glsl_type_is_64bit(glsl_without_array(xfb_decl->matched_candidate->type)) ? 2 : 1;

   if (glsl_type_is_array(xfb_decl->matched_candidate->type)) {
      /* Array variable */
      const struct glsl_type *element_type =
         glsl_get_array_element(xfb_decl->matched_candidate->type);
      const unsigned matrix_cols = glsl_get_matrix_columns(element_type);
      const unsigned vector_elements = glsl_get_vector_elements(element_type);
      unsigned actual_array_size;
      /* For lowered gl_ClipDistance / gl_CullDistance the declared array
       * size no longer reflects the source-level size, so use the size
       * recorded in the last vertex-pipeline stage's shader info instead.
       */
      switch (xfb_decl->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->nir->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->nir->info.cull_distance_array_size : 0;
         break;
      case none:
      default:
         actual_array_size = glsl_array_size(xfb_decl->matched_candidate->type);
         break;
      }

      if (xfb_decl->is_subscripted) {
         /* Check array bounds. */
         if (xfb_decl->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%i, but the array size is %u.",
                         xfb_decl->orig_name, xfb_decl->array_subscript,
                         actual_array_size);
            return false;
         }

         /* Elements of arrays that will be packed (or of the lowered
          * builtins listed below) are laid out tightly; otherwise each
          * element is aligned to a full 4-component slot.  This determines
          * the per-element stride used to reach the subscripted element.
          */
         bool array_will_be_lowered =
            lower_packed_varying_needs_lowering(prog->last_vert_prog->nir,
                                                xfb_decl->matched_candidate->toplevel_var,
                                                nir_var_shader_out,
                                                disable_varying_packing,
                                                xfb_enabled) ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_ClipDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_CullDistance") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelInner") == 0 ||
            strcmp(xfb_decl->matched_candidate->toplevel_var->name, "gl_TessLevelOuter") == 0;

         unsigned array_elem_size = xfb_decl->lowered_builtin_array_variable ?
            1 : (array_will_be_lowered ? vector_elements : 4) * matrix_cols * dmul;
         fine_location += array_elem_size * xfb_decl->array_subscript;
         xfb_decl->size = 1;
      } else {
         /* No subscript: capture the whole array. */
         xfb_decl->size = actual_array_size;
      }
      xfb_decl->vector_elements = vector_elements;
      xfb_decl->matrix_columns = matrix_cols;
      if (xfb_decl->lowered_builtin_array_variable)
         xfb_decl->type = GL_FLOAT;
      else
         xfb_decl->type = glsl_get_gl_type(element_type);
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (xfb_decl->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      xfb_decl->orig_name, xfb_decl->var_name);
         return false;
      }
      xfb_decl->size = 1;
      xfb_decl->vector_elements = glsl_get_vector_elements(xfb_decl->matched_candidate->type);
      xfb_decl->matrix_columns = glsl_get_matrix_columns(xfb_decl->matched_candidate->type);
      xfb_decl->type = glsl_get_gl_type(xfb_decl->matched_candidate->type);
   }
   /* Convert float-component position back to slot + component-in-slot. */
   xfb_decl->location = fine_location / 4;
   xfb_decl->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
    *
    *   * the total number of components to capture in any varying
    *     variable in <varyings> is greater than the constant
    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *     buffer mode is SEPARATE_ATTRIBS_EXT;
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       xfb_decl_num_components(xfb_decl) >
       consts->MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   xfb_decl->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   xfb_decl->stream_id = xfb_decl->matched_candidate->toplevel_var->data.stream;

   /* For explicit xfb_offset qualifiers: combine the variable's declared
    * offset with the subscript contribution (array_subscript * 4 * dmul)
    * and the struct-member contribution (xfb_offset_floats, tracked in
    * floats, hence the * 4).  NOTE(review): the subscript term appears to
    * assume tightly-packed scalar-sized elements — confirm against callers
    * that only use xfb_decl->offset when has_xfb_qualifiers is set.
    */
   unsigned array_offset = xfb_decl->array_subscript * 4 * dmul;
   unsigned struct_offset = xfb_decl->matched_candidate->xfb_offset_floats * 4;
   xfb_decl->buffer = xfb_decl->matched_candidate->toplevel_var->data.xfb.buffer;
   xfb_decl->offset = xfb_decl->matched_candidate->toplevel_var->data.offset +
      array_offset + struct_offset;

   return true;
}
1789 
1790 static unsigned
xfb_decl_get_num_outputs(struct xfb_decl * xfb_decl)1791 xfb_decl_get_num_outputs(struct xfb_decl *xfb_decl)
1792 {
1793    if (!xfb_decl_is_varying(xfb_decl)) {
1794       return 0;
1795    }
1796 
1797    if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
1798       unsigned dmul = _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1;
1799       unsigned rows_per_element = DIV_ROUND_UP(xfb_decl->vector_elements * dmul, 4);
1800       return xfb_decl->size * xfb_decl->matrix_columns * rows_per_element;
1801    } else {
1802       return (xfb_decl_num_components(xfb_decl) + xfb_decl->location_frac + 3) / 4;
1803    }
1804 }
1805 
1806 static bool
xfb_decl_is_varying_written(struct xfb_decl * xfb_decl)1807 xfb_decl_is_varying_written(struct xfb_decl *xfb_decl)
1808 {
1809    if (xfb_decl->next_buffer_separator || xfb_decl->skip_components)
1810       return false;
1811 
1812    return xfb_decl->matched_candidate->toplevel_var->data.assigned;
1813 }
1814 
/**
 * Update gl_transform_feedback_info to reflect this xfb_decl.
 *
 * Appends one entry to info->Varyings and, for real varyings, one or more
 * entries to info->Outputs (one per up-to-vec4 chunk captured), updating the
 * buffer stride and enforcing the GL component limits and the "no aliasing
 * in output buffers" rule along the way.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
xfb_decl_store(struct xfb_decl *xfb_decl, const struct gl_constants *consts,
               struct gl_shader_program *prog,
               struct gl_transform_feedback_info *info,
               unsigned buffer, unsigned buffer_index,
               const unsigned max_outputs,
               BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
               bool *explicit_stride, unsigned *max_member_alignment,
               bool has_xfb_qualifiers, const void* mem_ctx)
{
   unsigned xfb_offset = 0;
   unsigned size = xfb_decl->size;
   /* Handle gl_SkipComponents. */
   if (xfb_decl->skip_components) {
      info->Buffers[buffer].Stride += xfb_decl->skip_components;
      size = xfb_decl->skip_components;
      goto store_varying;
   }

   if (xfb_decl->next_buffer_separator) {
      size = 0;
      goto store_varying;
   }

   /* With ARB_enhanced_layouts qualifiers the capture offset is explicit
    * (xfb_decl->offset is in bytes; divide by 4 for float components);
    * otherwise varyings are packed back-to-back at the current stride.
    */
   if (has_xfb_qualifiers) {
      xfb_offset = xfb_decl->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = xfb_decl->location;
      unsigned location_frac = xfb_decl->location_frac;
      unsigned num_components = xfb_decl_num_components(xfb_decl);

      /* From GL_EXT_transform_feedback:
       *
       *   " A program will fail to link if:
       *
       *       * the total number of components to capture is greater than the
       *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *   " The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          consts->MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *   " No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       *
       * Enforced with one lazily-allocated bitset of used components per
       * buffer: mark the range [first_component, last_component] and error
       * out if any bit in it was already set.
       */
      const unsigned max_components =
         consts->MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      for (unsigned word = start_word; word <= end_word; word++) {
         /* Clamp the range to the portion that falls inside this word. */
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         xfb_decl->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      /* Components per array element's vector (64-bit counts double); used
       * below to chunk explicitly-located varyings.
       */
      const unsigned type_num_components =
         xfb_decl->vector_elements *
         (_mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      /* Emit one Outputs entry per chunk of up to 4 components. */
      while (num_components > 0) {
         unsigned output_size = 0;

         /*  From GL_ARB_enhanced_layouts:
          *
          * "When an attribute variable declared using an array type is bound to
          * generic attribute index <i>, the active array elements are assigned to
          * consecutive generic attributes beginning with generic attribute <i>.  The
          * number of attributes and components assigned to each element are
          * determined according to the data type of array elements and "component"
          * layout qualifier (if any) specified in the declaration of the array."
          *
          * "When an attribute variable declared using a matrix type is bound to a
          * generic attribute index <i>, its values are taken from consecutive generic
          * attributes beginning with generic attribute <i>.  Such matrices are
          * treated as an array of column vectors with values taken from the generic
          * attributes.
          * This means there may be gaps in the varyings we are taking values from."
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
          * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
          * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
          * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
          * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
          *
          */
         if (varying_has_user_specified_location(xfb_decl->matched_candidate->toplevel_var)) {
            /* Chunk along element boundaries so the per-element gaps shown
             * above are respected.
             */
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            /* Packed layout: fill the rest of the current vec4 slot. */
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a shader
          *    invocation, the buffer contents at the assigned offset will be
          *    undefined.  Even if there are no static writes to a variable or
          *    member that is assigned a transform feedback offset, the space is
          *    still allocated in the buffer and still affects the stride."
          */
         if (xfb_decl_is_varying_written(xfb_decl)) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = xfb_decl->stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = xfb_decl->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      /* An explicit xfb_stride was declared: validate it rather than
       * growing it.
       */
      if (_mesa_gl_datatype_is_64bit(xfb_decl->type) &&
          info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 8 as its applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      /* Implicit stride: extend it past this varying, aligning to the
       * largest member alignment seen so far when xfb qualifiers are used.
       */
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             _mesa_gl_datatype_is_64bit(xfb_decl->type) ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].name.string =
      ralloc_strdup(prog, xfb_decl->orig_name);
   resource_name_updated(&info->Varyings[info->NumVarying].name);
   info->Varyings[info->NumVarying].Type = xfb_decl->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}
2035 
2036 static const struct tfeedback_candidate *
xfb_decl_find_candidate(struct xfb_decl * xfb_decl,struct gl_shader_program * prog,struct hash_table * tfeedback_candidates)2037 xfb_decl_find_candidate(struct xfb_decl *xfb_decl,
2038                         struct gl_shader_program *prog,
2039                         struct hash_table *tfeedback_candidates)
2040 {
2041    const char *name = xfb_decl->var_name;
2042    switch (xfb_decl->lowered_builtin_array_variable) {
2043    case none:
2044       name = xfb_decl->var_name;
2045       break;
2046    case clip_distance:
2047    case cull_distance:
2048       name = "gl_ClipDistanceMESA";
2049       break;
2050    }
2051    struct hash_entry *entry =
2052       _mesa_hash_table_search(tfeedback_candidates, name);
2053 
2054    xfb_decl->matched_candidate = entry ?
2055          (struct tfeedback_candidate *) entry->data : NULL;
2056 
2057    if (!xfb_decl->matched_candidate) {
2058       /* From GL_EXT_transform_feedback:
2059        *   A program will fail to link if:
2060        *
2061        *   * any variable name specified in the <varyings> array is not
2062        *     declared as an output in the geometry shader (if present) or
2063        *     the vertex shader (if no geometry shader is present);
2064        */
2065       linker_error(prog, "Transform feedback varying %s undeclared.",
2066                    xfb_decl->orig_name);
2067    }
2068 
2069    return xfb_decl->matched_candidate;
2070 }
2071 
2072 /**
2073  * Force a candidate over the previously matched one. It happens when a new
2074  * varying needs to be created to match the xfb declaration, for example,
 * to fulfill an alignment criterion.
2076  */
2077 static void
xfb_decl_set_lowered_candidate(struct xfb_decl * xfb_decl,struct tfeedback_candidate * candidate)2078 xfb_decl_set_lowered_candidate(struct xfb_decl *xfb_decl,
2079                                struct tfeedback_candidate *candidate)
2080 {
2081    xfb_decl->matched_candidate = candidate;
2082 
2083    /* The subscript part is no longer relevant */
2084    xfb_decl->is_subscripted = false;
2085    xfb_decl->array_subscript = 0;
2086 }
2087 
2088 /**
2089  * Parse all the transform feedback declarations that were passed to
2090  * glTransformFeedbackVaryings() and store them in xfb_decl objects.
2091  *
2092  * If an error occurs, the error is reported through linker_error() and false
2093  * is returned.
2094  */
2095 static bool
parse_xfb_decls(const struct gl_constants * consts,const struct gl_extensions * exts,struct gl_shader_program * prog,const void * mem_ctx,unsigned num_names,char ** varying_names,struct xfb_decl * decls,bool compact_arrays)2096 parse_xfb_decls(const struct gl_constants *consts,
2097                 const struct gl_extensions *exts,
2098                 struct gl_shader_program *prog,
2099                 const void *mem_ctx, unsigned num_names,
2100                 char **varying_names, struct xfb_decl *decls, bool compact_arrays)
2101 {
2102    for (unsigned i = 0; i < num_names; ++i) {
2103       xfb_decl_init(&decls[i], consts, exts, mem_ctx, varying_names[i], compact_arrays);
2104 
2105       if (!xfb_decl_is_varying(&decls[i]))
2106          continue;
2107 
2108       /* From GL_EXT_transform_feedback:
2109        *   A program will fail to link if:
2110        *
2111        *   * any two entries in the <varyings> array specify the same varying
2112        *     variable;
2113        *
2114        * We interpret this to mean "any two entries in the <varyings> array
2115        * specify the same varying variable and array index", since transform
2116        * feedback of arrays would be useless otherwise.
2117        */
2118       for (unsigned j = 0; j < i; ++j) {
2119          if (xfb_decl_is_varying(&decls[j])) {
2120             if (xfb_decl_is_same(&decls[i], &decls[j])) {
2121                linker_error(prog, "Transform feedback varying %s specified "
2122                             "more than once.", varying_names[i]);
2123                return false;
2124             }
2125          }
2126       }
2127    }
2128    return true;
2129 }
2130 
2131 static int
cmp_xfb_offset(const void * x_generic,const void * y_generic)2132 cmp_xfb_offset(const void * x_generic, const void * y_generic)
2133 {
2134    struct xfb_decl *x = (struct xfb_decl *) x_generic;
2135    struct xfb_decl *y = (struct xfb_decl *) y_generic;
2136 
2137    if (x->buffer != y->buffer)
2138       return x->buffer - y->buffer;
2139    return x->offset - y->offset;
2140 }
2141 
2142 /**
2143  * Store transform feedback location assignments into
2144  * prog->sh.LinkedTransformFeedback based on the data stored in
2145  * xfb_decls.
2146  *
2147  * If an error occurs, the error is reported through linker_error() and false
2148  * is returned.
2149  */
static bool
store_tfeedback_info(const struct gl_constants *consts,
                     struct gl_shader_program *prog, unsigned num_xfb_decls,
                     struct xfb_decl *xfb_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   /* Nothing to capture if no stage before the FS exists (e.g. compute). */
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
    * tracking the number of buffers doesn't overflow.
    */
   assert(consts->MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   /* All linked xfb state hangs off the last pre-rasterization program. */
   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order
    * however some drivers expect to receive the list of transform feedback
    * declarations in order so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(xfb_decls, num_xfb_decls, sizeof(*xfb_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_xfb_decls);

   /* Pre-count outputs so the Outputs array can be allocated up front;
    * only varyings actually written by the shader produce outputs.
    */
   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (xfb_decl_is_varying_written(&xfb_decls[i]))
         num_outputs += xfb_decl_get_num_outputs(&xfb_decls[i]);
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   unsigned buffers = 0;          /* bitmask of active buffer indices */
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {0};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS: each varying goes to its own buffer. */
      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             num_buffers, num_buffers, num_outputs,
                             used_components, NULL, NULL, has_xfb_qualifiers,
                             mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   }
   else {
      /* GL_INTERLEAVED_ATTRIBS: varyings share buffers; separators and
       * explicit xfb_buffer qualifiers advance the buffer index.
       */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_xfb_decls ? xfb_decls[0].buffer : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_xfb_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != xfb_decls[i].buffer) {
            /* we have moved to the next buffer so reset stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         if (xfb_decls[i].next_buffer_separator) {
            if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                                xfb_prog->sh.LinkedTransformFeedback,
                                buffer, num_buffers, num_outputs,
                                used_components, explicit_stride,
                                max_member_alignment, has_xfb_qualifiers,
                                mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = xfb_decls[i].buffer;
         } else {
            buffer = num_buffers;
         }

         if (xfb_decl_is_varying(&xfb_decls[i])) {
            if (buffer_stream_id == -1)  {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) xfb_decls[i].stream_id;

               /* Only mark a buffer as active when there is a varying
                * attached to it. This behaviour is based on a revised version
                * of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) xfb_decls[i].stream_id) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            xfb_decls[i].orig_name,
                            xfb_decls[i].stream_id,
                            buffer_stream_id);
               return false;
            }
         }

         if (!xfb_decl_store(&xfb_decls[i], consts, prog,
                             xfb_prog->sh.LinkedTransformFeedback,
                             buffer, num_buffers, num_outputs, used_components,
                             explicit_stride, max_member_alignment,
                             has_xfb_qualifiers, mem_ctx))
            return false;
      }
   }
   /* Every declaration must have stored exactly the outputs counted above. */
   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}
2295 
2296 /**
2297  * Enum representing the order in which varyings are packed within a
2298  * packing class.
2299  *
2300  * Currently we pack vec4's first, then vec2's, then scalar values, then
2301  * vec3's.  This order ensures that the only vectors that are at risk of
2302  * having to be "double parked" (split between two adjacent varying slots)
2303  * are the vec3's.
2304  */
enum packing_order_enum {
   PACKING_ORDER_VEC4,   /**< Packed first: always fills whole slots. */
   PACKING_ORDER_VEC2,   /**< Then vec2s: two share one slot cleanly. */
   PACKING_ORDER_SCALAR, /**< Then scalars: fill remaining holes. */
   PACKING_ORDER_VEC3,   /**< Last: only vec3s risk spanning two slots. */
};
2311 
2312 /**
2313  * Structure recording the relationship between a single producer output
2314  * and a single consumer input.
2315  */
struct match {
   /**
    * Packing class for this varying, computed by compute_packing_class().
    */
   unsigned packing_class;

   /**
    * Packing order for this varying, computed by compute_packing_order().
    */
   enum packing_order_enum packing_order;

   /**
    * The output variable in the producer stage.  May be NULL (the sort
    * comparators below guard every dereference with a NULL check).
    */
   nir_variable *producer_var;

   /**
    * The input variable in the consumer stage.  May be NULL when the
    * producer output is consumed only by transform feedback (see
    * varying_matches_record()).
    */
   nir_variable *consumer_var;

   /**
    * The location which has been assigned for this varying.  This is
    * expressed in multiples of a float, with the first generic varying
    * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
    * value 0.
    */
   unsigned generic_location;

   /**
    * Original index, used as a fallback sorting key to ensure
    * a stable sort
    */
   unsigned original_index;
};
2351 
2352 /**
2353  * Data structure recording the relationship between outputs of one shader
2354  * stage (the "producer") and inputs of another (the "consumer").
2355  */
struct varying_matches
{
   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled, fortunately where transform feedback
    * requires packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   bool xfb_enabled;

   /** True when ARB_enhanced_layouts is available. */
   bool enhanced_layouts_enabled;

   /**
    * If true, this driver prefers varyings to be aligned to power of two
    * in a slot.
    */
   bool prefer_pot_aligned_varyings;

   /**
    * Dynamically grown array of producer/consumer pairs recorded by
    * varying_matches_record(); capacity doubles when full.
    */
   struct match *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   /** The two shader stages on either side of this interface. */
   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};
2409 
2410 /**
2411  * Comparison function passed to qsort() to sort varyings by packing_class and
2412  * then by packing_order.
2413  */
2414 static int
varying_matches_match_comparator(const void * x_generic,const void * y_generic)2415 varying_matches_match_comparator(const void *x_generic, const void *y_generic)
2416 {
2417    const struct match *x = (const struct match *) x_generic;
2418    const struct match *y = (const struct match *) y_generic;
2419 
2420    if (x->packing_class != y->packing_class)
2421       return x->packing_class - y->packing_class;
2422    if (x->packing_order != y->packing_order)
2423       return x->packing_order - y->packing_order;
2424    return x->original_index - y->original_index;
2425 }
2426 
2427 /**
2428  * Comparison function passed to qsort() to sort varyings used only by
2429  * transform feedback when packing of other varyings is disabled.
2430  */
2431 static int
varying_matches_xfb_comparator(const void * x_generic,const void * y_generic)2432 varying_matches_xfb_comparator(const void *x_generic, const void *y_generic)
2433 {
2434    const struct match *x = (const struct match *) x_generic;
2435    const struct match *y = (const struct match *) y_generic;
2436    /* if both varying are used by transform feedback, sort them */
2437    if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) {
2438       if (y->producer_var != NULL && y->producer_var->data.is_xfb_only)
2439          return 0;
2440       /* if x is varying and y is not, put y first */
2441       return +1;
2442    } else if (y->producer_var != NULL && y->producer_var->data.is_xfb_only) {
2443       /* if y is varying and x is not, leave x first */
2444       return -1;
2445    }
2446 
2447    /* otherwise leave the order alone */
2448    return x->original_index - y->original_index;
2449 }
2450 
2451 /**
2452  * Comparison function passed to qsort() to sort varyings NOT used by
2453  * transform feedback when packing of xfb varyings is disabled.
2454  */
2455 static int
varying_matches_not_xfb_comparator(const void * x_generic,const void * y_generic)2456 varying_matches_not_xfb_comparator(const void *x_generic, const void *y_generic)
2457 {
2458    const struct match *x = (const struct match *) x_generic;
2459    const struct match *y = (const struct match *) y_generic;
2460 
2461    if ( (x->producer_var != NULL && !x->producer_var->data.is_xfb)
2462         && (y->producer_var != NULL && !y->producer_var->data.is_xfb) )
2463       /* if both are non-xfb, then sort them */
2464       return varying_matches_match_comparator(x_generic, y_generic);
2465 
2466    /* otherwise, leave the order alone */
2467    return x->original_index - y->original_index;
2468 }
2469 
2470 static bool
is_unpackable_tess(gl_shader_stage producer_stage,gl_shader_stage consumer_stage)2471 is_unpackable_tess(gl_shader_stage producer_stage,
2472                    gl_shader_stage consumer_stage)
2473 {
2474    if (consumer_stage == MESA_SHADER_TESS_EVAL ||
2475        consumer_stage == MESA_SHADER_TESS_CTRL ||
2476        producer_stage == MESA_SHADER_TESS_CTRL)
2477       return true;
2478 
2479    return false;
2480 }
2481 
2482 static void
init_varying_matches(void * mem_ctx,struct varying_matches * vm,const struct gl_constants * consts,const struct gl_extensions * exts,gl_shader_stage producer_stage,gl_shader_stage consumer_stage,bool sso)2483 init_varying_matches(void *mem_ctx, struct varying_matches *vm,
2484                      const struct gl_constants *consts,
2485                      const struct gl_extensions *exts,
2486                      gl_shader_stage producer_stage,
2487                      gl_shader_stage consumer_stage,
2488                      bool sso)
2489 {
2490    /* Tessellation shaders treat inputs and outputs as shared memory and can
2491     * access inputs and outputs of other invocations.
2492     * Therefore, they can't be lowered to temps easily (and definitely not
2493     * efficiently).
2494     */
2495    bool unpackable_tess =
2496       is_unpackable_tess(producer_stage, consumer_stage);
2497 
2498    /* Transform feedback code assumes varying arrays are packed, so if the
2499     * driver has disabled varying packing, make sure to at least enable
2500     * packing required by transform feedback. See below for exception.
2501     */
2502    bool xfb_enabled = exts->EXT_transform_feedback && !unpackable_tess;
2503 
2504    /* Some drivers actually requires packing to be explicitly disabled
2505     * for varyings used by transform feedback.
2506     */
2507    bool disable_xfb_packing = consts->DisableTransformFeedbackPacking;
2508 
2509    /* Disable packing on outward facing interfaces for SSO because in ES we
2510     * need to retain the unpacked varying information for draw time
2511     * validation.
2512     *
2513     * Packing is still enabled on individual arrays, structs, and matrices as
2514     * these are required by the transform feedback code and it is still safe
2515     * to do so. We also enable packing when a varying is only used for
2516     * transform feedback and its not a SSO.
2517     */
2518    bool disable_varying_packing =
2519       consts->DisableVaryingPacking || unpackable_tess;
2520    if (sso && (producer_stage == MESA_SHADER_NONE || consumer_stage == MESA_SHADER_NONE))
2521       disable_varying_packing = true;
2522 
2523    /* Note: this initial capacity is rather arbitrarily chosen to be large
2524     * enough for many cases without wasting an unreasonable amount of space.
2525     * varying_matches_record() will resize the array if there are more than
2526     * this number of varyings.
2527     */
2528    vm->matches_capacity = 8;
2529    vm->matches = (struct match *)
2530       ralloc_array(mem_ctx, struct match, vm->matches_capacity);
2531    vm->num_matches = 0;
2532 
2533    vm->disable_varying_packing = disable_varying_packing;
2534    vm->disable_xfb_packing = disable_xfb_packing;
2535    vm->xfb_enabled = xfb_enabled;
2536    vm->enhanced_layouts_enabled = exts->ARB_enhanced_layouts;
2537    vm->prefer_pot_aligned_varyings = consts->PreferPOTAlignedVaryings;
2538    vm->producer_stage = producer_stage;
2539    vm->consumer_stage = consumer_stage;
2540 }
2541 
2542 /**
2543  * Packing is always safe on individual arrays, structures, and matrices. It
2544  * is also safe if the varying is only used for transform feedback.
2545  */
2546 static bool
is_varying_packing_safe(struct varying_matches * vm,const struct glsl_type * type,const nir_variable * var)2547 is_varying_packing_safe(struct varying_matches *vm,
2548                         const struct glsl_type *type, const nir_variable *var)
2549 {
2550    if (is_unpackable_tess(vm->producer_stage, vm->consumer_stage))
2551       return false;
2552 
2553    return vm->xfb_enabled && (glsl_type_is_array_or_matrix(type) ||
2554                               glsl_type_is_struct(type) ||
2555                               var->data.is_xfb_only);
2556 }
2557 
2558 static bool
is_packing_disabled(struct varying_matches * vm,const struct glsl_type * type,const nir_variable * var)2559 is_packing_disabled(struct varying_matches *vm, const struct glsl_type *type,
2560                     const nir_variable *var)
2561 {
2562    return (vm->disable_varying_packing && !is_varying_packing_safe(vm, type, var)) ||
2563       (vm->disable_xfb_packing && var->data.is_xfb &&
2564        !(glsl_type_is_array(type) || glsl_type_is_struct(type) ||
2565          glsl_type_is_matrix(type))) || var->data.must_be_shader_input;
2566 }
2567 
2568 /**
2569  * Compute the "packing class" of the given varying.  This is an unsigned
2570  * integer with the property that two variables in the same packing class can
 * be safely packed into the same vec4.
2572  */
2573 static unsigned
varying_matches_compute_packing_class(const nir_variable * var)2574 varying_matches_compute_packing_class(const nir_variable *var)
2575 {
2576    /* Without help from the back-end, there is no way to pack together
2577     * variables with different interpolation types, because
2578     * lower_packed_varyings must choose exactly one interpolation type for
2579     * each packed varying it creates.
2580     *
2581     * However, we can safely pack together floats, ints, and uints, because:
2582     *
2583     * - varyings of base type "int" and "uint" must use the "flat"
2584     *   interpolation type, which can only occur in GLSL 1.30 and above.
2585     *
2586     * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2587     *   can store flat floats as ints without losing any information (using
2588     *   the ir_unop_bitcast_* opcodes).
2589     *
2590     * Therefore, the packing class depends only on the interpolation type.
2591     */
2592    bool is_interpolation_flat = var->data.interpolation == INTERP_MODE_FLAT ||
2593       glsl_contains_integer(var->type) || glsl_contains_double(var->type);
2594 
2595    const unsigned interp = is_interpolation_flat
2596       ? (unsigned) INTERP_MODE_FLAT : var->data.interpolation;
2597 
2598    assert(interp < (1 << 3));
2599 
2600    const unsigned packing_class = (interp << 0) |
2601                                   (var->data.centroid << 3) |
2602                                   (var->data.sample << 4) |
2603                                   (var->data.patch << 5) |
2604                                   (var->data.must_be_shader_input << 6);
2605 
2606    return packing_class;
2607 }
2608 
2609 /**
2610  * Compute the "packing order" of the given varying.  This is a sort key we
2611  * use to determine when to attempt to pack the given varying relative to
2612  * other varyings in the same packing class.
2613  */
2614 static enum packing_order_enum
varying_matches_compute_packing_order(const nir_variable * var)2615 varying_matches_compute_packing_order(const nir_variable *var)
2616 {
2617    const struct glsl_type *element_type = glsl_without_array(var->type);
2618 
2619    switch (glsl_get_component_slots(element_type) % 4) {
2620    case 1: return PACKING_ORDER_SCALAR;
2621    case 2: return PACKING_ORDER_VEC2;
2622    case 3: return PACKING_ORDER_VEC3;
2623    case 0: return PACKING_ORDER_VEC4;
2624    default:
2625       assert(!"Unexpected value of vector_elements");
2626       return PACKING_ORDER_VEC4;
2627    }
2628 }
2629 
2630 /**
2631  * Record the given producer/consumer variable pair in the list of variables
2632  * that should later be assigned locations.
2633  *
2634  * It is permissible for \c consumer_var to be NULL (this happens if a
2635  * variable is output by the producer and consumed by transform feedback, but
2636  * not consumed by the consumer).
2637  *
2638  * If \c producer_var has already been paired up with a consumer_var, or
2639  * producer_var is part of fixed pipeline functionality (and hence already has
2640  * a location assigned), this function has no effect.
2641  *
2642  * Note: as a side effect this function may change the interpolation type of
2643  * \c producer_var, but only when the change couldn't possibly affect
2644  * rendering.
2645  */
static void
varying_matches_record(void *mem_ctx, struct varying_matches *vm,
                       nir_variable *producer_var, nir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var &&
       (producer_var->data.explicit_location || producer_var->data.location != -1)) ||
       (consumer_var &&
        (consumer_var->data.explicit_location || consumer_var->data.location != -1))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been assigned explicitly.
       */
      return;
   }

   /* The varyings should not have been matched and assigned previously */
   assert((producer_var == NULL || producer_var->data.location == -1) &&
          (consumer_var == NULL || consumer_var->data.location == -1));

   /* consumer_var == NULL implies producer_var != NULL (see assert above),
    * so dereferencing producer_var here is safe.
    */
   bool needs_flat_qualifier = consumer_var == NULL &&
      (glsl_contains_integer(producer_var->type) ||
       glsl_contains_double(producer_var->type));

   if (!vm->disable_varying_packing &&
       (!vm->disable_xfb_packing || producer_var  == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (vm->consumer_stage != MESA_SHADER_NONE && vm->consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type varying cannot possibly affect rendering.
       * Also, this variable is non-flat and is (or contains) an integer
       * or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   /* Grow the matches array (doubling) when it is full. */
   if (vm->num_matches == vm->matches_capacity) {
      vm->matches_capacity *= 2;
      vm->matches = (struct match *)
         reralloc(mem_ctx, vm->matches, struct match, vm->matches_capacity);
   }

   /* We must use the consumer to compute the packing class because in GL4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *    the same name declared in all linked shaders for the same cross-stage
    *    interface must match, otherwise the link command will fail.
    *
    *    When comparing an output from one stage to an input of a subsequent
    *    stage, the input and output don't match if their interpolation
    *    qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revision 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const nir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   /* Propagate the must_be_shader_input constraint backwards so the
    * producer side is packed (or not) consistently with the consumer.
    */
   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   vm->matches[vm->num_matches].packing_class
      = varying_matches_compute_packing_class(var);
   vm->matches[vm->num_matches].packing_order
      = varying_matches_compute_packing_order(var);

   vm->matches[vm->num_matches].producer_var = producer_var;
   vm->matches[vm->num_matches].consumer_var = consumer_var;
   vm->num_matches++;
}
2737 
/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches_record().
 *
 * Locations are assigned at component granularity (4 components per slot),
 * after sorting the matches with a comparator chosen by the packing mode.
 *
 * \param components  returns array[slot] of number of components used
 *                    per slot (1, 2, 3 or 4)
 * \param reserved_slots  bitmask indicating which varying slots are already
 *                        allocated
 * \return number of slots (4-element vectors) allocated
 */
static unsigned
varying_matches_assign_locations(struct varying_matches *vm,
                                 struct gl_shader_program *prog,
                                 uint8_t components[], uint64_t reserved_slots)
{
   /* Establish the original order of the varying_matches array; our
    * sorts will use this for sorting when the varyings do not have
    * xfb qualifiers
    */
   for (unsigned i = 0; i < vm->num_matches; i++)
      vm->matches[i].original_index = i;

   /* If packing has been disabled then we cannot safely sort the varyings by
    * class as it may mean we are using a version of OpenGL where
    * interpolation qualifiers are not guaranteed to be matching across
    * shaders, sorting in this case could result in mismatching shader
    * interfaces. So we sort only the varyings used by transform feedback.
    *
    * If packing is only disabled for xfb varyings (mutually exclusive with
    * disable_varying_packing), we then group varyings depending on if they
    * are captured for transform feedback.
    */
   if (vm->disable_varying_packing) {
      /* Only sort varyings that are only used by transform feedback. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_xfb_comparator);
   } else if (vm->disable_xfb_packing) {
      /* Only sort varyings that are NOT used by transform feedback. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_not_xfb_comparator);
   } else {
      /* Sort varying matches into an order that makes them easy to pack. */
      qsort(vm->matches, vm->num_matches, sizeof(*vm->matches),
            &varying_matches_match_comparator);
   }

   /* Locations are counted in components; patch varyings get their own
    * location space starting past the per-vertex range.
    */
   unsigned generic_location = 0;
   unsigned generic_patch_location = MAX_VARYING*4;
   bool previous_var_xfb = false;
   bool previous_var_xfb_only = false;
   unsigned previous_packing_class = ~0u;

   /* For tranform feedback separate mode, we know the number of attributes
    * is <= the number of buffers.  So packing isn't critical.  In fact,
    * packing vec3 attributes can cause trouble because splitting a vec3
    * effectively creates an additional transform feedback output.  The
    * extra TFB output may exceed device driver limits.
    *
    * Also don't pack vec3 if the driver prefers power of two aligned
    * varyings. Packing order guarantees that vec4, vec2 and vec1 will be
    * pot-aligned, we only need to take care of vec3s
    */
   const bool dont_pack_vec3 =
      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       prog->TransformFeedback.NumVarying > 0) ||
      vm->prefer_pot_aligned_varyings;

   for (unsigned i = 0; i < vm->num_matches; i++) {
      unsigned *location = &generic_location;
      const nir_variable *var;
      const struct glsl_type *type;
      bool is_vertex_input = false;

      /* Prefer the consumer variable when both sides exist; the producer is
       * only consulted when the consumer side was optimised away.
       */
      if (vm->matches[i].consumer_var) {
         var = vm->matches[i].consumer_var;
         type = get_varying_type(var, vm->consumer_stage);
         if (vm->consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         if (!vm->matches[i].producer_var)
            continue; /* The varying was optimised away */

         var = vm->matches[i].producer_var;
         type = get_varying_type(var, vm->producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance if varying packing is disabled for transform feedback,
       * and previous or current varying is used for transform feedback.
       *
       * Also advance to the next slot if packing is disabled. This makes sure
       * we don't assign varyings the same locations which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled. Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (vm->disable_xfb_packing &&
           (previous_var_xfb || var->data.is_xfb)) ||
          (vm->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != vm->matches[i].packing_class) ||
          (vm->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb = var->data.is_xfb;
      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = vm->matches[i].packing_class;

      /* The number of components taken up by this variable. For vertex shader
       * inputs, we use the number of slots * 4, as they have different
       * counting rules.
       */
      unsigned num_components = 0;
      if (is_vertex_input) {
         num_components = glsl_count_attribute_slots(type, is_vertex_input) * 4;
      } else {
         if (is_packing_disabled(vm, type, var)) {
            num_components = glsl_count_attribute_slots(type, false) * 4;
         } else {
            num_components = glsl_get_component_slots_aligned(type, *location);
         }
      }

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the below code and loop back over
       * trying to fill any locations that we skipped because we couldn't pack
       * the varying between an explicit location. For now just let the user
       * hit the linking error if we run out of room and suggest they use
       * explicit locations.
       */
      while (slot_end < MAX_VARYING * 4u) {
         /* Skip ahead (slot-aligned) while any slot this varying would cover
          * is already reserved by an explicit-location varying.
          */
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

         assert(slots > 0);

         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s it is possible an array or struct could not be "
                      "packed between varyings with explicit locations. Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      /* Record how many components each covered slot uses; intermediate
       * slots are fully used, the final slot may be partial.
       */
      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      vm->matches[i].generic_location = *location;

      *location = slot_end + 1;
   }

   /* Round the component count up to whole slots. */
   return (generic_location + 3) / 4;
}
2914 
2915 static void
varying_matches_assign_temp_locations(struct varying_matches * vm,struct gl_shader_program * prog,uint64_t reserved_slots)2916 varying_matches_assign_temp_locations(struct varying_matches *vm,
2917                                       struct gl_shader_program *prog,
2918                                       uint64_t reserved_slots)
2919 {
2920    unsigned tmp_loc = 0;
2921    for (unsigned i = 0; i < vm->num_matches; i++) {
2922       nir_variable *producer_var = vm->matches[i].producer_var;
2923       nir_variable *consumer_var = vm->matches[i].consumer_var;
2924 
2925       while (tmp_loc < MAX_VARYINGS_INCL_PATCH) {
2926          if (reserved_slots & (UINT64_C(1) << tmp_loc))
2927             tmp_loc++;
2928          else
2929             break;
2930       }
2931 
2932       if (producer_var) {
2933          assert(producer_var->data.location == -1);
2934          producer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2935       }
2936 
2937       if (consumer_var) {
2938          assert(consumer_var->data.location == -1);
2939          consumer_var->data.location = VARYING_SLOT_VAR0 + tmp_loc;
2940       }
2941 
2942       tmp_loc++;
2943    }
2944 }
2945 
/**
 * Update the producer and consumer shaders to reflect the locations
 * assignments that were made by varying_matches_assign_locations().
 */
static void
varying_matches_store_locations(struct varying_matches *vm)
{
   /* Check if a location needs to be packed with lower_packed_varyings() or
    * if we can just use ARB_enhanced_layouts packing.
    */
   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {0};
   const struct glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };

   for (unsigned i = 0; i < vm->num_matches; i++) {
      nir_variable *producer_var = vm->matches[i].producer_var;
      nir_variable *consumer_var = vm->matches[i].consumer_var;
      /* generic_location counts components; split it into slot + offset. */
      unsigned generic_location = vm->matches[i].generic_location;
      unsigned slot = generic_location / 4;
      unsigned offset = generic_location % 4;

      if (producer_var) {
         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
         producer_var->data.location_frac = offset;
      }

      if (consumer_var) {
         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
         consumer_var->data.location_frac = offset;
      }

      /* Find locations suitable for native packing via
       * ARB_enhanced_layouts.
       */
      if (vm->enhanced_layouts_enabled) {
         nir_variable *var = producer_var ? producer_var : consumer_var;
         unsigned stage = producer_var ? vm->producer_stage : vm->consumer_stage;
         const struct glsl_type *type =
            get_varying_type(var, stage);
         /* Number of whole slots this varying covers, rounded up. */
         unsigned comp_slots = glsl_get_component_slots(type) + offset;
         unsigned slots = comp_slots / 4;
         if (comp_slots % 4)
            slots += 1;

         if (producer_var && consumer_var) {
            /* Arrays, matrices, structs, 64-bit types and vectors that
             * straddle a slot boundary cannot be expressed with a simple
             * component qualifier, so force the lowering path for every
             * slot they touch.
             */
            if (glsl_type_is_array_or_matrix(type) || glsl_type_is_struct(type) ||
                glsl_type_is_64bit(type)) {
               for (unsigned j = 0; j < slots; j++) {
                  pack_loc[slot + j] = true;
               }
            } else if (offset + glsl_get_vector_elements(type) > 4) {
               pack_loc[slot] = true;
               pack_loc[slot + 1] = true;
            } else {
               loc_type[slot][offset] = type;
            }
         } else {
            /* One side was optimised away; fall back to the lowering path. */
            for (unsigned j = 0; j < slots; j++) {
               pack_loc[slot + j] = true;
            }
         }
      }
   }

   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
    * suitable.
    */
   if (vm->enhanced_layouts_enabled) {
      for (unsigned i = 0; i < vm->num_matches; i++) {
         nir_variable *producer_var = vm->matches[i].producer_var;
         nir_variable *consumer_var = vm->matches[i].consumer_var;
         if (!producer_var || !consumer_var)
            continue;

         unsigned generic_location = vm->matches[i].generic_location;
         unsigned slot = generic_location / 4;
         if (pack_loc[slot])
            continue;

         /* All varyings sharing a slot must have the same base type for the
          * component-qualifier packing to be legal.
          */
         const struct glsl_type *type =
            get_varying_type(producer_var, vm->producer_stage);
         bool type_match = true;
         for (unsigned j = 0; j < 4; j++) {
            if (loc_type[slot][j]) {
               if (glsl_get_base_type(type) !=
                   glsl_get_base_type(loc_type[slot][j]))
                  type_match = false;
            }
         }

         if (type_match) {
            /* Mark both sides explicit so the enhanced-layouts path is
             * used instead of lower_packed_varyings().
             */
            producer_var->data.explicit_location = 1;
            consumer_var->data.explicit_location = 1;
         }
      }
   }
}
3042 
3043 /**
3044  * Is the given variable a varying variable to be counted against the
3045  * limit in ctx->Const.MaxVarying?
3046  * This includes variables such as texcoords, colors and generic
3047  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
3048  */
3049 static bool
var_counts_against_varying_limit(gl_shader_stage stage,const nir_variable * var)3050 var_counts_against_varying_limit(gl_shader_stage stage, const nir_variable *var)
3051 {
3052    /* Only fragment shaders will take a varying variable as an input */
3053    if (stage == MESA_SHADER_FRAGMENT &&
3054        var->data.mode == nir_var_shader_in) {
3055       switch (var->data.location) {
3056       case VARYING_SLOT_POS:
3057       case VARYING_SLOT_FACE:
3058       case VARYING_SLOT_PNTC:
3059          return false;
3060       default:
3061          return true;
3062       }
3063    }
3064    return false;
3065 }
3066 
/**
 * Book-keeping carried through tfeedback_candidate_generator() as it walks
 * the type tree of a single top-level varying.
 */
struct tfeedback_candidate_generator_state {
   /**
    * Memory context used to allocate hash table keys and values.
    */
   void *mem_ctx;

   /**
    * Hash table in which tfeedback_candidate objects should be stored.
    */
   struct hash_table *tfeedback_candidates;

   /** Shader stage being processed. */
   gl_shader_stage stage;

   /**
    * Pointer to the toplevel variable that is being traversed.
    */
   nir_variable *toplevel_var;

   /**
    * Total number of varying floats that have been visited so far.  This is
    * used to determine the offset to each varying within the toplevel
    * variable.
    */
   unsigned varying_floats;

   /**
    * Offset within the xfb. Counted in floats.
    */
   unsigned xfb_offset_floats;
};
3097 
/**
 * Generates tfeedback_candidate structs describing all possible targets of
 * transform feedback.
 *
 * tfeedback_candidate structs are stored in the hash table
 * tfeedback_candidates.  This hash table maps varying names to instances of
 * the tfeedback_candidate struct.
 *
 * \param state            traversal book-keeping (hash table, running offsets)
 * \param name             in/out: current dotted/subscripted variable name,
 *                         extended in place at each recursion level
 * \param name_length      length of the valid name prefix for this level
 * \param type             type being visited at this recursion level
 * \param named_ifc_member when non-NULL, restrict traversal to this single
 *                         member of an interface block
 */
static void
tfeedback_candidate_generator(struct tfeedback_candidate_generator_state *state,
                              char **name, size_t name_length,
                              const struct glsl_type *type,
                              const struct glsl_struct_field *named_ifc_member)
{
   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_INTERFACE:
      if (named_ifc_member) {
         /* Visit only the requested member of the interface block. */
         ralloc_asprintf_rewrite_tail(name, &name_length, ".%s",
                                      named_ifc_member->name);
         tfeedback_candidate_generator(state, name, name_length,
                                       named_ifc_member->type, NULL);
         return;
      }
      FALLTHROUGH;
   case GLSL_TYPE_STRUCT:
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         size_t new_length = name_length;

         /* Append '.field' to the current variable name. */
         if (name) {
            ralloc_asprintf_rewrite_tail(name, &new_length, ".%s",
                                         glsl_get_struct_elem_name(type, i));
         }

         tfeedback_candidate_generator(state, name, new_length,
                                       glsl_get_struct_field(type, i), NULL);
      }

      return;
   case GLSL_TYPE_ARRAY:
      /* Arrays of aggregates (and arrays of arrays) must be recursed into
       * element by element; arrays of basic types fall through and are
       * recorded as a single candidate.
       */
      if (glsl_type_is_struct(glsl_without_array(type)) ||
          glsl_type_is_interface(glsl_without_array(type)) ||
          glsl_type_is_array(glsl_get_array_element(type))) {

         for (unsigned i = 0; i < glsl_get_length(type); i++) {
            size_t new_length = name_length;

            /* Append the subscript to the current variable name */
            ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

            tfeedback_candidate_generator(state, name, new_length,
                                          glsl_get_array_element(type),
                                          named_ifc_member);
         }

         return;
      }
      FALLTHROUGH;
   default:
      assert(!glsl_type_is_struct(glsl_without_array(type)));
      assert(!glsl_type_is_interface(glsl_without_array(type)));

      /* Leaf type: record one candidate keyed by the full name built up
       * during traversal.
       */
      struct tfeedback_candidate *candidate
         = rzalloc(state->mem_ctx, struct tfeedback_candidate);
      candidate->toplevel_var = state->toplevel_var;
      candidate->type = type;

      if (glsl_type_is_64bit(glsl_without_array(type))) {
         /*  From ARB_gpu_shader_fp64:
          *
          * If any variable captured in transform feedback has double-precision
          * components, the practical requirements for defined behavior are:
          *     ...
          * (c) each double-precision variable captured must be aligned to a
          *     multiple of eight bytes relative to the beginning of a vertex.
          */
         state->xfb_offset_floats = ALIGN(state->xfb_offset_floats, 2);
         /* 64-bit members of structs are also aligned. */
         state->varying_floats = ALIGN(state->varying_floats, 2);
      }

      candidate->xfb_offset_floats = state->xfb_offset_floats;
      candidate->struct_offset_floats = state->varying_floats;

      _mesa_hash_table_insert(state->tfeedback_candidates,
                              ralloc_strdup(state->mem_ctx, *name),
                              candidate);

      const unsigned component_slots = glsl_get_component_slots(type);

      /* Variables with user-specified locations consume whole vec4 slots;
       * otherwise count only the components actually used.
       */
      if (varying_has_user_specified_location(state->toplevel_var)) {
         state->varying_floats += glsl_count_attribute_slots(type, false) * 4;
      } else {
         state->varying_floats += component_slots;
      }

      state->xfb_offset_floats += component_slots;
   }
}
3197 
3198 static void
populate_consumer_input_sets(void * mem_ctx,nir_shader * nir,struct hash_table * consumer_inputs,struct hash_table * consumer_interface_inputs,nir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])3199 populate_consumer_input_sets(void *mem_ctx, nir_shader *nir,
3200                              struct hash_table *consumer_inputs,
3201                              struct hash_table *consumer_interface_inputs,
3202                              nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3203 {
3204    memset(consumer_inputs_with_locations, 0,
3205           sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
3206 
3207    nir_foreach_shader_in_variable(input_var, nir) {
3208       /* All interface blocks should have been lowered by this point */
3209       assert(!glsl_type_is_interface(input_var->type));
3210 
3211       if (input_var->data.explicit_location) {
3212          /* assign_varying_locations only cares about finding the
3213           * nir_variable at the start of a contiguous location block.
3214           *
3215           *     - For !producer, consumer_inputs_with_locations isn't used.
3216           *
3217           *     - For !consumer, consumer_inputs_with_locations is empty.
3218           *
3219           * For consumer && producer, if you were trying to set some
3220           * nir_variable to the middle of a location block on the other side
3221           * of producer/consumer, cross_validate_outputs_to_inputs() should
3222           * be link-erroring due to either type mismatch or location
3223           * overlaps.  If the variables do match up, then they've got a
3224           * matching data.location and you only looked at
3225           * consumer_inputs_with_locations[var->data.location], not any
3226           * following entries for the array/structure.
3227           */
3228          consumer_inputs_with_locations[input_var->data.location] =
3229             input_var;
3230       } else if (input_var->interface_type != NULL) {
3231          char *const iface_field_name =
3232             ralloc_asprintf(mem_ctx, "%s.%s",
3233                glsl_get_type_name(glsl_without_array(input_var->interface_type)),
3234                input_var->name);
3235          _mesa_hash_table_insert(consumer_interface_inputs,
3236                                  iface_field_name, input_var);
3237       } else {
3238          _mesa_hash_table_insert(consumer_inputs,
3239                                  ralloc_strdup(mem_ctx, input_var->name),
3240                                  input_var);
3241       }
3242    }
3243 }
3244 
3245 /**
3246  * Find a variable from the consumer that "matches" the specified variable
3247  *
3248  * This function only finds inputs with names that match.  There is no
3249  * validation (here) that the types, etc. are compatible.
3250  */
3251 static nir_variable *
get_matching_input(void * mem_ctx,const nir_variable * output_var,struct hash_table * consumer_inputs,struct hash_table * consumer_interface_inputs,nir_variable * consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])3252 get_matching_input(void *mem_ctx,
3253                    const nir_variable *output_var,
3254                    struct hash_table *consumer_inputs,
3255                    struct hash_table *consumer_interface_inputs,
3256                    nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
3257 {
3258    nir_variable *input_var;
3259 
3260    if (output_var->data.explicit_location) {
3261       input_var = consumer_inputs_with_locations[output_var->data.location];
3262    } else if (output_var->interface_type != NULL) {
3263       char *const iface_field_name =
3264          ralloc_asprintf(mem_ctx, "%s.%s",
3265             glsl_get_type_name(glsl_without_array(output_var->interface_type)),
3266             output_var->name);
3267       struct hash_entry *entry =
3268          _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
3269       input_var = entry ? (nir_variable *) entry->data : NULL;
3270    } else {
3271       struct hash_entry *entry =
3272          _mesa_hash_table_search(consumer_inputs, output_var->name);
3273       input_var = entry ? (nir_variable *) entry->data : NULL;
3274    }
3275 
3276    return (input_var == NULL || input_var->data.mode != nir_var_shader_in)
3277       ? NULL : input_var;
3278 }
3279 
3280 static int
io_variable_cmp(const void * _a,const void * _b)3281 io_variable_cmp(const void *_a, const void *_b)
3282 {
3283    const nir_variable *const a = *(const nir_variable **) _a;
3284    const nir_variable *const b = *(const nir_variable **) _b;
3285 
3286    if (a->data.explicit_location && b->data.explicit_location)
3287       return b->data.location - a->data.location;
3288 
3289    if (a->data.explicit_location && !b->data.explicit_location)
3290       return 1;
3291 
3292    if (!a->data.explicit_location && b->data.explicit_location)
3293       return -1;
3294 
3295    return -strcmp(a->name, b->name);
3296 }
3297 
3298 /**
3299  * Sort the shader IO variables into canonical order
3300  */
3301 static void
canonicalize_shader_io(nir_shader * nir,nir_variable_mode io_mode)3302 canonicalize_shader_io(nir_shader *nir, nir_variable_mode io_mode)
3303 {
3304    nir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
3305    unsigned num_variables = 0;
3306 
3307    nir_foreach_variable_with_modes(var, nir, io_mode) {
3308       /* If we have already encountered more I/O variables that could
3309        * successfully link, bail.
3310        */
3311       if (num_variables == ARRAY_SIZE(var_table))
3312          return;
3313 
3314       var_table[num_variables++] = var;
3315    }
3316 
3317    if (num_variables == 0)
3318       return;
3319 
3320    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
3321     * we're going to push the variables on to the IR list as a stack, so we
3322     * want the last variable (in canonical order) to be first in the list.
3323     */
3324    qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
3325 
3326    /* Remove the variable from it's current location in the varible list, and
3327     * put it at the front.
3328     */
3329    for (unsigned i = 0; i < num_variables; i++) {
3330       exec_node_remove(&var_table[i]->node);
3331       exec_list_push_head(&nir->variables, &var_table[i]->node);
3332    }
3333 }
3334 
3335 /**
3336  * Generate a bitfield map of the explicit locations for shader varyings.
3337  *
3338  * Note: For Tessellation shaders we are sitting right on the limits of the
3339  * 64 bit map. Per-vertex and per-patch both have separate location domains
3340  * with a max of MAX_VARYING.
3341  */
3342 static uint64_t
reserved_varying_slot(struct gl_linked_shader * sh,nir_variable_mode io_mode)3343 reserved_varying_slot(struct gl_linked_shader *sh,
3344                       nir_variable_mode io_mode)
3345 {
3346    assert(io_mode == nir_var_shader_in || io_mode == nir_var_shader_out);
3347    /* Avoid an overflow of the returned value */
3348    assert(MAX_VARYINGS_INCL_PATCH <= 64);
3349 
3350    uint64_t slots = 0;
3351    int var_slot;
3352 
3353    if (!sh)
3354       return slots;
3355 
3356    nir_foreach_variable_with_modes(var, sh->Program->nir, io_mode) {
3357       if (!var->data.explicit_location ||
3358           var->data.location < VARYING_SLOT_VAR0)
3359          continue;
3360 
3361       var_slot = var->data.location - VARYING_SLOT_VAR0;
3362 
3363       bool is_gl_vertex_input = io_mode == nir_var_shader_in &&
3364                                 sh->Stage == MESA_SHADER_VERTEX;
3365       unsigned num_elements =
3366          glsl_count_attribute_slots(get_varying_type(var, sh->Stage),
3367                                     is_gl_vertex_input);
3368       for (unsigned i = 0; i < num_elements; i++) {
3369          if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
3370             slots |= UINT64_C(1) << var_slot;
3371          var_slot += 1;
3372       }
3373    }
3374 
3375    return slots;
3376 }
3377 
3378 /**
3379  * Sets the bits in the inputs_read, or outputs_written
3380  * bitfield corresponding to this variable.
3381  */
3382 static void
set_variable_io_mask(BITSET_WORD * bits,nir_variable * var,gl_shader_stage stage)3383 set_variable_io_mask(BITSET_WORD *bits, nir_variable *var, gl_shader_stage stage)
3384 {
3385    assert(var->data.mode == nir_var_shader_in ||
3386           var->data.mode == nir_var_shader_out);
3387    assert(var->data.location >= VARYING_SLOT_VAR0);
3388 
3389    const struct glsl_type *type = var->type;
3390    if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
3391       assert(glsl_type_is_array(type));
3392       type = glsl_get_array_element(type);
3393    }
3394 
3395    unsigned location = var->data.location - VARYING_SLOT_VAR0;
3396    unsigned slots = glsl_count_attribute_slots(type, false);
3397    for (unsigned i = 0; i < slots; i++) {
3398       BITSET_SET(bits, location + i);
3399    }
3400 }
3401 
3402 static uint8_t
get_num_components(nir_variable * var)3403 get_num_components(nir_variable *var)
3404 {
3405    if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
3406       return 4;
3407 
3408    return glsl_get_vector_elements(glsl_without_array(var->type));
3409 }
3410 
3411 static void
tcs_add_output_reads(nir_shader * shader,BITSET_WORD ** read)3412 tcs_add_output_reads(nir_shader *shader, BITSET_WORD **read)
3413 {
3414    nir_foreach_function_impl(impl, shader) {
3415       nir_foreach_block(block, impl) {
3416          nir_foreach_instr(instr, block) {
3417             if (instr->type != nir_instr_type_intrinsic)
3418                continue;
3419 
3420             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3421             if (intrin->intrinsic != nir_intrinsic_load_deref)
3422                continue;
3423 
3424             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
3425             if (!nir_deref_mode_is(deref, nir_var_shader_out))
3426                continue;
3427 
3428             nir_variable *var = nir_deref_instr_get_variable(deref);
3429             for (unsigned i = 0; i < get_num_components(var); i++) {
3430                if (var->data.location < VARYING_SLOT_VAR0)
3431                   continue;
3432 
3433                unsigned comp = var->data.location_frac;
3434                set_variable_io_mask(read[comp + i], var, shader->info.stage);
3435             }
3436          }
3437       }
3438    }
3439 }
3440 
3441 /* We need to replace any interp intrinsics with undefined (shader_temp) inputs
3442  * as no further NIR pass expects to see this.
3443  */
3444 static bool
replace_unused_interpolate_at_with_undef(nir_builder * b,nir_instr * instr,void * data)3445 replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
3446                                          void *data)
3447 {
3448    if (instr->type == nir_instr_type_intrinsic) {
3449       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
3450 
3451       if (intrin->intrinsic == nir_intrinsic_interp_deref_at_centroid ||
3452           intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
3453           intrin->intrinsic == nir_intrinsic_interp_deref_at_offset) {
3454          nir_variable *var = nir_intrinsic_get_var(intrin, 0);
3455          if (var->data.mode == nir_var_shader_temp) {
3456             /* Create undef and rewrite the interp uses */
3457             nir_def *undef =
3458                nir_undef(b, intrin->def.num_components,
3459                              intrin->def.bit_size);
3460             nir_def_replace(&intrin->def, undef);
3461             return true;
3462          }
3463       }
3464    }
3465 
3466    return false;
3467 }
3468 
3469 static void
fixup_vars_lowered_to_temp(nir_shader * shader,nir_variable_mode mode)3470 fixup_vars_lowered_to_temp(nir_shader *shader, nir_variable_mode mode)
3471 {
3472    /* Remove all interpolate uses of the unset varying and replace with undef. */
3473    if (mode == nir_var_shader_in && shader->info.stage == MESA_SHADER_FRAGMENT) {
3474       (void) nir_shader_instructions_pass(shader,
3475                                           replace_unused_interpolate_at_with_undef,
3476                                           nir_metadata_control_flow,
3477                                           NULL);
3478    }
3479 
3480    nir_lower_global_vars_to_local(shader);
3481    nir_fixup_deref_modes(shader);
3482 }
3483 
3484 /**
3485  * Helper for removing unused shader I/O variables, by demoting them to global
3486  * variables (which may then be dead code eliminated).
3487  *
3488  * Example usage is:
3489  *
3490  * progress = nir_remove_unused_io_vars(producer, consumer, nir_var_shader_out,
3491  *                                      read, patches_read) ||
3492  *                                      progress;
3493  *
3494  * The "used" should be an array of 4 BITSET_WORDs representing each
3495  * .location_frac used.  Note that for vector variables, only the first channel
3496  * (.location_frac) is examined for deciding if the variable is used!
3497  */
3498 static bool
remove_unused_io_vars(nir_shader * producer,nir_shader * consumer,struct gl_shader_program * prog,nir_variable_mode mode,BITSET_WORD ** used_by_other_stage)3499 remove_unused_io_vars(nir_shader *producer, nir_shader *consumer,
3500                       struct gl_shader_program *prog,
3501                       nir_variable_mode mode,
3502                       BITSET_WORD **used_by_other_stage)
3503 {
3504    assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
3505 
3506    bool progress = false;
3507    nir_shader *shader = mode == nir_var_shader_out ? producer : consumer;
3508 
3509    BITSET_WORD **used;
3510    nir_foreach_variable_with_modes_safe(var, shader, mode) {
3511       used = used_by_other_stage;
3512 
3513       /* Skip builtins dead builtins are removed elsewhere */
3514       if (is_gl_identifier(var->name))
3515          continue;
3516 
3517       if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
3518          continue;
3519 
3520       /* Skip xfb varyings and any other type we cannot remove */
3521       if (var->data.always_active_io)
3522          continue;
3523 
3524       if (var->data.explicit_xfb_buffer)
3525          continue;
3526 
3527       BITSET_WORD *other_stage = used[var->data.location_frac];
3528 
3529       /* if location == -1 lower varying to global as it has no match and is not
3530        * a xfb varying, this must be done after skiping bultins as builtins
3531        * could be assigned a location of -1.
3532        * We also lower unused varyings with explicit locations.
3533        */
3534       bool use_found = false;
3535       if (var->data.location >= 0) {
3536          unsigned location = var->data.location - VARYING_SLOT_VAR0;
3537 
3538          const struct glsl_type *type = var->type;
3539          if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
3540             assert(glsl_type_is_array(type));
3541             type = glsl_get_array_element(type);
3542          }
3543 
3544          unsigned slots = glsl_count_attribute_slots(type, false);
3545          for (unsigned i = 0; i < slots; i++) {
3546             if (BITSET_TEST(other_stage, location + i)) {
3547                use_found = true;
3548                break;
3549             }
3550          }
3551       }
3552 
3553       if (!use_found) {
3554          /* This one is invalid, make it a global variable instead */
3555          var->data.location = 0;
3556          var->data.mode = nir_var_shader_temp;
3557 
3558          progress = true;
3559 
3560          if (mode == nir_var_shader_in) {
3561             if (!prog->IsES && prog->GLSL_Version <= 120) {
3562                /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
3563                 *
3564                 *     Only those varying variables used (i.e. read) in
3565                 *     the fragment shader executable must be written to
3566                 *     by the vertex shader executable; declaring
3567                 *     superfluous varying variables in a vertex shader is
3568                 *     permissible.
3569                 *
3570                 * We interpret this text as meaning that the VS must
3571                 * write the variable for the FS to read it.  See
3572                 * "glsl1-varying read but not written" in piglit.
3573                 */
3574                linker_error(prog, "%s shader varying %s not written "
3575                             "by %s shader\n.",
3576                             _mesa_shader_stage_to_string(consumer->info.stage),
3577                             var->name,
3578                             _mesa_shader_stage_to_string(producer->info.stage));
3579             } else {
3580                linker_warning(prog, "%s shader varying %s not written "
3581                               "by %s shader\n.",
3582                               _mesa_shader_stage_to_string(consumer->info.stage),
3583                               var->name,
3584                               _mesa_shader_stage_to_string(producer->info.stage));
3585             }
3586          }
3587       }
3588    }
3589 
3590    if (progress)
3591       fixup_vars_lowered_to_temp(shader, mode);
3592 
3593    return progress;
3594 }
3595 
3596 static bool
remove_unused_varyings(nir_shader * producer,nir_shader * consumer,struct gl_shader_program * prog,void * mem_ctx)3597 remove_unused_varyings(nir_shader *producer, nir_shader *consumer,
3598                        struct gl_shader_program *prog, void *mem_ctx)
3599 {
3600    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
3601    assert(consumer->info.stage != MESA_SHADER_VERTEX);
3602 
3603    int max_loc_out = 0;
3604    nir_foreach_shader_out_variable(var, producer) {
3605       if (var->data.location < VARYING_SLOT_VAR0)
3606          continue;
3607 
3608       const struct glsl_type *type = var->type;
3609       if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
3610          assert(glsl_type_is_array(type));
3611          type = glsl_get_array_element(type);
3612       }
3613       unsigned slots = glsl_count_attribute_slots(type, false);
3614 
3615       max_loc_out = max_loc_out < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3616          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_out;
3617    }
3618 
3619    int max_loc_in = 0;
3620    nir_foreach_shader_in_variable(var, consumer) {
3621       if (var->data.location < VARYING_SLOT_VAR0)
3622          continue;
3623 
3624       const struct glsl_type *type = var->type;
3625       if (nir_is_arrayed_io(var, consumer->info.stage) || var->data.per_view) {
3626          assert(glsl_type_is_array(type));
3627          type = glsl_get_array_element(type);
3628       }
3629       unsigned slots = glsl_count_attribute_slots(type, false);
3630 
3631       max_loc_in = max_loc_in < (var->data.location - VARYING_SLOT_VAR0) + slots ?
3632          (var->data.location - VARYING_SLOT_VAR0) + slots : max_loc_in;
3633    }
3634 
3635    /* Old glsl shaders that don't use explicit locations can contain greater
3636     * than 64 varyings before unused varyings are removed so we must count them
3637     * and make use of the BITSET macros to keep track of used slots. Once we
3638     * have removed these excess varyings we can make use of further nir varying
3639     * linking optimimisation passes.
3640     */
3641    BITSET_WORD *read[4];
3642    BITSET_WORD *written[4];
3643    int max_loc = MAX2(max_loc_in, max_loc_out);
3644    for (unsigned i = 0; i < 4; i++) {
3645       read[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3646       written[i] = rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_loc));
3647    }
3648 
3649    nir_foreach_shader_out_variable(var, producer) {
3650       if (var->data.location < VARYING_SLOT_VAR0)
3651          continue;
3652 
3653       for (unsigned i = 0; i < get_num_components(var); i++) {
3654          unsigned comp = var->data.location_frac;
3655          set_variable_io_mask(written[comp + i], var, producer->info.stage);
3656       }
3657    }
3658 
3659    nir_foreach_shader_in_variable(var, consumer) {
3660       if (var->data.location < VARYING_SLOT_VAR0)
3661          continue;
3662 
3663       for (unsigned i = 0; i < get_num_components(var); i++) {
3664          unsigned comp = var->data.location_frac;
3665          set_variable_io_mask(read[comp + i], var, consumer->info.stage);
3666       }
3667    }
3668 
3669    /* Each TCS invocation can read data written by other TCS invocations,
3670     * so even if the outputs are not used by the TES we must also make
3671     * sure they are not read by the TCS before demoting them to globals.
3672     */
3673    if (producer->info.stage == MESA_SHADER_TESS_CTRL)
3674       tcs_add_output_reads(producer, read);
3675 
3676    bool progress = false;
3677    progress =
3678       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_out, read);
3679    progress =
3680       remove_unused_io_vars(producer, consumer, prog, nir_var_shader_in, written) || progress;
3681 
3682    return progress;
3683 }
3684 
3685 static bool
should_add_varying_match_record(nir_variable * const input_var,struct gl_shader_program * prog,struct gl_linked_shader * producer,struct gl_linked_shader * consumer)3686 should_add_varying_match_record(nir_variable *const input_var,
3687                                 struct gl_shader_program *prog,
3688                                 struct gl_linked_shader *producer,
3689                                 struct gl_linked_shader *consumer) {
3690 
3691    /* If a matching input variable was found, add this output (and the input) to
3692     * the set.  If this is a separable program and there is no consumer stage,
3693     * add the output.
3694     *
3695     * Always add TCS outputs. They are shared by all invocations
3696     * within a patch and can be used as shared memory.
3697     */
3698    return input_var || (prog->SeparateShader && consumer == NULL) ||
3699              producer->Stage == MESA_SHADER_TESS_CTRL;
3700 }
3701 
/* This assigns some initial unoptimised varying locations so that our nir
 * optimisations can perform some initial optimisations and also does the
 * initial processing of transform feedback: it gathers tfeedback candidates,
 * matches them against the xfb declarations, and marks matched varyings (and
 * their corresponding inputs) as always-active so later passes don't remove
 * or split them.
 *
 * Returns false on a linker error (stream mismatch, unmatched xfb varying,
 * or a failed xfb lowering).
 */
static bool
assign_initial_varying_locations(const struct gl_constants *consts,
                                 const struct gl_extensions *exts,
                                 void *mem_ctx,
                                 struct gl_shader_program *prog,
                                 struct gl_linked_shader *producer,
                                 struct gl_linked_shader *consumer,
                                 unsigned num_xfb_decls,
                                 struct xfb_decl *xfb_decls,
                                 struct varying_matches *vm)
{
   init_varying_matches(mem_ctx, vm, consts, exts,
                        producer ? producer->Stage : MESA_SHADER_NONE,
                        consumer ? consumer->Stage : MESA_SHADER_NONE,
                        prog->SeparateShader);

   /* Name-keyed tables used to pair producer outputs with consumer inputs
    * and to look up transform feedback candidates by varying name.
    */
   struct hash_table *tfeedback_candidates =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   struct hash_table *consumer_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   struct hash_table *consumer_interface_inputs =
         _mesa_hash_table_create(mem_ctx, _mesa_hash_string,
                                 _mesa_key_string_equal);
   nir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
      NULL,
   };

   if (consumer)
      populate_consumer_input_sets(mem_ctx, consumer->Program->nir,
                                   consumer_inputs, consumer_interface_inputs,
                                   consumer_inputs_with_locations);

   if (producer) {
      nir_foreach_shader_out_variable(output_var, producer->Program->nir) {
         /* Only geometry shaders can use non-zero streams */
         assert(output_var->data.stream == 0 ||
                (output_var->data.stream < MAX_VERTEX_STREAMS &&
                 producer->Stage == MESA_SHADER_GEOMETRY));

         if (num_xfb_decls > 0) {
            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
             * ("Vertex Shader Variables / Output Variables")
             *
             * "Each program object can specify a set of output variables from
             * one shader to be recorded in transform feedback mode (see
             * section 13.3). The variables that can be recorded are those
             * emitted by the first active shader, in order, from the
             * following list:
             *
             *  * geometry shader
             *  * tessellation evaluation shader
             *  * tessellation control shader
             *  * vertex shader"
             *
             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
             * Variables / Output Variables") tessellation control shader is
             * not included in the stages list.
             */
            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {

               const struct glsl_type *type = output_var->data.from_named_ifc_block ?
                  output_var->interface_type : output_var->type;
               /* Non-patch TCS outputs are per-vertex arrays; strip the
                * outer array dimension before walking the type.
                */
               if (!output_var->data.patch && producer->Stage == MESA_SHADER_TESS_CTRL) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               const struct glsl_struct_field *ifc_member = NULL;
               if (output_var->data.from_named_ifc_block) {
                  ifc_member =
                     glsl_get_struct_field_data(glsl_without_array(type),
                        glsl_get_field_index(glsl_without_array(type), output_var->name));
               }

               /* Pick the name the candidate generator will key on: the
                * variable name for structs/AoA, the interface block's type
                * name for named interface blocks, the plain name otherwise.
                */
               char *name;
               if (glsl_type_is_struct(glsl_without_array(type)) ||
                   (glsl_type_is_array(type) && glsl_type_is_array(glsl_get_array_element(type)))) {
                  type = output_var->type;
                  name = ralloc_strdup(NULL, output_var->name);
               } else if (glsl_type_is_interface(glsl_without_array(type))) {
                  name = ralloc_strdup(NULL, glsl_get_type_name(glsl_without_array(type)));
               } else  {
                  name = ralloc_strdup(NULL, output_var->name);
               }

               struct tfeedback_candidate_generator_state state;
               state.mem_ctx = mem_ctx;
               state.tfeedback_candidates = tfeedback_candidates;
               state.stage = producer->Stage;
               state.toplevel_var = output_var;
               state.varying_floats = 0;
               state.xfb_offset_floats = 0;

               tfeedback_candidate_generator(&state, &name, strlen(name), type,
                                             ifc_member);
               ralloc_free(name);
            }
         }

         nir_variable *const input_var =
            get_matching_input(mem_ctx, output_var, consumer_inputs,
                               consumer_interface_inputs,
                               consumer_inputs_with_locations);

         if (should_add_varying_match_record(input_var, prog, producer,
                                             consumer)) {
            varying_matches_record(mem_ctx, vm, output_var, input_var);
         }

         /* Only stream 0 outputs can be consumed in the next stage */
         if (input_var && output_var->data.stream != 0) {
            linker_error(prog, "output %s is assigned to stream=%d but "
                         "is linked to an input, which requires stream=0",
                         output_var->name, output_var->data.stream);
            return false;
         }
      }
   } else {
      /* If there's no producer stage, then this must be a separable program.
       * For example, we may have a program that has just a fragment shader.
       * Later this program will be used with some arbitrary vertex (or
       * geometry) shader program.  This means that locations must be assigned
       * for all the inputs.
       */
      nir_foreach_shader_in_variable(input_var, consumer->Program->nir) {
         varying_matches_record(mem_ctx, vm, NULL, input_var);
      }
   }

   /* Match each xfb declaration against the candidates gathered above. */
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      const struct tfeedback_candidate *matched_candidate
         = xfb_decl_find_candidate(&xfb_decls[i], prog, tfeedback_candidates);

      /* xfb_decl_find_candidate() already reported the linker error. */
      if (matched_candidate == NULL)
         return false;

      /* There are two situations where a new output varying is needed:
       *
       *  - If varying packing is disabled for xfb and the current declaration
       *    is subscripting an array, whether the subscript is aligned or not.
       *    to preserve the rest of the array for the consumer.
       *
       *  - If a builtin variable needs to be copied to a new variable
       *    before its content is modified by another lowering pass (e.g.
       *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
       */
      const bool lowered =
         (vm->disable_xfb_packing && xfb_decls[i].is_subscripted) ||
         (matched_candidate->toplevel_var->data.explicit_location &&
          matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
          (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
          (consts->ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
              BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));

      if (lowered) {
         nir_variable *new_var;
         struct tfeedback_candidate *new_candidate = NULL;

         new_var = gl_nir_lower_xfb_varying(producer->Program->nir,
                                            xfb_decls[i].orig_name,
                                            matched_candidate->toplevel_var);
         if (new_var == NULL)
            return false;

         /* Create new candidate and replace matched_candidate */
         new_candidate = rzalloc(mem_ctx, struct tfeedback_candidate);
         new_candidate->toplevel_var = new_var;
         new_candidate->type = new_var->type;
         new_candidate->struct_offset_floats = 0;
         new_candidate->xfb_offset_floats = 0;
         _mesa_hash_table_insert(tfeedback_candidates,
                                 ralloc_strdup(mem_ctx, new_var->name),
                                 new_candidate);

         xfb_decl_set_lowered_candidate(&xfb_decls[i], new_candidate);
         matched_candidate = new_candidate;
      }

      /* Mark as xfb varying */
      matched_candidate->toplevel_var->data.is_xfb = 1;

      /* Mark xfb varyings as always active */
      matched_candidate->toplevel_var->data.always_active_io = 1;

      /* Mark any corresponding inputs as always active also. We must do this
       * because we have a NIR pass that lowers vectors to scalars and another
       * that removes unused varyings.
       * We don't split varyings marked as always active because there is no
       * point in doing so. This means we need to mark both sides of the
       * interface as always active otherwise we will have a mismatch and
       * start removing things we shouldn't.
       */
      nir_variable *const input_var =
         get_matching_input(mem_ctx, matched_candidate->toplevel_var,
                            consumer_inputs, consumer_interface_inputs,
                            consumer_inputs_with_locations);
      if (input_var) {
         input_var->data.is_xfb = 1;
         input_var->data.always_active_io = 1;
      }

      /* Add the xfb varying to varying matches if it wasn't already added */
      if ((!should_add_varying_match_record(input_var, prog, producer,
                                            consumer) &&
           !matched_candidate->toplevel_var->data.is_xfb_only) || lowered) {
         matched_candidate->toplevel_var->data.is_xfb_only = 1;
         varying_matches_record(mem_ctx, vm, matched_candidate->toplevel_var,
                                NULL);
      }
   }

   uint64_t reserved_out_slots = 0;
   if (producer)
      reserved_out_slots = reserved_varying_slot(producer, nir_var_shader_out);

   uint64_t reserved_in_slots = 0;
   if (consumer)
      reserved_in_slots = reserved_varying_slot(consumer, nir_var_shader_in);

   /* Assign temporary user varying locations. This is required for our NIR
    * varying optimisations to do their matching.
    */
   const uint64_t reserved_slots = reserved_out_slots | reserved_in_slots;
   varying_matches_assign_temp_locations(vm, prog, reserved_slots);

   /* Remember where each xfb candidate landed so it can be re-found after
    * the optimisation passes reshuffle the variables (see
    * assign_final_varying_locations()).
    */
   for (unsigned i = 0; i < num_xfb_decls; ++i) {
      if (!xfb_decl_is_varying(&xfb_decls[i]))
         continue;

      xfb_decls[i].matched_candidate->initial_location =
         xfb_decls[i].matched_candidate->toplevel_var->data.location;
      xfb_decls[i].matched_candidate->initial_location_frac =
         xfb_decls[i].matched_candidate->toplevel_var->data.location_frac;
   }

   return true;
}
3948 
/* Run the inter-stage optimisation pipeline over one producer/consumer pair.
 * The pass ordering here is deliberate: scalarisation (when safe) comes
 * first, then per-shader opts, then cross-stage varying opts, then dead
 * varying removal and a second round of cleanup if anything was demoted.
 */
static void
link_shader_opts(struct varying_matches *vm,
                 nir_shader *producer, nir_shader *consumer,
                 struct gl_shader_program *prog, void *mem_ctx)
{
   /* If we can't pack the stage using this pass then we can't lower io to
    * scalar just yet. Instead we leave it to a later NIR linking pass that uses
    * ARB_enhanced_layout style packing to pack things further.
    *
    * Otherwise we might end up causing linking errors and perf regressions
    * because the new scalars will be assigned individual slots and can overflow
    * the available slots.
    */
   if (producer->options->lower_to_scalar && !vm->disable_varying_packing &&
      !vm->disable_xfb_packing) {
      NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
   }

   gl_nir_opts(producer);
   gl_nir_opts(consumer);

   /* Cross-stage constant/uniform propagation can open up new opportunities
    * in the consumer, so re-run its opts when anything changed.
    */
   if (nir_link_opt_varyings(producer, consumer))
      gl_nir_opts(consumer);

   NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
   NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

   if (remove_unused_varyings(producer, consumer, prog, mem_ctx)) {
      NIR_PASS(_, producer, nir_lower_global_vars_to_local);
      NIR_PASS(_, consumer, nir_lower_global_vars_to_local);

      gl_nir_opts(producer);
      gl_nir_opts(consumer);

      /* Optimizations can cause varyings to become unused.
       * nir_compact_varyings() depends on all dead varyings being removed so
       * we need to call nir_remove_dead_variables() again here.
       */
      NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out,
                 NULL);
      NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in,
                 NULL);
   }

   /* Make mediump/highp declarations agree across the interface. */
   nir_link_varying_precision(producer, consumer);
}
3996 
3997 /**
3998  * Assign locations for all variables that are produced in one pipeline stage
3999  * (the "producer") and consumed in the next stage (the "consumer").
4000  *
4001  * Variables produced by the producer may also be consumed by transform
4002  * feedback.
4003  *
4004  * \param num_xfb_decls is the number of declarations indicating
4005  *        variables that may be consumed by transform feedback.
4006  *
4007  * \param xfb_decls is a pointer to an array of xfb_decl objects
4008  *        representing the result of parsing the strings passed to
4009  *        glTransformFeedbackVaryings().  assign_location() will be called for
4010  *        each of these objects that matches one of the outputs of the
4011  *        producer.
4012  *
4013  * When num_xfb_decls is nonzero, it is permissible for the consumer to
4014  * be NULL.  In this case, varying locations are assigned solely based on the
4015  * requirements of transform feedback.
4016  */
4017 static bool
assign_final_varying_locations(const struct gl_constants * consts,const struct gl_extensions * exts,void * mem_ctx,struct gl_shader_program * prog,struct gl_linked_shader * producer,struct gl_linked_shader * consumer,unsigned num_xfb_decls,struct xfb_decl * xfb_decls,const uint64_t reserved_slots,struct varying_matches * vm)4018 assign_final_varying_locations(const struct gl_constants *consts,
4019                                const struct gl_extensions *exts,
4020                                void *mem_ctx,
4021                                struct gl_shader_program *prog,
4022                                struct gl_linked_shader *producer,
4023                                struct gl_linked_shader *consumer,
4024                                unsigned num_xfb_decls,
4025                                struct xfb_decl *xfb_decls,
4026                                const uint64_t reserved_slots,
4027                                struct varying_matches *vm)
4028 {
4029    init_varying_matches(mem_ctx, vm, consts, exts,
4030                         producer ? producer->Stage : MESA_SHADER_NONE,
4031                         consumer ? consumer->Stage : MESA_SHADER_NONE,
4032                         prog->SeparateShader);
4033 
4034    /* Regather varying matches as we ran optimisations and the previous pointers
4035     * are no longer valid.
4036     */
4037    if (producer) {
4038       nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
4039          if (var_out->data.location < VARYING_SLOT_VAR0 ||
4040              var_out->data.explicit_location)
4041             continue;
4042 
4043          if (vm->num_matches == vm->matches_capacity) {
4044             vm->matches_capacity *= 2;
4045             vm->matches = (struct match *)
4046                reralloc(mem_ctx, vm->matches, struct match,
4047                         vm->matches_capacity);
4048          }
4049 
4050          vm->matches[vm->num_matches].packing_class
4051             = varying_matches_compute_packing_class(var_out);
4052          vm->matches[vm->num_matches].packing_order
4053             = varying_matches_compute_packing_order(var_out);
4054 
4055          vm->matches[vm->num_matches].producer_var = var_out;
4056          vm->matches[vm->num_matches].consumer_var = NULL;
4057          vm->num_matches++;
4058       }
4059 
4060       /* Regather xfb varyings too */
4061       for (unsigned i = 0; i < num_xfb_decls; i++) {
4062          if (!xfb_decl_is_varying(&xfb_decls[i]))
4063             continue;
4064 
4065          /* Varying pointer was already reset */
4066          if (xfb_decls[i].matched_candidate->initial_location == -1)
4067             continue;
4068 
4069          bool UNUSED is_reset = false;
4070          bool UNUSED no_outputs = true;
4071          nir_foreach_shader_out_variable(var_out, producer->Program->nir) {
4072             no_outputs = false;
4073             assert(var_out->data.location != -1);
4074             if (var_out->data.location ==
4075                 xfb_decls[i].matched_candidate->initial_location &&
4076                 var_out->data.location_frac ==
4077                 xfb_decls[i].matched_candidate->initial_location_frac) {
4078                xfb_decls[i].matched_candidate->toplevel_var = var_out;
4079                xfb_decls[i].matched_candidate->initial_location = -1;
4080                is_reset = true;
4081                break;
4082             }
4083          }
4084          assert(is_reset || no_outputs);
4085       }
4086    }
4087 
4088    bool found_match = false;
4089    if (consumer) {
4090       nir_foreach_shader_in_variable(var_in, consumer->Program->nir) {
4091          if (var_in->data.location < VARYING_SLOT_VAR0 ||
4092              var_in->data.explicit_location)
4093             continue;
4094 
4095          found_match = false;
4096          for (unsigned i = 0; i < vm->num_matches; i++) {
4097             if (vm->matches[i].producer_var &&
4098                 (vm->matches[i].producer_var->data.location == var_in->data.location &&
4099                  vm->matches[i].producer_var->data.location_frac == var_in->data.location_frac)) {
4100 
4101                vm->matches[i].consumer_var = var_in;
4102                found_match = true;
4103                break;
4104             }
4105          }
4106          if (!found_match) {
4107             if (vm->num_matches == vm->matches_capacity) {
4108                vm->matches_capacity *= 2;
4109                vm->matches = (struct match *)
4110                   reralloc(mem_ctx, vm->matches, struct match,
4111                            vm->matches_capacity);
4112             }
4113 
4114             vm->matches[vm->num_matches].packing_class
4115                = varying_matches_compute_packing_class(var_in);
4116             vm->matches[vm->num_matches].packing_order
4117                = varying_matches_compute_packing_order(var_in);
4118 
4119             vm->matches[vm->num_matches].producer_var = NULL;
4120             vm->matches[vm->num_matches].consumer_var = var_in;
4121             vm->num_matches++;
4122          }
4123       }
4124    }
4125 
4126    uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
4127    const unsigned slots_used =
4128       varying_matches_assign_locations(vm, prog, components, reserved_slots);
4129    varying_matches_store_locations(vm);
4130 
4131    for (unsigned i = 0; i < num_xfb_decls; ++i) {
4132       if (xfb_decl_is_varying(&xfb_decls[i])) {
4133          if (!xfb_decl_assign_location(&xfb_decls[i], consts, prog,
4134              vm->disable_varying_packing, vm->xfb_enabled))
4135             return false;
4136       }
4137    }
4138 
4139    if (producer) {
4140       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4141                                    nir_var_shader_out, 0, producer,
4142                                    vm->disable_varying_packing,
4143                                    vm->disable_xfb_packing, vm->xfb_enabled);
4144       nir_lower_pack(producer->Program->nir);
4145    }
4146 
4147    if (consumer) {
4148       unsigned consumer_vertices = 0;
4149       if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
4150          consumer_vertices = consumer->Program->nir->info.gs.vertices_in;
4151 
4152       gl_nir_lower_packed_varyings(consts, prog, mem_ctx, slots_used, components,
4153                                    nir_var_shader_in, consumer_vertices,
4154                                    consumer, vm->disable_varying_packing,
4155                                    vm->disable_xfb_packing, vm->xfb_enabled);
4156       nir_lower_pack(consumer->Program->nir);
4157    }
4158 
4159    return true;
4160 }
4161 
4162 static bool
check_against_output_limit(const struct gl_constants * consts,gl_api api,struct gl_shader_program * prog,struct gl_linked_shader * producer,unsigned num_explicit_locations)4163 check_against_output_limit(const struct gl_constants *consts, gl_api api,
4164                            struct gl_shader_program *prog,
4165                            struct gl_linked_shader *producer,
4166                            unsigned num_explicit_locations)
4167 {
4168    unsigned output_vectors = num_explicit_locations;
4169    nir_foreach_shader_out_variable(var, producer->Program->nir) {
4170       if (!var->data.explicit_location &&
4171           var_counts_against_varying_limit(producer->Stage, var)) {
4172          /* outputs for fragment shader can't be doubles */
4173          output_vectors += glsl_count_attribute_slots(var->type, false);
4174       }
4175    }
4176 
4177    assert(producer->Stage != MESA_SHADER_FRAGMENT);
4178    unsigned max_output_components =
4179       consts->Program[producer->Stage].MaxOutputComponents;
4180 
4181    const unsigned output_components = output_vectors * 4;
4182    if (output_components > max_output_components) {
4183       if (api == API_OPENGLES2 || prog->IsES)
4184          linker_error(prog, "%s shader uses too many output vectors "
4185                       "(%u > %u)\n",
4186                       _mesa_shader_stage_to_string(producer->Stage),
4187                       output_vectors,
4188                       max_output_components / 4);
4189       else
4190          linker_error(prog, "%s shader uses too many output components "
4191                       "(%u > %u)\n",
4192                       _mesa_shader_stage_to_string(producer->Stage),
4193                       output_components,
4194                       max_output_components);
4195 
4196       return false;
4197    }
4198 
4199    return true;
4200 }
4201 
4202 static bool
check_against_input_limit(const struct gl_constants * consts,gl_api api,struct gl_shader_program * prog,struct gl_linked_shader * consumer,unsigned num_explicit_locations)4203 check_against_input_limit(const struct gl_constants *consts, gl_api api,
4204                           struct gl_shader_program *prog,
4205                           struct gl_linked_shader *consumer,
4206                           unsigned num_explicit_locations)
4207 {
4208    unsigned input_vectors = num_explicit_locations;
4209 
4210    nir_foreach_shader_in_variable(var, consumer->Program->nir) {
4211       if (!var->data.explicit_location &&
4212           var_counts_against_varying_limit(consumer->Stage, var)) {
4213          /* vertex inputs aren't varying counted */
4214          input_vectors += glsl_count_attribute_slots(var->type, false);
4215       }
4216    }
4217 
4218    assert(consumer->Stage != MESA_SHADER_VERTEX);
4219    unsigned max_input_components =
4220       consts->Program[consumer->Stage].MaxInputComponents;
4221 
4222    const unsigned input_components = input_vectors * 4;
4223    if (input_components > max_input_components) {
4224       if (api == API_OPENGLES2 || prog->IsES)
4225          linker_error(prog, "%s shader uses too many input vectors "
4226                       "(%u > %u)\n",
4227                       _mesa_shader_stage_to_string(consumer->Stage),
4228                       input_vectors,
4229                       max_input_components / 4);
4230       else
4231          linker_error(prog, "%s shader uses too many input components "
4232                       "(%u > %u)\n",
4233                       _mesa_shader_stage_to_string(consumer->Stage),
4234                       input_components,
4235                       max_input_components);
4236 
4237       return false;
4238    }
4239 
4240    return true;
4241 }
4242 
4243 /* Lower unset/unused inputs/outputs */
4244 static void
remove_unused_shader_inputs_and_outputs(struct gl_shader_program * prog,unsigned stage,nir_variable_mode mode)4245 remove_unused_shader_inputs_and_outputs(struct gl_shader_program *prog,
4246                                         unsigned stage, nir_variable_mode mode)
4247 {
4248    bool progress = false;
4249    nir_shader *shader = prog->_LinkedShaders[stage]->Program->nir;
4250 
4251    nir_foreach_variable_with_modes_safe(var, shader, mode) {
4252       if (!var->data.is_xfb_only && var->data.location == -1) {
4253          var->data.location = 0;
4254          var->data.mode = nir_var_shader_temp;
4255          progress = true;
4256       }
4257    }
4258 
4259    if (progress)
4260       fixup_vars_lowered_to_temp(shader, mode);
4261 }
4262 
/**
 * Link varyings across all stages of the program.
 *
 * Gathers transform-feedback declarations (from layout qualifiers or the
 * TransformFeedbackVaryings API), assigns initial then final varying
 * locations between each producer/consumer pair, removes unused varyings,
 * and validates the result against the per-stage input/output limits.
 *
 * \param first  index of the first linked stage in prog->_LinkedShaders
 * \param last   index of the last linked stage
 * \return true on success; false with a linker error recorded otherwise.
 */
static bool
link_varyings(struct gl_shader_program *prog, unsigned first,
              unsigned last, const struct gl_constants *consts,
              const struct gl_extensions *exts, gl_api api, void *mem_ctx)
{
   bool has_xfb_qualifiers = false;
   unsigned num_xfb_decls = 0;
   char **varying_names = NULL;
   bool compact_arrays = false;
   struct xfb_decl *xfb_decls = NULL;

   /* Last stage is past the fragment shader: no varyings to link. */
   if (last > MESA_SHADER_FRAGMENT)
      return true;

   /* From the ARB_enhanced_layouts spec:
    *
    *    "If the shader used to record output variables for transform feedback
    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
    *    qualifiers, the values specified by TransformFeedbackVaryings are
    *    ignored, and the set of variables captured for transform feedback is
    *    instead derived from the specified layout qualifiers."
    */
   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
      /* Find last stage before fragment shader */
      if (prog->_LinkedShaders[i]) {
         has_xfb_qualifiers =
            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                          prog, &num_xfb_decls,
                                          &varying_names,
                                          &compact_arrays);
         break;
      }
   }

   /* No in-shader qualifiers: fall back to the varyings requested via the
    * TransformFeedbackVaryings API.
    */
   if (!has_xfb_qualifiers) {
      num_xfb_decls = prog->TransformFeedback.NumVarying;
      varying_names = prog->TransformFeedback.VaryingNames;
   }

   if (num_xfb_decls != 0) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * the <count> specified by TransformFeedbackVaryingsEXT is
       *     non-zero, but the program object has no vertex or geometry
       *     shader;
       */
      if (first >= MESA_SHADER_FRAGMENT) {
         linker_error(prog, "Transform feedback varyings specified, but "
                      "no vertex, tessellation, or geometry shader is "
                      "present.\n");
         return false;
      }

      xfb_decls = rzalloc_array(mem_ctx, struct xfb_decl,
                                      num_xfb_decls);
      if (!parse_xfb_decls(consts, exts, prog, mem_ctx, num_xfb_decls,
                           varying_names, xfb_decls, compact_arrays))
         return false;
   }

   /* Build a dense array of the linked stages, in pipeline order. */
   struct gl_linked_shader *linked_shader[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shader[num_shaders++] = prog->_LinkedShaders[i];
   }

   /* Assign initial locations for the final stage's outputs when there is
    * no fragment shader but XFB or SSO still needs them.
    */
   struct varying_matches vm;
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
         struct gl_linked_shader *producer = prog->_LinkedShaders[last];
         if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                               producer, NULL, num_xfb_decls,
                                               xfb_decls, &vm))
            return false;
   }

   if (last <= MESA_SHADER_FRAGMENT && !prog->SeparateShader) {
      remove_unused_shader_inputs_and_outputs(prog, first, nir_var_shader_in);
      remove_unused_shader_inputs_and_outputs(prog, last, nir_var_shader_out);
   }

   /* For SSO the first stage's inputs also need initial locations. */
   if (prog->SeparateShader) {
      struct gl_linked_shader *consumer = linked_shader[0];
      if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog, NULL,
                                            consumer, 0, NULL, &vm))
         return false;
   }

   if (num_shaders == 1) {
      /* Linking shaders also optimizes them. Separate shaders, compute shaders
       * and shaders with a fixed-func VS or FS that don't need linking are
       * optimized here.
       */
      gl_nir_opts(linked_shader[0]->Program->nir);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      for (int i = num_shaders - 2; i >= 0; i--) {
         /* XFB declarations only apply to the last pre-fragment stage. */
         unsigned stage_num_xfb_decls =
            linked_shader[i + 1]->Stage == MESA_SHADER_FRAGMENT ?
            num_xfb_decls : 0;

         if (!assign_initial_varying_locations(consts, exts, mem_ctx, prog,
                                               linked_shader[i],
                                               linked_shader[i + 1],
                                               stage_num_xfb_decls, xfb_decls,
                                               &vm))
            return false;

         /* Now that validation is done its safe to remove unused varyings. As
          * we have both a producer and consumer its safe to remove unused
          * varyings even if the program is a SSO because the stages are being
          * linked together i.e. we have a multi-stage SSO.
          */
         link_shader_opts(&vm, linked_shader[i]->Program->nir,
                          linked_shader[i + 1]->Program->nir,
                          prog, mem_ctx);

         remove_unused_shader_inputs_and_outputs(prog, linked_shader[i]->Stage,
                                                 nir_var_shader_out);
         remove_unused_shader_inputs_and_outputs(prog,
                                                 linked_shader[i + 1]->Stage,
                                                 nir_var_shader_in);
      }
   }

   if (!prog->SeparateShader) {
      /* If not SSO remove unused varyings from the first/last stage */
      NIR_PASS(_, prog->_LinkedShaders[first]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_in, NULL);
      NIR_PASS(_, prog->_LinkedShaders[last]->Program->nir,
                 nir_remove_dead_variables, nir_var_shader_out, NULL);
   } else {
      /* Sort inputs / outputs into a canonical order.  This is necessary so
       * that inputs / outputs of separable shaders will be assigned
       * predictable locations regardless of the order in which declarations
       * appeared in the shader source.
       */
      if (first != MESA_SHADER_VERTEX) {
         canonicalize_shader_io(prog->_LinkedShaders[first]->Program->nir,
                                nir_var_shader_in);
      }

      if (last != MESA_SHADER_FRAGMENT) {
         canonicalize_shader_io(prog->_LinkedShaders[last]->Program->nir,
                                nir_var_shader_out);
      }
   }

   /* If there is no fragment shader we need to set transform feedback.
    *
    * For SSO we also need to assign output locations.  We assign them here
    * because we need to do it for both single stage programs and multi stage
    * programs.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_xfb_decls != 0 || prog->SeparateShader)) {
      const uint64_t reserved_out_slots =
         reserved_varying_slot(prog->_LinkedShaders[last], nir_var_shader_out);
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          prog->_LinkedShaders[last], NULL,
                                          num_xfb_decls, xfb_decls,
                                          reserved_out_slots, &vm))
         return false;
   }

   if (prog->SeparateShader) {
      struct gl_linked_shader *const sh = prog->_LinkedShaders[first];

      const uint64_t reserved_slots =
         reserved_varying_slot(sh, nir_var_shader_in);

      /* Assign input locations for SSO, output locations are already
       * assigned.
       */
      if (!assign_final_varying_locations(consts, exts, mem_ctx, prog,
                                          NULL /* producer */,
                                          sh /* consumer */,
                                          0 /* num_xfb_decls */,
                                          NULL /* xfb_decls */,
                                          reserved_slots, &vm))
         return false;
   }

   if (num_shaders == 1) {
      /* Single stage: treat it once as consumer and once as producer so both
       * its input and output built-in varyings are considered.
       */
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, NULL, linked_shader[0],
                                       0, NULL);
      gl_nir_opt_dead_builtin_varyings(consts, api, prog, linked_shader[0], NULL,
                                       num_xfb_decls, xfb_decls);
   } else {
      /* Linking the stages in the opposite order (from fragment to vertex)
       * ensures that inter-shader outputs written to in an earlier stage
       * are eliminated if they are (transitively) not used in a later
       * stage.
       */
      int next = last;
      for (int i = next - 1; i >= 0; i--) {
         /* i == 0 is processed even when unlinked so sh_i may be NULL below. */
         if (prog->_LinkedShaders[i] == NULL && i != 0)
            continue;

         struct gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
         struct gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

         gl_nir_opt_dead_builtin_varyings(consts, api, prog, sh_i, sh_next,
                                          next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                                          xfb_decls);

         const uint64_t reserved_out_slots =
            reserved_varying_slot(sh_i, nir_var_shader_out);
         const uint64_t reserved_in_slots =
            reserved_varying_slot(sh_next, nir_var_shader_in);

         if (!assign_final_varying_locations(consts, exts, mem_ctx, prog, sh_i,
                   sh_next, next == MESA_SHADER_FRAGMENT ? num_xfb_decls : 0,
                   xfb_decls, reserved_out_slots | reserved_in_slots, &vm))
            return false;

         /* This must be done after all dead varyings are eliminated. */
         if (sh_i != NULL) {
            unsigned slots_used = util_bitcount64(reserved_out_slots);
            if (!check_against_output_limit(consts, api, prog, sh_i, slots_used))
               return false;
         }

         unsigned slots_used = util_bitcount64(reserved_in_slots);
         if (!check_against_input_limit(consts, api, prog, sh_next, slots_used))
            return false;

         next = i;
      }
   }

   if (!store_tfeedback_info(consts, prog, num_xfb_decls, xfb_decls,
                             has_xfb_qualifiers, mem_ctx))
      return false;

   /* A linker error may have been recorded by a helper without an early
    * return; reflect that in the result.
    */
   return prog->data->LinkStatus != LINKING_FAILURE;
}
4507 
4508 bool
gl_assign_attribute_or_color_locations(const struct gl_constants * consts,struct gl_shader_program * prog)4509 gl_assign_attribute_or_color_locations(const struct gl_constants *consts,
4510                                        struct gl_shader_program *prog)
4511 {
4512    void *mem_ctx = ralloc_context(NULL);
4513 
4514    if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4515                                             MESA_SHADER_VERTEX, true)) {
4516       ralloc_free(mem_ctx);
4517       return false;
4518    }
4519 
4520    if (!assign_attribute_or_color_locations(mem_ctx, prog, consts,
4521                                             MESA_SHADER_FRAGMENT, true)) {
4522       ralloc_free(mem_ctx);
4523       return false;
4524    }
4525 
4526    ralloc_free(mem_ctx);
4527    return true;
4528 }
4529 
4530 bool
gl_nir_link_varyings(const struct gl_constants * consts,const struct gl_extensions * exts,gl_api api,struct gl_shader_program * prog)4531 gl_nir_link_varyings(const struct gl_constants *consts,
4532                      const struct gl_extensions *exts,
4533                      gl_api api, struct gl_shader_program *prog)
4534 {
4535    void *mem_ctx = ralloc_context(NULL);
4536 
4537    unsigned first, last;
4538 
4539    MESA_TRACE_FUNC();
4540 
4541    first = MESA_SHADER_STAGES;
4542    last = 0;
4543 
4544    /* We need to initialise the program resource list because the varying
4545     * packing pass my start inserting varyings onto the list.
4546     */
4547    init_program_resource_list(prog);
4548 
4549    /* Determine first and last stage. */
4550    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4551       if (!prog->_LinkedShaders[i])
4552          continue;
4553       if (first == MESA_SHADER_STAGES)
4554          first = i;
4555       last = i;
4556    }
4557 
4558    bool r = link_varyings(prog, first, last, consts, exts, api, mem_ctx);
4559    if (r) {
4560       for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
4561          if (!prog->_LinkedShaders[i])
4562             continue;
4563 
4564          /* Check for transform feedback varyings specified via the API */
4565          prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings =
4566             prog->TransformFeedback.NumVarying > 0;
4567 
4568          /* Check for transform feedback varyings specified in the Shader */
4569          if (prog->last_vert_prog) {
4570             prog->_LinkedShaders[i]->Program->nir->info.has_transform_feedback_varyings |=
4571                prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;
4572          }
4573       }
4574 
4575       /* Assign NIR XFB info to the last stage before the fragment shader */
4576       for (int stage = MESA_SHADER_FRAGMENT - 1; stage >= 0; stage--) {
4577          struct gl_linked_shader *sh = prog->_LinkedShaders[stage];
4578          if (sh && stage != MESA_SHADER_TESS_CTRL) {
4579             sh->Program->nir->xfb_info =
4580                gl_to_nir_xfb_info(sh->Program->sh.LinkedTransformFeedback,
4581                                   sh->Program->nir);
4582             break;
4583          }
4584       }
4585 
4586       /* Lower IO and thoroughly optimize and compact varyings. */
4587       gl_nir_lower_optimize_varyings(consts, prog, false);
4588    }
4589 
4590    ralloc_free(mem_ctx);
4591    return r;
4592 }
4593