/*
 * Copyright © 2011 Intel Corporation
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * This lowering pass generates NIR that manually packs varyings into vec4
 * slots, for the benefit of back-ends that don't support packed varyings
 * natively.
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots.  For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance.  However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
 *
 * This lowering pass also handles varyings whose type is a struct or an array
 * of struct.  Structs are packed in order and with no gaps, so there may be a
 * performance penalty due to structure elements being double-parked.
 *
 * Lowering of geometry shader inputs is slightly more complex, since geometry
 * inputs are always arrays, so we need to lower arrays to arrays.  For
 * example, the following input:
 *
 *   in struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];         // location=4, location_frac=0
 *
 * Would get lowered like this if it occurred in a fragment shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4;  // location=4, location_frac=0
 *   in vec4 packed5;  // location=5, location_frac=0
 *   in vec4 packed6;  // location=6, location_frac=0
 *   in vec4 packed7;  // location=7, location_frac=0
 *   in vec4 packed8;  // location=8, location_frac=0
 *   in vec4 packed9;  // location=9, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4.x;
 *     arr[0].v = packed4.yzw;
 *     arr[0].a[0] = packed5.xy;
 *     arr[0].a[1] = packed5.zw;
 *     arr[1].f = packed6.x;
 *     arr[1].v = packed6.yzw;
 *     arr[1].a[0] = packed7.xy;
 *     arr[1].a[1] = packed7.zw;
 *     arr[2].f = packed8.x;
 *     arr[2].v = packed8.yzw;
 *     arr[2].a[0] = packed9.xy;
 *     arr[2].a[1] = packed9.zw;
 *     ...
 *   }
 *
 * But it would get lowered like this if it occurred in a geometry shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4[3];  // location=4, location_frac=0
 *   in vec4 packed5[3];  // location=5, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4[0].x;
 *     arr[0].v = packed4[0].yzw;
 *     arr[0].a[0] = packed5[0].xy;
 *     arr[0].a[1] = packed5[0].zw;
 *     arr[1].f = packed4[1].x;
 *     arr[1].v = packed4[1].yzw;
 *     arr[1].a[0] = packed5[1].xy;
 *     arr[1].a[1] = packed5[1].zw;
 *     arr[2].f = packed4[2].x;
 *     arr[2].v = packed4[2].yzw;
 *     arr[2].a[0] = packed5[2].xy;
 *     arr[2].a[1] = packed5[2].zw;
 *     ...
 *   }
 */

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"

/**
 * State for the varying-packing pass.  For each varying declared in the
 * shader, the pass determines whether it needs to be packed.  If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
struct lower_packed_varyings_state
{
   const struct gl_constants *consts;

   struct gl_shader_program *prog;

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void *mem_ctx;

   /**
    * Number of generic varying slots which are used by this shader.  This is
    * used to allocate temporary intermediate data structures.  If any varying
    * used by this shader has a location greater than or equal to
    * VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
    */
   unsigned locations_used;

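   /**
    * Number of vector components used in each generic varying slot, indexed
    * relative to VARYING_SLOT_VAR0 like packed_varyings below.  (Description
    * inferred from usage: create_or_update_packed_varying() uses
    * components[slot] as the width of the packed vector type.)
    */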
   const uint8_t *components;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot.  NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   nir_variable **packed_varyings;

   nir_shader *shader;

   nir_function_impl *impl;

   nir_builder b;

   /**
    * Type of varying which is being lowered in this pass (either
    * nir_var_shader_in or nir_var_shader_out).
    */
   nir_variable_mode mode;

   /**
    * If we are currently lowering geometry shader inputs, the number of input
    * vertices the geometry shader accepts.  Otherwise zero.
    */
   unsigned gs_input_vertices;

   bool disable_varying_packing;
   bool disable_xfb_packing;
   bool xfb_enabled;
   bool ifc_exposed_to_query_api;
};

bool
lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var,
                                    bool xfb_enabled, bool disable_xfb_packing,
                                    bool disable_varying_packing)
{
   /* Things composed of vec4's, varyings with explicitly assigned
    * locations, and varyings marked as must_be_shader_input (which might be
    * used by interpolateAt* functions) shouldn't be lowered. Everything else
    * can be.
    */
   if (var->data.explicit_location || var->data.must_be_shader_input)
      return false;

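   /* Arrayed per-vertex I/O and per-view varyings carry an extra outer
    * array dimension that does not affect packing, so look through it to
    * the element type.
    */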
   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Some drivers (e.g. panfrost) don't support packing of transform
    * feedback varyings.
    */
   if (disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
       xfb_enabled)
      return false;

   /* Override disable_varying_packing if the var is only used by transform
    * feedback. Also override it if transform feedback is enabled and the
    * variable is an array, struct or matrix as the elements of these types
    * will always have the same interpolation and therefore are safe to pack.
    */
   if (disable_varying_packing && !var->data.is_xfb_only &&
       !((glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
         xfb_enabled))
      return false;

   type = glsl_without_array(type);
   if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
      return false;
   return true;
}

/**
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var.  Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
 */
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
                                nir_variable *unpacked_var,
                                const char *name, unsigned location,
                                unsigned slot, unsigned vertex_index)
{
   assert(slot < state->locations_used);
   if (state->packed_varyings[slot] == NULL) {
      assert(state->components[slot] != 0);
      assert(name);

      nir_variable *packed_var = rzalloc(state->shader, nir_variable);
      packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
      packed_var->data.mode = state->mode;

      bool is_interpolation_flat =
         unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
         glsl_contains_integer(unpacked_var->type) ||
         glsl_contains_double(unpacked_var->type);

      const struct glsl_type *packed_type;
      if (is_interpolation_flat)
         packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
      else
         packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);
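
      /* Geometry shader inputs are per-vertex arrays; the packed varying
       * must therefore be an array indexed by vertex as well.
       */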
      if (state->gs_input_vertices != 0) {
         packed_type =
            glsl_array_type(packed_type, state->gs_input_vertices, 0);
      }

      packed_var->type = packed_type;
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
      packed_var->data.interpolation = is_interpolation_flat ?
         (unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
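      /* NIR_STREAM_PACKED marks data.stream as holding packed 2-bit stream
       * indices, one per component; lower_varying() ORs in the
       * per-component bits as varyings land in this slot.
       */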
      packed_var->data.stream = NIR_STREAM_PACKED;

      nir_shader_add_variable(state->shader, packed_var);
      state->packed_varyings[slot] = packed_var;
   } else {
      nir_variable *var = state->packed_varyings[slot];

      /* The slot needs to be marked as always active if any variable that got
       * packed there was.
       */
      var->data.always_active_io |= unpacked_var->data.always_active_io;

      /* For geometry shader inputs, only update the packed variable name
       * when lowering the first vertex, since every vertex visits the same
       * set of varyings.
       */
      if (state->gs_input_vertices == 0 || vertex_index == 0) {
         assert(name);
         ralloc_asprintf_append((char **) &var->name, ",%s", name);
      }
   }
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 *
 * \param vertex_index: if we are lowering geometry shader inputs, then this
 * indicates which vertex we are currently lowering.  Otherwise it is ignored.
 */
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
                         unsigned location, nir_variable *unpacked_var,
                         const char *name, unsigned vertex_index)
{
   unsigned slot = location - VARYING_SLOT_VAR0;
   assert(slot < state->locations_used);

   create_or_update_packed_varying(state, unpacked_var, name, location, slot,
                                   vertex_index);

   nir_deref_instr *deref =
      nir_build_deref_var(&state->b, state->packed_varyings[slot]);

   if (state->gs_input_vertices != 0) {
      /* When lowering GS inputs, the packed variable is an array, so we need
       * to dereference it using vertex_index.
       */
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = vertex_index;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
   }

   return deref;
}

struct packing_store_values {
   bool is_64bit;
   unsigned writemasks[2];
   nir_def *values[2];
   nir_deref_instr *deref;
};

/**
 * Produce the store values needed to write \c value into \c packed_deref,
 * performing appropriate bitcasts if necessary to match up types.
 *
 * This function is called when packing varyings.
 */
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
                    nir_deref_instr *packed_deref,
                    nir_deref_instr *unpacked_deref,
                    const struct glsl_type *unpacked_type,
                    nir_def *value,
                    unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_var->type) == 4);

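            /* A 2-component 64-bit vector fills the whole packed vec4:
             * unpack each 64-bit element into two 32-bit words and emit two
             * stores, the .x half to components xy (writemask 0x3) and the
             * .y half to components zw (writemask 0xc).
             */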
            unsigned swiz_x = 0;
            unsigned writemask = 0x3;
            nir_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);

            store_state->is_64bit = true;
            store_state->deref = packed_deref;
            store_state->values[0] = nir_unpack_64_2x32(&state->b, swizzle);
            store_state->writemasks[0] = writemask;

            unsigned swiz_y = 1;
            writemask = 0xc;
            swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);

            store_state->deref = packed_deref;
            store_state->values[1] = nir_unpack_64_2x32(&state->b, swizzle);
            store_state->writemasks[1] = writemask;
            return store_state;
         } else {
            value = nir_unpack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_unpack_64_2x32(&state->b, value);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = packed_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

/**
 * Produce the store values needed to write \c value into \c unpacked_deref,
 * performing appropriate bitcasts if necessary to match up types.
 *
 * This function is called when unpacking varyings.
 */
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                      nir_deref_instr *unpacked_deref,
                      nir_deref_instr *packed_deref,
                      const struct glsl_type *unpacked_type,
                      nir_def *value, unsigned writemask)
455    nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);
456 
457    const struct glsl_type *packed_type = glsl_without_array(packed_var->type);
458    enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
459    enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);
460 
461    struct packing_store_values *store_state =
462       calloc(1, sizeof(struct packing_store_values));
463 
464    if (unpacked_base_type != packed_base_type) {
465       /* Since we only mix types in flat varyings, and we always store flat
466        * varyings as type ivec4, we need only produce conversions from int to
467        * (uint or float).
468        */
469       assert(packed_base_type == GLSL_TYPE_INT);
470 
471       switch (unpacked_base_type) {
472       case GLSL_TYPE_UINT:
473       case GLSL_TYPE_FLOAT:
474          value = nir_mov(&state->b, value);
475          break;
476       case GLSL_TYPE_DOUBLE:
477       case GLSL_TYPE_UINT64:
478       case GLSL_TYPE_INT64:
479          assert(glsl_get_vector_elements(unpacked_type) <= 2);
480          if (glsl_get_vector_elements(unpacked_type) == 2) {
481             assert(glsl_get_vector_elements(packed_type) == 4);
482 
483             unsigned swiz_xy[2] = {0, 1};
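            /* The incoming writemask addresses 32-bit lanes of the packed
             * vec4; collapse it to its lowest set bit to select the first
             * 64-bit destination component, then shift it left by one for
             * the second.
             */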
            writemask = 1 << (ffs(writemask) - 1);

            store_state->is_64bit = true;
            store_state->deref = unpacked_deref;
            store_state->values[0] =
               nir_pack_64_2x32(&state->b,
                                nir_swizzle(&state->b, value, swiz_xy, 2));
            store_state->writemasks[0] = writemask;

            unsigned swiz_zw[2] = {2, 3};
            writemask = writemask << 1;

            store_state->deref = unpacked_deref;
            store_state->values[1] =
               nir_pack_64_2x32(&state->b,
                                nir_swizzle(&state->b, value, swiz_zw, 2));
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            value = nir_pack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_pack_64_2x32(&state->b, value);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = unpacked_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

static void
create_store_deref(struct lower_packed_varyings_state *state,
                   nir_deref_instr *deref, nir_def *value,
                   unsigned writemask, bool is_64bit)
{
   /* If dest and value have different numbers of components, pack the
    * sources into a vector.
    */
   const struct glsl_type *type = glsl_without_array(deref->type);
   unsigned comps = glsl_get_vector_elements(type);
   if (value->num_components != comps) {
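      /* Build one scalar source per destination component.  Lanes outside
       * the writemask get undefs, which the masked store never reads.
       */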
      nir_def *srcs[4];

      unsigned comp = 0;
      for (unsigned i = 0; i < comps; i++) {
         if (writemask & (1 << i)) {
            if (is_64bit && state->mode == nir_var_shader_in)
               srcs[i] = value;
            else
               srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
            srcs[i] = nir_undef(&state->b, 1,
                                glsl_type_is_64bit(type) ? 64 : 32);
         }
      }
      value = nir_vec(&state->b, srcs, comps);
   }

   nir_store_deref(&state->b, deref, value, writemask);
}

static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index);

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements.
 * This takes care of both arrays and matrices.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering.  Otherwise it
 * is ignored.
 */
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
                nir_def *rhs_swizzle, unsigned writemask,
                const struct glsl_type *type, unsigned fine_location,
                nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned array_size = glsl_get_length(type);
   unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
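   /* dmul is 2 for 64-bit element types because each double consumes two
    * 32-bit components of a slot.  If the array cannot finish within the
    * current slot, realign fine_location so no element straddles a slot
    * boundary mid-value.
    */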
   if (array_size * dmul + fine_location % 4 > 4) {
      fine_location = ALIGN_POT(fine_location, dmul);
   }

   type = glsl_get_array_element(type);
   for (unsigned i = 0; i < array_size; i++) {
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = i;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      nir_deref_instr *unpacked_array_deref =
         nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);

      if (gs_input_toplevel) {
         /* Geometry shader inputs are a special case.  Instead of storing
          * each element of the array at a different location, all elements
          * are at the same location, but with a different vertex index.
          */
         (void) lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                              unpacked_var, unpacked_array_deref, name, false, i);
      } else {
         char *subscripted_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
         fine_location =
            lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                          unpacked_var, unpacked_array_deref,
                          subscripted_name, false, vertex_index);
      }
   }

   return fine_location;
}

/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
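 * For example, pack_output_var() computes a varying's starting
 * fine_location as var->data.location * 4 + var->data.location_frac.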
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering.  Otherwise it
 * is ignored.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;
   /* When gs_input_toplevel is set, we should be looking at a geometry shader
    * input array.
    */
   assert(!gs_input_toplevel || glsl_type_is_array(type));

   if (glsl_type_is_struct(type)) {
      unsigned struct_len = glsl_get_length(type);
      for (unsigned i = 0; i < struct_len; i++) {
         const char *field_name = glsl_get_struct_elem_name(type, i);
         char *deref_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) :
            NULL;
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         nir_deref_instr *unpacked_struct_deref =
            nir_build_deref_struct(&state->b, unpacked_var_deref, i);
         fine_location = lower_varying(state, rhs_swizzle, writemask, field_type,
                                       fine_location, unpacked_var,
                                       unpacked_struct_deref, deref_name,
                                       false, vertex_index);
      }

      return fine_location;
   } else if (glsl_type_is_array(type)) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name,
                             gs_input_toplevel, vertex_index);
   } else if (glsl_type_is_matrix(type)) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name, false,
                             vertex_index);
   } else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
      /* We don't have code to split a 64-bit variable between two varying
       * slots, so instead we add padding if necessary.
       */
      unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
      if (aligned_fine_location != fine_location) {
         return lower_varying(state, rhs_swizzle, writemask, type,
                              aligned_fine_location, unpacked_var,
                              unpacked_var_deref, name, false, vertex_index);
      }

      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments. For doubles, a dvec3/dvec4
       * can end up being spread over 3 slots. However the second splitting
       * will happen later, here we just always want to split into 2.
       */
      unsigned left_components, right_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };

      left_components = 4 - fine_location % 4;
      if (glsl_type_is_64bit(type)) {
         left_components /= 2;
         assert(left_components > 0);
      }
      right_components = glsl_get_vector_elements(type) - left_components;
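
      /* For example, a vec3 entering at fine_location % 4 == 2 keeps two
       * components in the current slot (left) and spills one into the next
       * slot (right).
       */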

      /* If set, use the previously set writemask to offset the following
       * swizzles/writemasks. This can happen when splitting a dvec, etc.,
       * across slots.
       */
      unsigned offset = 0;
      if (writemask) {
         for (unsigned i = 0; i < left_components; i++) {
            /* Keep going until we find the first component of the write */
            if (!(writemask & (1 << i))) {
               offset++;
            } else
               break;
         }
      }

      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i + offset;
         left_swizzle_name[i] = "xyzw"[i + offset];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components + offset;
         right_swizzle_name[i] = "xyzw"[i + left_components + offset];
      }

      if (left_components) {
         char *left_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
            NULL;

         nir_def *left_swizzle = NULL;
         unsigned left_writemask = ~0u;
         if (state->mode == nir_var_shader_out) {
            nir_def *ssa_def = rhs_swizzle ?
               rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
            left_swizzle =
               nir_swizzle(&state->b, ssa_def,
                           left_swizzle_values, left_components);
         } else {
            left_writemask = ((1 << left_components) - 1) << offset;
         }

         const struct glsl_type *swiz_type =
            glsl_vector_type(glsl_get_base_type(type), left_components);
         fine_location = lower_varying(state, left_swizzle, left_writemask, swiz_type,
                                       fine_location, unpacked_var, unpacked_var_deref,
                                       left_name, false, vertex_index);
      } else {
         /* Top up the fine location to the next slot */
         fine_location++;
      }

      char *right_name = name ?
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
         NULL;

      nir_def *right_swizzle = NULL;
      unsigned right_writemask = ~0u;
      if (state->mode == nir_var_shader_out) {
         nir_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,
                        right_swizzle_values, right_components);
      } else {
         right_writemask = ((1 << right_components) - 1) << (left_components + offset);
      }

      const struct glsl_type *swiz_type =
         glsl_vector_type(glsl_get_base_type(type), right_components);
      return lower_varying(state, right_swizzle, right_writemask, swiz_type,
                           fine_location, unpacked_var, unpacked_var_deref,
                           right_name, false, vertex_index);
   } else {
      /* No special handling is necessary; (un)pack the old varying (now temp)
       * from/into the new packed varying.
       */
      unsigned components = glsl_get_vector_elements(type) * dmul;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;

      assert(state->components[location - VARYING_SLOT_VAR0] >= components);
      nir_deref_instr *packed_deref =
         get_packed_varying_deref(state, location, unpacked_var, name,
                                  vertex_index);

      nir_variable *packed_var =
         state->packed_varyings[location - VARYING_SLOT_VAR0];
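
      /* data.stream was initialized to NIR_STREAM_PACKED, so OR in this
       * varying's 2-bit stream index for each component it occupies in the
       * packed slot.
       */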
      if (unpacked_var->data.stream != 0) {
         assert(unpacked_var->data.stream < 4);
         for (unsigned i = 0; i < components; ++i) {
            packed_var->data.stream |=
               unpacked_var->data.stream << (2 * (location_frac + i));
         }
      }

      struct packing_store_values *store_value;
      if (state->mode == nir_var_shader_out) {
         unsigned writemask = ((1 << components) - 1) << location_frac;
         nir_def *value = rhs_swizzle ? rhs_swizzle :
            nir_load_deref(&state->b, unpacked_var_deref);

         store_value =
            bitwise_assign_pack(state, packed_deref, unpacked_var_deref, type,
                                value, writemask);
      } else {
         unsigned swizzle_values[4] = { 0, 0, 0, 0 };
         for (unsigned i = 0; i < components; ++i) {
            swizzle_values[i] = i + location_frac;
         }

         nir_def *ssa_def = nir_load_deref(&state->b, packed_deref);
         nir_def *swizzle =
            nir_swizzle(&state->b, ssa_def, swizzle_values, components);

         store_value = bitwise_assign_unpack(state, unpacked_var_deref,
                                             packed_deref, type, swizzle,
                                             writemask);
      }

      create_store_deref(state, store_value->deref, store_value->values[0],
                         store_value->writemasks[0], store_value->is_64bit);
      if (store_value->is_64bit) {
         create_store_deref(state, store_value->deref, store_value->values[1],
                            store_value->writemasks[1], store_value->is_64bit);
      }

      free(store_value);
      return fine_location + components;
   }
}

/* Recursively pack varying. */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
   lower_varying(state, NULL, ~0u, var->type,
                 var->data.location * 4 + var->data.location_frac,
                 var, unpacked_var_deref, var->name,
                 state->gs_input_vertices != 0, 0);
}

static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   if (var->data.mode != state->mode ||
       var->data.location < VARYING_SLOT_VAR0 ||
       !lower_packed_varying_needs_lowering(state->shader, var,
                                            state->xfb_enabled,
                                            state->disable_xfb_packing,
                                            state->disable_varying_packing))
      return;

   /* Skip any new packed varyings we just added */
   if (strncmp("packed:", var->name, 7) == 0)
      return;

   /* This lowering pass is only capable of packing floats and ints
    * together when their interpolation mode is "flat".  Treat integers as
    * being flat when the interpolation mode is none.
    */
   assert(var->data.interpolation == INTERP_MODE_FLAT ||
          var->data.interpolation == INTERP_MODE_NONE ||
          !glsl_contains_integer(var->type));

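   /* Program interface needs to expose varyings in case of SSO. Add the
    * variable to the program resource list before it gets modified and lost.
    */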
   if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
      struct set *resource_set = _mesa_pointer_set_create(NULL);

      nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                          resource_set, var,
                                          state->shader->info.stage,
                                          GL_PROGRAM_OUTPUT);

      _mesa_set_destroy(resource_set, NULL);
   }

   /* Change the old varying into an ordinary global. */
   var->data.mode = nir_var_shader_temp;

   nir_foreach_block(block, state->impl) {
      if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
         /* For shaders other than geometry, outputs need to be lowered before
          * each return statement and at the end of main()
          */
         if (nir_block_ends_in_return_or_halt(block)) {
            state->b.cursor = nir_before_instr(nir_block_last_instr(block));
            pack_output_var(state, var);
         } else if (block == nir_impl_last_block(state->impl)) {
            state->b.cursor = nir_after_block(block);
            pack_output_var(state, var);
         }
      } else {
         /* For geometry shaders, outputs need to be lowered before each call
          * to EmitVertex()
          */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            state->b.cursor = nir_before_instr(instr);
            pack_output_var(state, var);
         }
      }
   }
}

static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
   nir_foreach_shader_out_variable_safe(var, state->shader) {
      lower_output_var(state, var);
   }
}

static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
   /* Shader inputs need to be lowered at the beginning of main(), so set the
    * builder cursor to insert packing code at the start of the main function.
    */
   state->b.cursor = nir_before_impl(state->impl);

   /* Insert new varyings, lower old ones to locals, and add unpacking code
    * at the start of the shader.
    */
   nir_foreach_shader_in_variable_safe(var, state->shader) {
      if (var->data.mode != state->mode ||
          var->data.location < VARYING_SLOT_VAR0 ||
          !lower_packed_varying_needs_lowering(state->shader, var,
                                               state->xfb_enabled,
                                               state->disable_xfb_packing,
                                               state->disable_varying_packing))
         continue;

      /* Skip any new packed varyings we just added */
      if (strncmp("packed:", var->name, 7) == 0)
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat".  Treat integers as
       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_MODE_FLAT ||
             var->data.interpolation == INTERP_MODE_NONE ||
             !glsl_contains_integer(var->type));

      /* Program interface needs to expose varyings in case of SSO. Add the
       * variable to the program resource list before it gets modified and
       * lost.
       */
      if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
         struct set *resource_set = _mesa_pointer_set_create(NULL);

         nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                             resource_set, var,
                                             state->shader->info.stage,
                                             GL_PROGRAM_INPUT);

         _mesa_set_destroy(resource_set, NULL);
      }

      /* Change the old varying into an ordinary global. */
      var->data.mode = nir_var_shader_temp;

      /* Recursively unpack varying. */
      nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
      lower_varying(state, NULL, ~0u, var->type,
                    var->data.location * 4 + var->data.location_frac,
                    var, unpacked_var_deref, var->name,
                    state->gs_input_vertices != 0, 0);
   }
}

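/**
 * Entry point of the pass: lowers either the inputs or the outputs (as
 * selected by \c mode) of \c linked_shader to packed varyings.
 *
 * As a rough, illustrative sketch (argument values here are assumptions,
 * not copied from a real call site), the GL NIR linker would invoke it once
 * per stage along these lines, with gs_input_vertices non-zero only for
 * geometry shader inputs:
 *
 *    gl_nir_lower_packed_varyings(consts, prog, mem_ctx, locations_used,
 *                                 components, nir_var_shader_out, 0,
 *                                 linked_shader, disable_varying_packing,
 *                                 disable_xfb_packing, xfb_enabled);
 */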
void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
                             struct gl_shader_program *prog,
                             void *mem_ctx, unsigned locations_used,
                             const uint8_t *components,
                             nir_variable_mode mode, unsigned gs_input_vertices,
                             struct gl_linked_shader *linked_shader,
                             bool disable_varying_packing,
                             bool disable_xfb_packing, bool xfb_enabled)
{
   struct lower_packed_varyings_state state;
   nir_shader *shader = linked_shader->Program->nir;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   assert(shader->info.stage != MESA_SHADER_COMPUTE);

   /* Assert that functions have been inlined before packing is called. */
   nir_foreach_function(f, shader) {
      assert(f->impl == impl);
   }

   state.b = nir_builder_create(impl);
   state.consts = consts;
   state.prog = prog;
   state.mem_ctx = mem_ctx;
   state.shader = shader;
   state.impl = impl;
   state.locations_used = locations_used;
   state.components = components;
   state.mode = mode;
   state.gs_input_vertices = gs_input_vertices;
   state.disable_varying_packing = disable_varying_packing;
   state.disable_xfb_packing = disable_xfb_packing;
   state.xfb_enabled = xfb_enabled;
   state.packed_varyings =
      (nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
                                           locations_used);

   /* Determine if the shader interface is exposed to the API query. */
   struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
   }

   if (mode == nir_var_shader_in) {
      state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
      lower_packed_inputs(&state);
   } else {
      state.ifc_exposed_to_query_api =
         linked_shaders[num_shaders - 1] == linked_shader;
      lower_packed_outputs(&state);
   }

   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}
1050