/*
 * Copyright © 2011 Intel Corporation
 * Copyright © 2022 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * This lowering pass generates NIR that manually packs varyings into vec4
 * slots, for the benefit of back-ends that don't support packed varyings
 * natively. The examples below are written in GLSL-style pseudocode for
 * readability.
 *
 * For example, the following shader:
 *
 *   out mat3x2 foo;  // location=4, location_frac=0
 *   out vec3 bar[2]; // location=5, location_frac=2
 *
 *   main()
 *   {
 *     ...
 *   }
 *
 * Is rewritten to:
 *
 *   mat3x2 foo;
 *   vec3 bar[2];
 *   out vec4 packed4; // location=4, location_frac=0
 *   out vec4 packed5; // location=5, location_frac=0
 *   out vec4 packed6; // location=6, location_frac=0
 *
 *   main()
 *   {
 *     ...
 *     packed4.xy = foo[0];
 *     packed4.zw = foo[1];
 *     packed5.xy = foo[2];
 *     packed5.zw = bar[0].xy;
 *     packed6.x = bar[0].z;
 *     packed6.yzw = bar[1];
 *   }
 *
 * This lowering pass properly handles "double parking" of a varying vector
 * across two varying slots. For example, in the code above, two of the
 * components of bar[0] are stored in packed5, and the remaining component is
 * stored in packed6.
 *
 * Note that in theory, the extra instructions may cause some loss of
 * performance. However, hopefully in most cases the performance loss will
 * either be absorbed by a later optimization pass, or it will be offset by
 * memory bandwidth savings (because fewer varyings are used).
 *
 * This lowering pass also packs flat floats, ints, and uints together, by
 * using ivec4 as the base type of flat "varyings", and using appropriate
 * casts to convert floats and uints into ints.
 *
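 * For instance (illustrative only, using hypothetical varyings):
 *
 *   flat out float f; // location=4, location_frac=0
 *   flat out int   i; // location=4, location_frac=1
 *   flat out uint  u; // location=4, location_frac=2
 *
 * could end up sharing a single flat ivec4 slot, roughly equivalent to:
 *
 *   flat out ivec4 packed4; // location=4, location_frac=0
 *   ...
 *   packed4.x = floatBitsToInt(f);
 *   packed4.y = i;
 *   packed4.z = int(u);
 *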
 * This lowering pass also handles varyings whose type is a struct or an array
 * of struct. Structs are packed in order and with no gaps, so there may be a
 * performance penalty due to structure elements being double-parked.
 *
 * Lowering of geometry shader inputs is slightly more complex, since geometry
 * inputs are always arrays, so we need to lower arrays to arrays. For
 * example, the following input:
 *
 *   in struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3]; // location=4, location_frac=0
 *
 * Would get lowered like this if it occurred in a fragment shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4; // location=4, location_frac=0
 *   in vec4 packed5; // location=5, location_frac=0
 *   in vec4 packed6; // location=6, location_frac=0
 *   in vec4 packed7; // location=7, location_frac=0
 *   in vec4 packed8; // location=8, location_frac=0
 *   in vec4 packed9; // location=9, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4.x;
 *     arr[0].v = packed4.yzw;
 *     arr[0].a[0] = packed5.xy;
 *     arr[0].a[1] = packed5.zw;
 *     arr[1].f = packed6.x;
 *     arr[1].v = packed6.yzw;
 *     arr[1].a[0] = packed7.xy;
 *     arr[1].a[1] = packed7.zw;
 *     arr[2].f = packed8.x;
 *     arr[2].v = packed8.yzw;
 *     arr[2].a[0] = packed9.xy;
 *     arr[2].a[1] = packed9.zw;
 *     ...
 *   }
 *
 * But it would get lowered like this if it occurred in a geometry shader:
 *
 *   struct Foo {
 *     float f;
 *     vec3 v;
 *     vec2 a[2];
 *   } arr[3];
 *   in vec4 packed4[3]; // location=4, location_frac=0
 *   in vec4 packed5[3]; // location=5, location_frac=0
 *
 *   main()
 *   {
 *     arr[0].f = packed4[0].x;
 *     arr[0].v = packed4[0].yzw;
 *     arr[0].a[0] = packed5[0].xy;
 *     arr[0].a[1] = packed5[0].zw;
 *     arr[1].f = packed4[1].x;
 *     arr[1].v = packed4[1].yzw;
 *     arr[1].a[0] = packed5[1].xy;
 *     arr[1].a[1] = packed5[1].zw;
 *     arr[2].f = packed4[2].x;
 *     arr[2].v = packed4[2].yzw;
 *     arr[2].a[0] = packed5[2].xy;
 *     arr[2].a[1] = packed5[2].zw;
 *     ...
 *   }
 */

#include "nir.h"
#include "nir_builder.h"
#include "gl_nir.h"
#include "gl_nir_linker.h"
#include "program/prog_instruction.h"
#include "main/mtypes.h"

/**
 * Visitor that performs varying packing. For each varying declared in the
 * shader, this visitor determines whether it needs to be packed. If so, it
 * demotes it to an ordinary global, creates new packed varyings, and
 * generates assignments to convert between the original varying and the
 * packed varying.
 */
struct lower_packed_varyings_state
{
   const struct gl_constants *consts;

   struct gl_shader_program *prog;

   /**
    * Memory context used to allocate new instructions for the shader.
    */
   void *mem_ctx;

   /**
    * Number of generic varying slots which are used by this shader. This is
    * used to allocate temporary intermediate data structures. If any varying
    * used by this shader has a location greater than or equal to
    * VARYING_SLOT_VAR0 + locations_used, an assertion will fire.
    */
   unsigned locations_used;

   const uint8_t* components;

   /**
    * Array of pointers to the packed varyings that have been created for each
    * generic varying slot. NULL entries in this array indicate varying slots
    * for which a packed varying has not been created yet.
    */
   nir_variable **packed_varyings;

   nir_shader *shader;

   nir_function_impl *impl;

   nir_builder b;
   /**
    * Type of varying which is being lowered in this pass (either
    * nir_var_shader_in or nir_var_shader_out).
    */
   nir_variable_mode mode;

   /**
    * If we are currently lowering geometry shader inputs, the number of input
    * vertices the geometry shader accepts. Otherwise zero.
    */
   unsigned gs_input_vertices;

   bool disable_varying_packing;
   bool disable_xfb_packing;
   bool xfb_enabled;
   bool ifc_exposed_to_query_api;
};

bool
lower_packed_varying_needs_lowering(nir_shader *shader, nir_variable *var,
                                    bool xfb_enabled, bool disable_xfb_packing,
                                    bool disable_varying_packing)
{
   /* Things composed of vec4's, varyings with explicitly assigned
    * locations or varyings marked as must_be_shader_input (which might be used
    * by interpolateAt* functions) shouldn't be lowered. Everything else can be.
    */
   if (var->data.explicit_location || var->data.must_be_shader_input)
      return false;

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, shader->info.stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Some drivers (e.g. panfrost) don't support packing of transform
    * feedback varyings.
    */
   if (disable_xfb_packing && var->data.is_xfb &&
       !(glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
       xfb_enabled)
      return false;

   /* Override disable_varying_packing if the var is only used by transform
    * feedback. Also override it if transform feedback is enabled and the
    * variable is an array, struct or matrix as the elements of these types
    * will always have the same interpolation and therefore are safe to pack.
    */
   if (disable_varying_packing && !var->data.is_xfb_only &&
       !((glsl_type_is_array(type) || glsl_type_is_struct(type) || glsl_type_is_matrix(type)) &&
         xfb_enabled))
      return false;

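   /* A full, non-64-bit 4-component vector already fills an entire vec4
    * slot, so there is nothing left to pack for it.
    */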
   type = glsl_without_array(type);
   if (glsl_get_vector_elements(type) == 4 && !glsl_type_is_64bit(type))
      return false;
   return true;
}

/**
 * If no packed varying has been created for the given varying location yet,
 * create it and add it to the shader.
 *
 * The newly created varying inherits its interpolation parameters from \c
 * unpacked_var. Its base type is ivec4 if we are lowering a flat varying,
 * vec4 otherwise.
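 *
 * If a packed varying already exists for the slot, it is reused and its name
 * is extended instead, so a slot shared by "foo" and "bar" ends up with a
 * variable named roughly "packed:foo,bar".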
 */
static void
create_or_update_packed_varying(struct lower_packed_varyings_state *state,
                                nir_variable *unpacked_var,
                                const char *name, unsigned location,
                                unsigned slot, unsigned vertex_index)
{
   assert(slot < state->locations_used);
   if (state->packed_varyings[slot] == NULL) {
      assert(state->components[slot] != 0);
      assert(name);

      nir_variable *packed_var = rzalloc(state->shader, nir_variable);
      packed_var->name = ralloc_asprintf(packed_var, "packed:%s", name);
      packed_var->data.mode = state->mode;

      bool is_interpolation_flat =
         unpacked_var->data.interpolation == INTERP_MODE_FLAT ||
         glsl_contains_integer(unpacked_var->type) ||
         glsl_contains_double(unpacked_var->type);

      const struct glsl_type *packed_type;
      if (is_interpolation_flat)
         packed_type = glsl_vector_type(GLSL_TYPE_INT, state->components[slot]);
      else
         packed_type = glsl_vector_type(GLSL_TYPE_FLOAT, state->components[slot]);

      if (state->gs_input_vertices != 0) {
         packed_type =
            glsl_array_type(packed_type, state->gs_input_vertices, 0);
      }

      packed_var->type = packed_type;
      packed_var->data.centroid = unpacked_var->data.centroid;
      packed_var->data.sample = unpacked_var->data.sample;
      packed_var->data.patch = unpacked_var->data.patch;
      packed_var->data.interpolation = is_interpolation_flat ?
         (unsigned) INTERP_MODE_FLAT : unpacked_var->data.interpolation;
      packed_var->data.location = location;
      packed_var->data.precision = unpacked_var->data.precision;
      packed_var->data.always_active_io = unpacked_var->data.always_active_io;
      packed_var->data.stream = NIR_STREAM_PACKED;

      nir_shader_add_variable(state->shader, packed_var);
      state->packed_varyings[slot] = packed_var;
   } else {
      nir_variable *var = state->packed_varyings[slot];

      /* The slot needs to be marked as always active if any variable that got
       * packed there was.
       */
      var->data.always_active_io |= unpacked_var->data.always_active_io;

      /* For geometry shader inputs, only update the packed variable name the
       * first time we visit each component.
       */
      if (state->gs_input_vertices == 0 || vertex_index == 0) {
         assert(name);
         ralloc_asprintf_append((char **) &var->name, ",%s", name);
      }
   }
}

/**
 * Retrieve the packed varying corresponding to the given varying location.
 *
 * \param vertex_index: if we are lowering geometry shader inputs, then this
 * indicates which vertex we are currently lowering. Otherwise it is ignored.
 */
static nir_deref_instr *
get_packed_varying_deref(struct lower_packed_varyings_state *state,
                         unsigned location, nir_variable *unpacked_var,
                         const char *name, unsigned vertex_index)
{
   unsigned slot = location - VARYING_SLOT_VAR0;
   assert(slot < state->locations_used);

   create_or_update_packed_varying(state, unpacked_var, name, location, slot,
                                   vertex_index);

   nir_deref_instr *deref =
      nir_build_deref_var(&state->b, state->packed_varyings[slot]);

   if (state->gs_input_vertices != 0) {
      /* When lowering GS inputs, the packed variable is an array, so we need
       * to dereference it using vertex_index.
       */
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = vertex_index;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      deref = nir_build_deref_array(&state->b, deref, &c_idx->def);
   }

   return deref;
}

struct packing_store_values {
   bool is_64bit;
   unsigned writemasks[2];
   nir_def *values[2];
   nir_deref_instr *deref;
};

/**
 * Compute the value(s) and writemask(s) needed to store \c value into
 * \c packed_deref, performing appropriate bitcasts if necessary to match up
 * types.
 *
 * This function is called when packing varyings.
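 *
 * For example (illustrative): a flat uint or float component is simply moved
 * into the int-typed packed vector bit-for-bit, while a 64-bit component is
 * first split into two 32-bit halves with nir_unpack_64_2x32 (see below).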
 */
static struct packing_store_values *
bitwise_assign_pack(struct lower_packed_varyings_state *state,
                    nir_deref_instr *packed_deref,
                    nir_deref_instr *unpacked_deref,
                    const struct glsl_type *unpacked_type,
                    nir_def *value,
                    unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_var->type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from (uint
       * or float) to int.
       */
      assert(packed_base_type == GLSL_TYPE_INT);
      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_var->type) == 4);

            unsigned swiz_x = 0;
            unsigned writemask = 0x3;
            nir_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);

            store_state->is_64bit = true;
            store_state->deref = packed_deref;
            store_state->values[0] = nir_unpack_64_2x32(&state->b, swizzle);
            store_state->writemasks[0] = writemask;

            unsigned swiz_y = 1;
            writemask = 0xc;
            swizzle = nir_swizzle(&state->b, value, &swiz_y, 1);

            store_state->deref = packed_deref;
            store_state->values[1] = nir_unpack_64_2x32(&state->b, swizzle);
            store_state->writemasks[1] = writemask;
            return store_state;
         } else {
            value = nir_unpack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_unpack_64_2x32(&state->b, value);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = packed_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

/**
 * This function is called when unpacking varyings.
 */
static struct packing_store_values *
bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                      nir_deref_instr *unpacked_deref,
                      nir_deref_instr *packed_deref,
                      const struct glsl_type *unpacked_type,
                      nir_def *value, unsigned writemask)
{
   nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

   const struct glsl_type *packed_type = glsl_without_array(packed_var->type);
   enum glsl_base_type packed_base_type = glsl_get_base_type(packed_type);
   enum glsl_base_type unpacked_base_type = glsl_get_base_type(unpacked_type);

   struct packing_store_values *store_state =
      calloc(1, sizeof(struct packing_store_values));

   if (unpacked_base_type != packed_base_type) {
      /* Since we only mix types in flat varyings, and we always store flat
       * varyings as type ivec4, we need only produce conversions from int to
       * (uint or float).
       */
      assert(packed_base_type == GLSL_TYPE_INT);

      switch (unpacked_base_type) {
      case GLSL_TYPE_UINT:
      case GLSL_TYPE_FLOAT:
         value = nir_mov(&state->b, value);
         break;
      case GLSL_TYPE_DOUBLE:
      case GLSL_TYPE_UINT64:
      case GLSL_TYPE_INT64:
         assert(glsl_get_vector_elements(unpacked_type) <= 2);
         if (glsl_get_vector_elements(unpacked_type) == 2) {
            assert(glsl_get_vector_elements(packed_type) == 4);

            unsigned swiz_xy[2] = {0, 1};
            writemask = 1 << (ffs(writemask) - 1);

            store_state->is_64bit = true;
            store_state->deref = unpacked_deref;
            store_state->values[0] =
               nir_pack_64_2x32(&state->b,
                                nir_swizzle(&state->b, value, swiz_xy, 2));
            store_state->writemasks[0] = writemask;

            unsigned swiz_zw[2] = {2, 3};
            writemask = writemask << 1;

            store_state->deref = unpacked_deref;
            store_state->values[1] =
               nir_pack_64_2x32(&state->b,
                                nir_swizzle(&state->b, value, swiz_zw, 2));
            store_state->writemasks[1] = writemask;

            return store_state;
         } else {
            value = nir_pack_64_2x32(&state->b, value);
         }
         break;
      case GLSL_TYPE_SAMPLER:
      case GLSL_TYPE_IMAGE:
         value = nir_pack_64_2x32(&state->b, value);
         break;
      default:
         assert(!"Unexpected type conversion while lowering varyings");
         break;
      }
   }

   store_state->deref = unpacked_deref;
   store_state->values[0] = value;
   store_state->writemasks[0] = writemask;

   return store_state;
}

static void
create_store_deref(struct lower_packed_varyings_state *state,
                   nir_deref_instr *deref, nir_def *value,
                   unsigned writemask, bool is_64bit)
{
   /* If dest and value have different number of components pack the srcs
    * into a vector.
    */
   const struct glsl_type *type = glsl_without_array(deref->type);
   unsigned comps = glsl_get_vector_elements(type);
   if (value->num_components != comps) {
      nir_def *srcs[4];

      unsigned comp = 0;
      for (unsigned i = 0; i < comps; i++) {
         if (writemask & (1 << i)) {
            if (is_64bit && state->mode == nir_var_shader_in)
               srcs[i] = value;
            else
               srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
            srcs[i] = nir_undef(&state->b, 1,
                                glsl_type_is_64bit(type) ? 64 : 32);
         }
      }
      value = nir_vec(&state->b, srcs, comps);
   }

   nir_store_deref(&state->b, deref, value, writemask);
}

static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index);

/**
 * Recursively pack or unpack a varying for which we need to iterate over its
 * constituent elements.
 * This takes care of both arrays and matrices.
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
 */
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
                nir_def *rhs_swizzle, unsigned writemask,
                const struct glsl_type *type, unsigned fine_location,
                nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned array_size = glsl_get_length(type);
   unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
   if (array_size * dmul + fine_location % 4 > 4) {
      fine_location = ALIGN_POT(fine_location, dmul);
   }

   type = glsl_get_array_element(type);
   for (unsigned i = 0; i < array_size; i++) {
      nir_load_const_instr *c_idx =
         nir_load_const_instr_create(state->b.shader, 1, 32);
      c_idx->value[0].u32 = i;
      nir_builder_instr_insert(&state->b, &c_idx->instr);

      nir_deref_instr *unpacked_array_deref =
         nir_build_deref_array(&state->b, unpacked_var_deref, &c_idx->def);

      if (gs_input_toplevel) {
         /* Geometry shader inputs are a special case. Instead of storing
          * each element of the array at a different location, all elements
          * are at the same location, but with a different vertex index.
          */
         (void) lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                              unpacked_var, unpacked_array_deref, name, false, i);
      } else {
         char *subscripted_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s[%d]", name, i) : NULL;
         fine_location =
            lower_varying(state, rhs_swizzle, writemask, type, fine_location,
                          unpacked_var, unpacked_array_deref,
                          subscripted_name, false, vertex_index);
      }
   }

   return fine_location;
}

/**
 * Recursively pack or unpack the given varying (or portion of a varying) by
 * traversing all of its constituent vectors.
 *
 * \param fine_location is the location where the first constituent vector
 * should be packed--the word "fine" indicates that this location is expressed
 * in multiples of a float, rather than multiples of a vec4 as is used
 * elsewhere in Mesa.
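 * For example, a varying at location 5 with location_frac 2 starts at
 * fine_location 5 * 4 + 2 = 22.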
 *
 * \param gs_input_toplevel should be set to true if we are lowering geometry
 * shader inputs, and we are currently lowering the whole input variable
 * (i.e. we are lowering the array whose index selects the vertex).
 *
 * \param vertex_index: if we are lowering geometry shader inputs, and the
 * level of the array that we are currently lowering is *not* the top level,
 * then this indicates which vertex we are currently lowering. Otherwise it
 * is ignored.
 *
 * \return the location where the next constituent vector (after this one)
 * should be packed.
 */
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
              nir_def *rhs_swizzle, unsigned writemask,
              const struct glsl_type *type, unsigned fine_location,
              nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
              const char *name, bool gs_input_toplevel, unsigned vertex_index)
{
   unsigned dmul = glsl_type_is_64bit(type) ? 2 : 1;
   /* When gs_input_toplevel is set, we should be looking at a geometry shader
    * input array.
    */
   assert(!gs_input_toplevel || glsl_type_is_array(type));

   if (glsl_type_is_struct(type)) {
      unsigned struct_len = glsl_get_length(type);
      for (unsigned i = 0; i < struct_len; i++) {
         const char *field_name = glsl_get_struct_elem_name(type, i);
         char *deref_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, field_name) :
            NULL;
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         nir_deref_instr *unpacked_struct_deref =
            nir_build_deref_struct(&state->b, unpacked_var_deref, i);
         fine_location = lower_varying(state, rhs_swizzle, writemask, field_type,
                                       fine_location, unpacked_var,
                                       unpacked_struct_deref, deref_name,
                                       false, vertex_index);
      }

      return fine_location;
   } else if (glsl_type_is_array(type)) {
      /* Arrays are packed/unpacked by considering each array element in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name,
                             gs_input_toplevel, vertex_index);
   } else if (glsl_type_is_matrix(type)) {
      /* Matrices are packed/unpacked by considering each column vector in
       * sequence.
       */
      return lower_arraylike(state, rhs_swizzle, writemask, type, fine_location,
                             unpacked_var, unpacked_var_deref, name, false,
                             vertex_index);
   } else if (glsl_get_vector_elements(type) * dmul + fine_location % 4 > 4) {
      /* We don't have code to split up a 64-bit variable between two
       * varying slots; instead we add padding if necessary.
       */
      unsigned aligned_fine_location = ALIGN_POT(fine_location, dmul);
      if (aligned_fine_location != fine_location) {
         return lower_varying(state, rhs_swizzle, writemask, type,
                              aligned_fine_location, unpacked_var,
                              unpacked_var_deref, name, false, vertex_index);
      }

      /* This vector is going to be "double parked" across two varying slots,
       * so handle it as two separate assignments. For doubles, a dvec3/dvec4
       * can end up being spread over 3 slots. However the second splitting
       * will happen later, here we just always want to split into 2.
       */
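      /* For instance, a vec3 that starts at fine_location % 4 == 2 is written
       * as a vec2 into the .zw of the current slot and a float into the .x of
       * the next slot (compare bar[0] in the example at the top of this file).
       */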
      unsigned left_components, right_components;
      unsigned left_swizzle_values[4] = { 0, 0, 0, 0 };
      unsigned right_swizzle_values[4] = { 0, 0, 0, 0 };
      char left_swizzle_name[4] = { 0, 0, 0, 0 };
      char right_swizzle_name[4] = { 0, 0, 0, 0 };

      left_components = 4 - fine_location % 4;
      if (glsl_type_is_64bit(type)) {
         left_components /= 2;
         assert(left_components > 0);
      }
      right_components = glsl_get_vector_elements(type) - left_components;

      /* If set, use the previously set writemask to offset the following
       * swizzles/writemasks. This can happen when splitting a dvec, etc.,
       * across slots.
       */
      unsigned offset = 0;
      if (writemask) {
         for (unsigned i = 0; i < left_components; i++) {
            /* Keep going until we find the first component of the write */
            if (!(writemask & (1 << i))) {
               offset++;
            } else
               break;
         }
      }

      for (unsigned i = 0; i < left_components; i++) {
         left_swizzle_values[i] = i + offset;
         left_swizzle_name[i] = "xyzw"[i + offset];
      }
      for (unsigned i = 0; i < right_components; i++) {
         right_swizzle_values[i] = i + left_components + offset;
         right_swizzle_name[i] = "xyzw"[i + left_components + offset];
      }

      if (left_components) {
         char *left_name = name ?
            ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
            NULL;

         nir_def *left_swizzle = NULL;
         unsigned left_writemask = ~0u;
         if (state->mode == nir_var_shader_out) {
            nir_def *ssa_def = rhs_swizzle ?
               rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
            left_swizzle =
               nir_swizzle(&state->b, ssa_def,
                           left_swizzle_values, left_components);
         } else {
            left_writemask = ((1 << left_components) - 1) << offset;
         }

         const struct glsl_type *swiz_type =
            glsl_vector_type(glsl_get_base_type(type), left_components);
         fine_location = lower_varying(state, left_swizzle, left_writemask, swiz_type,
                                       fine_location, unpacked_var, unpacked_var_deref,
                                       left_name, false, vertex_index);
      } else {
         /* Top up the fine location to the next slot */
         fine_location++;
      }

      char *right_name = name ?
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
         NULL;

      nir_def *right_swizzle = NULL;
      unsigned right_writemask = ~0u;
      if (state->mode == nir_var_shader_out) {
         nir_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,
                        right_swizzle_values, right_components);
      } else {
         right_writemask = ((1 << right_components) - 1) << (left_components + offset);
      }

      const struct glsl_type *swiz_type =
         glsl_vector_type(glsl_get_base_type(type), right_components);
      return lower_varying(state, right_swizzle, right_writemask, swiz_type,
                           fine_location, unpacked_var, unpacked_var_deref,
                           right_name, false, vertex_index);
   } else {
      /* No special handling is necessary; (un)pack the old varying (now temp)
       * from/into the new packed varying.
       */
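      /* E.g. fine_location 22 maps back to location 5 with location_frac 2. */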
      unsigned components = glsl_get_vector_elements(type) * dmul;
      unsigned location = fine_location / 4;
      unsigned location_frac = fine_location % 4;

      assert(state->components[location - VARYING_SLOT_VAR0] >= components);
      nir_deref_instr *packed_deref =
         get_packed_varying_deref(state, location, unpacked_var, name,
                                  vertex_index);

      nir_variable *packed_var =
         state->packed_varyings[location - VARYING_SLOT_VAR0];
      if (unpacked_var->data.stream != 0) {
         assert(unpacked_var->data.stream < 4);
         for (unsigned i = 0; i < components; ++i) {
            packed_var->data.stream |=
               unpacked_var->data.stream << (2 * (location_frac + i));
         }
      }

      struct packing_store_values *store_value;
      if (state->mode == nir_var_shader_out) {
         unsigned writemask = ((1 << components) - 1) << location_frac;
         nir_def *value = rhs_swizzle ? rhs_swizzle :
            nir_load_deref(&state->b, unpacked_var_deref);

         store_value =
            bitwise_assign_pack(state, packed_deref, unpacked_var_deref, type,
                                value, writemask);
      } else {
         unsigned swizzle_values[4] = { 0, 0, 0, 0 };
         for (unsigned i = 0; i < components; ++i) {
            swizzle_values[i] = i + location_frac;
         }

         nir_def *ssa_def = nir_load_deref(&state->b, packed_deref);
         nir_def *swizzle =
            nir_swizzle(&state->b, ssa_def, swizzle_values, components);

         store_value = bitwise_assign_unpack(state, unpacked_var_deref,
                                             packed_deref, type, swizzle,
                                             writemask);
      }

      create_store_deref(state, store_value->deref, store_value->values[0],
                         store_value->writemasks[0], store_value->is_64bit);
      if (store_value->is_64bit) {
         create_store_deref(state, store_value->deref, store_value->values[1],
                            store_value->writemasks[1], store_value->is_64bit);
      }

      free(store_value);
      return fine_location + components;
   }
}

/* Recursively pack varying. */
static void
pack_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
   lower_varying(state, NULL, ~0u, var->type,
                 var->data.location * 4 + var->data.location_frac,
                 var, unpacked_var_deref, var->name,
                 state->gs_input_vertices != 0, 0);
}

static void
lower_output_var(struct lower_packed_varyings_state *state, nir_variable *var)
{
   if (var->data.mode != state->mode ||
       var->data.location < VARYING_SLOT_VAR0 ||
       !lower_packed_varying_needs_lowering(state->shader, var,
                                            state->xfb_enabled,
                                            state->disable_xfb_packing,
                                            state->disable_varying_packing))
      return;

   /* Skip any new packed varyings we just added */
   if (strncmp("packed:", var->name, 7) == 0)
      return;

   /* This lowering pass is only capable of packing floats and ints
    * together when their interpolation mode is "flat". Treat integers as
    * being flat when the interpolation mode is none.
    */
   assert(var->data.interpolation == INTERP_MODE_FLAT ||
          var->data.interpolation == INTERP_MODE_NONE ||
          !glsl_contains_integer(var->type));

   if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
      struct set *resource_set = _mesa_pointer_set_create(NULL);

      nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                          resource_set, var,
                                          state->shader->info.stage,
                                          GL_PROGRAM_OUTPUT);

      _mesa_set_destroy(resource_set, NULL);
   }

   /* Change the old varying into an ordinary global. */
   var->data.mode = nir_var_shader_temp;

   nir_foreach_block(block, state->impl) {
      if (state->shader->info.stage != MESA_SHADER_GEOMETRY) {
         /* For shaders other than geometry, outputs need to be lowered before
          * each return statement and at the end of main()
          */
         if (nir_block_ends_in_return_or_halt(block)) {
            state->b.cursor = nir_before_instr(nir_block_last_instr(block));
            pack_output_var(state, var);
         } else if (block == nir_impl_last_block(state->impl)) {
            state->b.cursor = nir_after_block(block);
            pack_output_var(state, var);
         }
      } else {
         /* For geometry shaders, outputs need to be lowered before each call
          * to EmitVertex()
          */
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_emit_vertex)
               continue;

            state->b.cursor = nir_before_instr(instr);
            pack_output_var(state, var);
         }
      }
   }
}

static void
lower_packed_outputs(struct lower_packed_varyings_state *state)
{
   nir_foreach_shader_out_variable_safe(var, state->shader) {
      lower_output_var(state, var);
   }
}

static void
lower_packed_inputs(struct lower_packed_varyings_state *state)
{
   /* Shader inputs need to be lowered at the beginning of main(), so set the
    * builder cursor to insert packing code at the start of the main function.
    */
   state->b.cursor = nir_before_impl(state->impl);

   /* Insert new varyings, lower old ones to locals and add unpacking code at
    * the start of the shader.
    */
   nir_foreach_shader_in_variable_safe(var, state->shader) {
      if (var->data.mode != state->mode ||
          var->data.location < VARYING_SLOT_VAR0 ||
          !lower_packed_varying_needs_lowering(state->shader, var,
                                               state->xfb_enabled,
                                               state->disable_xfb_packing,
                                               state->disable_varying_packing))
         continue;

      /* Skip any new packed varyings we just added */
      if (strncmp("packed:", var->name, 7) == 0)
         continue;

      /* This lowering pass is only capable of packing floats and ints
       * together when their interpolation mode is "flat". Treat integers as
       * being flat when the interpolation mode is none.
       */
      assert(var->data.interpolation == INTERP_MODE_FLAT ||
             var->data.interpolation == INTERP_MODE_NONE ||
             !glsl_contains_integer(var->type));

      /* The program interface needs to expose varyings in case of SSO. Add
       * the variable to the program resource list before it gets modified
       * and lost.
       */
      if (state->prog->SeparateShader && state->ifc_exposed_to_query_api) {
         struct set *resource_set = _mesa_pointer_set_create(NULL);

         nir_add_packed_var_to_resource_list(state->consts, state->prog,
                                             resource_set, var,
                                             state->shader->info.stage,
                                             GL_PROGRAM_INPUT);

         _mesa_set_destroy(resource_set, NULL);
      }

      /* Change the old varying into an ordinary global. */
      var->data.mode = nir_var_shader_temp;

      /* Recursively unpack varying. */
      nir_deref_instr *unpacked_var_deref = nir_build_deref_var(&state->b, var);
      lower_varying(state, NULL, ~0u, var->type,
                    var->data.location * 4 + var->data.location_frac,
                    var, unpacked_var_deref, var->name,
                    state->gs_input_vertices != 0, 0);
   }
}

void
gl_nir_lower_packed_varyings(const struct gl_constants *consts,
                             struct gl_shader_program *prog,
                             void *mem_ctx, unsigned locations_used,
                             const uint8_t *components,
                             nir_variable_mode mode, unsigned gs_input_vertices,
                             struct gl_linked_shader *linked_shader,
                             bool disable_varying_packing,
                             bool disable_xfb_packing, bool xfb_enabled)
{
   struct lower_packed_varyings_state state;
   nir_shader *shader = linked_shader->Program->nir;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   assert(shader->info.stage != MESA_SHADER_COMPUTE);

   /* assert that functions have been inlined before packing is called */
   nir_foreach_function(f, shader) {
      assert(f->impl == impl);
   }

   state.b = nir_builder_create(impl);
   state.consts = consts;
   state.prog = prog;
   state.mem_ctx = mem_ctx;
   state.shader = shader;
   state.impl = impl;
   state.locations_used = locations_used;
   state.components = components;
   state.mode = mode;
   state.gs_input_vertices = gs_input_vertices;
   state.disable_varying_packing = disable_varying_packing;
   state.disable_xfb_packing = disable_xfb_packing;
   state.xfb_enabled = xfb_enabled;
   state.packed_varyings =
      (nir_variable **) rzalloc_array_size(mem_ctx, sizeof(nir_variable *),
                                           locations_used);

   /* Determine if the shader interface is exposed to api query */
   struct gl_linked_shader *linked_shaders[MESA_SHADER_STAGES];
   unsigned num_shaders = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i])
         linked_shaders[num_shaders++] = prog->_LinkedShaders[i];
   }

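   /* Only the inputs of the first linked stage and the outputs of the last
    * linked stage are visible to the program interface query, so that is
    * what we check against here.
    */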
   if (mode == nir_var_shader_in) {
      state.ifc_exposed_to_query_api = linked_shaders[0] == linked_shader;
      lower_packed_inputs(&state);
   } else {
      state.ifc_exposed_to_query_api =
         linked_shaders[num_shaders - 1] == linked_shader;
      lower_packed_outputs(&state);
   }

   nir_lower_global_vars_to_local(shader);
   nir_fixup_deref_modes(shader);
}