/*
 * Copyright © 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/blob.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/u_memory.h"
#include "util/perf/cpu_trace.h"
#include "util/ralloc.h"
#include "pipe/p_screen.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "compiler/shader_enums.h"

#include "tgsi_to_nir.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_from_mesa.h"

#define SWIZ(X, Y, Z, W) (unsigned[4]){      \
      TGSI_SWIZZLE_##X,                      \
      TGSI_SWIZZLE_##Y,                      \
      TGSI_SWIZZLE_##Z,                      \
      TGSI_SWIZZLE_##W,                      \
   }

struct ttn_reg_info {
   /** nir register handle containing this TGSI index. */
   nir_def *reg;
   nir_variable *var;
   /** Offset (in vec4s) from the start of var for this TGSI index. */
   int offset;
};

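/* Example (hypothetical TGSI input): for "DCL TEMP[2..4], ARRAY(1)",
 * ttn_emit_declaration() points temp_regs[2..4] at a single shared
 * "arr_1" variable with offsets 0..2 and reg == NULL, while non-array
 * temporaries each get their own nir_decl_reg() handle with offset 0.
 */
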
struct ttn_compile {
   union tgsi_full_token *token;
   nir_builder build;
   struct tgsi_shader_info *scan;

   struct ttn_reg_info *output_regs;
   struct ttn_reg_info *temp_regs;
   nir_def **imm_defs;

   unsigned num_samp_types;
   nir_alu_type *samp_types;

   nir_def *addr_reg;

   nir_variable **inputs;
   nir_variable **outputs;
   nir_variable *samplers[PIPE_MAX_SAMPLERS];
   nir_variable *images[PIPE_MAX_SHADER_IMAGES];
   nir_variable *ssbo[PIPE_MAX_SHADER_BUFFERS];
   uint32_t ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS];

   unsigned num_samplers;
   unsigned num_images;
   unsigned num_msaa_images;

   nir_variable *input_var_face;
   nir_variable *input_var_position;
   nir_variable *input_var_point;
   nir_variable *clipdist;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
   unsigned next_imm;

   bool cap_face_is_sysval;
   bool cap_position_is_sysval;
   bool cap_point_is_sysval;
   bool cap_samplers_as_deref;
   bool cap_integers;
   bool cap_tg4_component_in_swizzle;
};

#define ttn_swizzle(b, src, x, y, z, w) \
   nir_swizzle(b, src, SWIZ(x, y, z, w), 4)
#define ttn_channel(b, src, swiz) \
   nir_channel(b, src, TGSI_SWIZZLE_##swiz)

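/* For example, ttn_channel(b, src, W) expands to
 * nir_channel(b, src, TGSI_SWIZZLE_W) and extracts the .w scalar, while
 * ttn_swizzle(b, src, X, X, X, X) builds a vec4 replicating .x.
 */
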
static gl_varying_slot
tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
{
   switch (semantic) {
   case TGSI_SEMANTIC_POSITION:
      return VARYING_SLOT_POS;
   case TGSI_SEMANTIC_COLOR:
      if (index == 0)
         return VARYING_SLOT_COL0;
      else
         return VARYING_SLOT_COL1;
   case TGSI_SEMANTIC_BCOLOR:
      if (index == 0)
         return VARYING_SLOT_BFC0;
      else
         return VARYING_SLOT_BFC1;
   case TGSI_SEMANTIC_FOG:
      return VARYING_SLOT_FOGC;
   case TGSI_SEMANTIC_PSIZE:
      return VARYING_SLOT_PSIZ;
   case TGSI_SEMANTIC_GENERIC:
      assert(index < 32);
      return VARYING_SLOT_VAR0 + index;
   case TGSI_SEMANTIC_FACE:
      return VARYING_SLOT_FACE;
   case TGSI_SEMANTIC_EDGEFLAG:
      return VARYING_SLOT_EDGE;
   case TGSI_SEMANTIC_PRIMID:
      return VARYING_SLOT_PRIMITIVE_ID;
   case TGSI_SEMANTIC_CLIPDIST:
      if (index == 0)
         return VARYING_SLOT_CLIP_DIST0;
      else
         return VARYING_SLOT_CLIP_DIST1;
   case TGSI_SEMANTIC_CLIPVERTEX:
      return VARYING_SLOT_CLIP_VERTEX;
   case TGSI_SEMANTIC_TEXCOORD:
      assert(index < 8);
      return VARYING_SLOT_TEX0 + index;
   case TGSI_SEMANTIC_PCOORD:
      return VARYING_SLOT_PNTC;
   case TGSI_SEMANTIC_VIEWPORT_INDEX:
      return VARYING_SLOT_VIEWPORT;
   case TGSI_SEMANTIC_LAYER:
      return VARYING_SLOT_LAYER;
   case TGSI_SEMANTIC_TESSINNER:
      return VARYING_SLOT_TESS_LEVEL_INNER;
   case TGSI_SEMANTIC_TESSOUTER:
      return VARYING_SLOT_TESS_LEVEL_OUTER;
   default:
      fprintf(stderr, "Bad TGSI semantic: %d/%d\n", semantic, index);
      abort();
   }
}

static enum gl_frag_depth_layout
ttn_get_depth_layout(unsigned tgsi_fs_depth_layout)
{
   switch (tgsi_fs_depth_layout) {
   case TGSI_FS_DEPTH_LAYOUT_NONE:
      return FRAG_DEPTH_LAYOUT_NONE;
   case TGSI_FS_DEPTH_LAYOUT_ANY:
      return FRAG_DEPTH_LAYOUT_ANY;
   case TGSI_FS_DEPTH_LAYOUT_GREATER:
      return FRAG_DEPTH_LAYOUT_GREATER;
   case TGSI_FS_DEPTH_LAYOUT_LESS:
      return FRAG_DEPTH_LAYOUT_LESS;
   case TGSI_FS_DEPTH_LAYOUT_UNCHANGED:
      return FRAG_DEPTH_LAYOUT_UNCHANGED;
   default:
      unreachable("bad TGSI FS depth layout");
   }
}

static enum glsl_interp_mode
ttn_translate_interp_mode(unsigned tgsi_interp)
{
   switch (tgsi_interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      return INTERP_MODE_FLAT;
   case TGSI_INTERPOLATE_LINEAR:
      return INTERP_MODE_NOPERSPECTIVE;
   case TGSI_INTERPOLATE_PERSPECTIVE:
      return INTERP_MODE_SMOOTH;
   case TGSI_INTERPOLATE_COLOR:
      return INTERP_MODE_NONE;
   default:
      unreachable("bad TGSI interpolation mode");
   }
}

static void
ttn_emit_declaration(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
   unsigned array_size = decl->Range.Last - decl->Range.First + 1;
   unsigned file = decl->Declaration.File;
   unsigned i;

   if (file == TGSI_FILE_TEMPORARY) {
      if (decl->Declaration.Array) {
         /* for arrays, we create variables instead of registers: */
         nir_variable *var =
            nir_variable_create(b->shader, nir_var_shader_temp,
                                glsl_array_type(glsl_vec4_type(), array_size, 0),
                                ralloc_asprintf(b->shader, "arr_%d",
                                                decl->Array.ArrayID));

         for (i = 0; i < array_size; i++) {
            /* point all the matching slots to the same var,
             * with appropriate offset set, mostly just so
             * we know what to do when tgsi does a non-indirect
             * access
             */
            c->temp_regs[decl->Range.First + i].reg = NULL;
            c->temp_regs[decl->Range.First + i].var = var;
            c->temp_regs[decl->Range.First + i].offset = i;
         }
      } else {
         for (i = 0; i < array_size; i++) {
            nir_def *reg = nir_decl_reg(b, 4, 32, 0);
            c->temp_regs[decl->Range.First + i].reg = reg;
            c->temp_regs[decl->Range.First + i].var = NULL;
            c->temp_regs[decl->Range.First + i].offset = 0;
         }
      }
   } else if (file == TGSI_FILE_ADDRESS) {
      c->addr_reg = nir_decl_reg(b, 4, 32, 0);
   } else if (file == TGSI_FILE_SYSTEM_VALUE) {
      /* Nothing to record for system values. */
   } else if (file == TGSI_FILE_BUFFER) {
      /* Nothing to record for buffers. */
   } else if (file == TGSI_FILE_IMAGE) {
      /* Nothing to record for images. */
   } else if (file == TGSI_FILE_SAMPLER) {
      /* Nothing to record for samplers. */
   } else if (file == TGSI_FILE_SAMPLER_VIEW) {
      struct tgsi_declaration_sampler_view *sview = &decl->SamplerView;
      nir_alu_type type;

      assert((sview->ReturnTypeX == sview->ReturnTypeY) &&
             (sview->ReturnTypeX == sview->ReturnTypeZ) &&
             (sview->ReturnTypeX == sview->ReturnTypeW));

      switch (sview->ReturnTypeX) {
      case TGSI_RETURN_TYPE_SINT:
         type = nir_type_int32;
         break;
      case TGSI_RETURN_TYPE_UINT:
         type = nir_type_uint32;
         break;
      case TGSI_RETURN_TYPE_FLOAT:
      default:
         type = nir_type_float32;
         break;
      }

      for (i = 0; i < array_size; i++) {
         c->samp_types[decl->Range.First + i] = type;
      }
   } else {
      bool is_array = (array_size > 1);

      assert(file == TGSI_FILE_INPUT ||
             file == TGSI_FILE_OUTPUT ||
             file == TGSI_FILE_CONSTANT);

      /* nothing to do for UBOs: */
      if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension &&
          decl->Dim.Index2D != 0) {
         b->shader->info.num_ubos =
            MAX2(b->shader->info.num_ubos, decl->Dim.Index2D);
         c->ubo_sizes[decl->Dim.Index2D] =
            MAX2(c->ubo_sizes[decl->Dim.Index2D], decl->Range.Last * 16);
         return;
      }

      if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
         is_array = (is_array && decl->Declaration.Array &&
                     (decl->Array.ArrayID != 0));
      }

      for (i = 0; i < array_size; i++) {
         unsigned idx = decl->Range.First + i;
         nir_variable *var = rzalloc(b->shader, nir_variable);

         var->data.driver_location = idx;

         var->type = glsl_vec4_type();
         if (is_array)
            var->type = glsl_array_type(var->type, array_size, 0);

         switch (file) {
         case TGSI_FILE_INPUT:
            var->data.read_only = true;
            var->data.mode = nir_var_shader_in;
            var->name = ralloc_asprintf(var, "in_%d", idx);

            if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
               if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
                  var->type = glsl_bool_type();
                  if (c->cap_face_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_FRONT_FACE;
                  } else {
                     var->data.location = VARYING_SLOT_FACE;
                  }
                  c->input_var_face = var;
               } else if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
                  if (c->cap_position_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_FRAG_COORD;
                  } else {
                     var->data.location = VARYING_SLOT_POS;
                  }
                  c->input_var_position = var;
               } else if (decl->Semantic.Name == TGSI_SEMANTIC_PCOORD) {
                  if (c->cap_point_is_sysval) {
                     var->data.mode = nir_var_system_value;
                     var->data.location = SYSTEM_VALUE_POINT_COORD;
                  } else {
                     var->data.location = VARYING_SLOT_PNTC;
                  }
                  c->input_var_point = var;
               } else {
                  var->data.location =
                     tgsi_varying_semantic_to_slot(decl->Semantic.Name,
                                                   decl->Semantic.Index);
               }
            } else {
               assert(!decl->Declaration.Semantic);
               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
            }
            var->data.index = 0;
            var->data.interpolation =
               ttn_translate_interp_mode(decl->Interp.Interpolate);

            c->inputs[idx] = var;

            for (int i = 0; i < array_size; i++)
               b->shader->info.inputs_read |= 1ull << (var->data.location + i);

            break;
         case TGSI_FILE_OUTPUT: {
            int semantic_name = decl->Semantic.Name;
            int semantic_index = decl->Semantic.Index;
            /* Since we can't load from outputs in the IR, we make temporaries
             * for the outputs and emit stores to the real outputs at the end of
             * the shader.
             */
            nir_def *reg = nir_decl_reg(b, 4, 32,
                                        is_array ? array_size : 0);

            var->data.mode = nir_var_shader_out;
            var->name = ralloc_asprintf(var, "out_%d", idx);
            var->data.index = 0;
            var->data.interpolation =
               ttn_translate_interp_mode(decl->Interp.Interpolate);
            var->data.patch = semantic_name == TGSI_SEMANTIC_TESSINNER ||
                              semantic_name == TGSI_SEMANTIC_TESSOUTER ||
                              semantic_name == TGSI_SEMANTIC_PATCH;

            if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
               switch (semantic_name) {
               case TGSI_SEMANTIC_COLOR: {
                  /* TODO tgsi loses some information, so we cannot
                   * actually differentiate here between DSB and MRT
                   * at this point.  But so far no drivers using tgsi-
                   * to-nir support dual source blend:
                   */
                  bool dual_src_blend = false;
                  if (dual_src_blend && (semantic_index == 1)) {
                     var->data.location = FRAG_RESULT_DATA0;
                     var->data.index = 1;
                  } else {
                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
                        var->data.location = FRAG_RESULT_COLOR;
                     else
                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
                  }
                  break;
               }
               case TGSI_SEMANTIC_POSITION:
                  var->data.location = FRAG_RESULT_DEPTH;
                  var->type = glsl_float_type();
                  break;
               case TGSI_SEMANTIC_STENCIL:
                  var->data.location = FRAG_RESULT_STENCIL;
                  var->type = glsl_int_type();
                  break;
               case TGSI_SEMANTIC_SAMPLEMASK:
                  var->data.location = FRAG_RESULT_SAMPLE_MASK;
                  var->type = glsl_int_type();
                  break;

               default:
                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
                          decl->Semantic.Name, decl->Semantic.Index);
                  abort();
               }
            } else {
               var->data.location =
                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
               if (var->data.location == VARYING_SLOT_FOGC ||
                   var->data.location == VARYING_SLOT_PSIZ) {
                  var->type = glsl_float_type();
               } else if (var->data.location == VARYING_SLOT_LAYER) {
                  var->type = glsl_int_type();
               } else if (b->shader->options->compact_arrays &&
                          var->data.location == VARYING_SLOT_CLIP_DIST0) {
                  var->type = glsl_array_type(glsl_float_type(),
                                              b->shader->info.clip_distance_array_size,
                                              sizeof(float));
                  c->clipdist = var;
               }
            }

            if (is_array) {
               unsigned j;
               for (j = 0; j < array_size; j++) {
                  c->output_regs[idx + j].offset = i + j;
                  c->output_regs[idx + j].reg = reg;
               }
            } else {
               c->output_regs[idx].offset = i;
               c->output_regs[idx].reg = reg;
            }

            c->outputs[idx] = var;

            if (b->shader->options->compact_arrays && var->data.location == VARYING_SLOT_CLIP_DIST1) {
               /* ignore this entirely */
               continue;
            }

            for (int i = 0; i < array_size; i++)
               b->shader->info.outputs_written |= 1ull << (var->data.location + i);
         }
            break;
         case TGSI_FILE_CONSTANT:
            var->data.mode = nir_var_uniform;
            var->name = ralloc_asprintf(var, "uniform_%d", idx);
            var->data.location = idx;
            break;
         default:
            unreachable("bad declaration file");
            return;
         }

         nir_shader_add_variable(b->shader, var);

         if (is_array)
            break;
      }

   }
}

static void
ttn_emit_immediate(struct ttn_compile *c)
{
   nir_builder *b = &c->build;
   struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
   nir_load_const_instr *load_const;
   int i;

   load_const = nir_load_const_instr_create(b->shader, 4, 32);
   c->imm_defs[c->next_imm] = &load_const->def;
   c->next_imm++;

   for (i = 0; i < load_const->def.num_components; i++)
      load_const->value[i].u32 = tgsi_imm->u[i].Uint;

   nir_builder_instr_insert(b, &load_const->instr);
}

static nir_def *
ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);

/* generate either a constant or indirect deref chain for accessing an
 * array variable.
 */
static nir_deref_instr *
ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
                struct tgsi_ind_register *indirect)
{
   nir_deref_instr *deref = nir_build_deref_var(&c->build, var);
   nir_def *index = nir_imm_int(&c->build, offset);
   if (indirect)
      index = nir_iadd(&c->build, index, ttn_src_for_indirect(c, indirect));
   return nir_build_deref_array(&c->build, deref, index);
}

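/* Illustration (hypothetical TGSI): with arr_1 declared over TEMP[4..7],
 * a source like TEMP[5] reaches this helper with offset == 1, and an
 * indirect access such as TEMP[ADDR[0].x+5] additionally adds the
 * address register, yielding the deref arr_1[1 + addr].
 */
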
/* Special case: Turn the frontface varying into a load of the
 * frontface variable, and create the vector as required by TGSI.
 */
static nir_def *
ttn_emulate_tgsi_front_face(struct ttn_compile *c)
{
   nir_def *tgsi_frontface[4];

   if (c->cap_face_is_sysval) {
      /* When it's a system value, it should be an integer vector: (F, 0, 0, 1)
       * F is 0xffffffff if front-facing, 0 if not.
       */

      nir_def *frontface = nir_load_front_face(&c->build, 1);

      tgsi_frontface[0] = nir_bcsel(&c->build,
                             frontface,
                             nir_imm_int(&c->build, 0xffffffff),
                             nir_imm_int(&c->build, 0));
      tgsi_frontface[1] = nir_imm_int(&c->build, 0);
      tgsi_frontface[2] = nir_imm_int(&c->build, 0);
      tgsi_frontface[3] = nir_imm_int(&c->build, 1);
   } else {
      /* When it's an input, it should be a float vector: (F, 0.0, 0.0, 1.0)
       * F is positive if front-facing, negative if not.
       */

      assert(c->input_var_face);
      nir_def *frontface = nir_load_var(&c->build, c->input_var_face);

      tgsi_frontface[0] = nir_bcsel(&c->build,
                             frontface,
                             nir_imm_float(&c->build, 1.0),
                             nir_imm_float(&c->build, -1.0));
      tgsi_frontface[1] = nir_imm_float(&c->build, 0.0);
      tgsi_frontface[2] = nir_imm_float(&c->build, 0.0);
      tgsi_frontface[3] = nir_imm_float(&c->build, 1.0);
   }

   return nir_vec(&c->build, tgsi_frontface, 4);
}

static nir_src
ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
                           struct tgsi_ind_register *indirect,
                           struct tgsi_dimension *dim,
                           struct tgsi_ind_register *dimind,
                           bool src_is_float)
{
   nir_builder *b = &c->build;
   nir_src src;

   memset(&src, 0, sizeof(src));

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      if (c->temp_regs[index].var) {
         unsigned offset = c->temp_regs[index].offset;
         nir_variable *var = c->temp_regs[index].var;
         nir_def *load = nir_load_deref(&c->build,
               ttn_array_deref(c, var, offset, indirect));

         src = nir_src_for_ssa(load);
      } else {
         assert(!indirect);
         src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[index].reg));
      }
      assert(!dim);
      break;

   case TGSI_FILE_ADDRESS:
      src = nir_src_for_ssa(nir_load_reg(b, c->addr_reg));
      assert(!dim);
      break;

   case TGSI_FILE_IMMEDIATE:
      src = nir_src_for_ssa(c->imm_defs[index]);
      assert(!indirect);
      assert(!dim);
      break;

   case TGSI_FILE_SYSTEM_VALUE: {
      nir_def *load;

      assert(!indirect);
      assert(!dim);

      switch (c->scan->system_value_semantic_name[index]) {
      case TGSI_SEMANTIC_VERTEXID_NOBASE:
         load = nir_load_vertex_id_zero_base(b);
         break;
      case TGSI_SEMANTIC_VERTEXID:
         load = nir_load_vertex_id(b);
         break;
      case TGSI_SEMANTIC_BASEVERTEX:
         load = nir_load_base_vertex(b);
         break;
      case TGSI_SEMANTIC_INSTANCEID:
         load = nir_load_instance_id(b);
         break;
      case TGSI_SEMANTIC_FACE:
         assert(c->cap_face_is_sysval);
         load = ttn_emulate_tgsi_front_face(c);
         break;
      case TGSI_SEMANTIC_POSITION:
         assert(c->cap_position_is_sysval);
         load = nir_load_frag_coord(b);
         break;
      case TGSI_SEMANTIC_PCOORD:
         assert(c->cap_point_is_sysval);
         load = nir_load_point_coord(b);
         break;
      case TGSI_SEMANTIC_THREAD_ID:
         load = nir_load_local_invocation_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_ID:
         load = nir_load_workgroup_id(b);
         break;
      case TGSI_SEMANTIC_BLOCK_SIZE:
         load = nir_load_workgroup_size(b);
         break;
      case TGSI_SEMANTIC_CS_USER_DATA_AMD:
         load = nir_load_user_data_amd(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL:
         load = nir_load_tess_level_inner_default(b);
         break;
      case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL:
         load = nir_load_tess_level_outer_default(b);
         break;
      case TGSI_SEMANTIC_SAMPLEID:
         load = nir_load_sample_id(b);
         b->shader->info.fs.uses_sample_shading = true;
         break;
      default:
         unreachable("bad system value");
      }

      if (load->num_components == 2)
         load = nir_swizzle(b, load, SWIZ(X, Y, Y, Y), 4);
      else if (load->num_components == 3)
         load = nir_swizzle(b, load, SWIZ(X, Y, Z, Z), 4);

      src = nir_src_for_ssa(load);
      break;
   }

   case TGSI_FILE_INPUT:
      if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_FACE) {
         assert(!c->cap_face_is_sysval && c->input_var_face);
         return nir_src_for_ssa(ttn_emulate_tgsi_front_face(c));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_POSITION) {
         assert(!c->cap_position_is_sysval && c->input_var_position);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_position));
      } else if (c->scan->processor == PIPE_SHADER_FRAGMENT &&
          c->scan->input_semantic_name[index] == TGSI_SEMANTIC_PCOORD) {
         assert(!c->cap_point_is_sysval && c->input_var_point);
         return nir_src_for_ssa(nir_load_var(&c->build, c->input_var_point));
      } else {
         /* Indirection on input arrays isn't supported by TTN. */
         assert(!dim);
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->inputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (c->scan->processor == PIPE_SHADER_FRAGMENT) {
         c->outputs[index]->data.fb_fetch_output = 1;
         nir_deref_instr *deref = nir_build_deref_var(&c->build,
                                                      c->outputs[index]);
         return nir_src_for_ssa(nir_load_deref(&c->build, deref));
      }
      unreachable("unsupported output read");
      break;

   case TGSI_FILE_CONSTANT: {
      nir_intrinsic_instr *load;
      nir_intrinsic_op op;
      unsigned srcn = 0;

      if (dim && (dim->Index > 0 || dim->Indirect)) {
         op = nir_intrinsic_load_ubo;
      } else {
         op = nir_intrinsic_load_uniform;
      }

      load = nir_intrinsic_instr_create(b->shader, op);
      if (op == nir_intrinsic_load_uniform) {
         nir_intrinsic_set_dest_type(load, src_is_float ? nir_type_float :
                                                          nir_type_int);
      }

      load->num_components = 4;
      if (dim && (dim->Index > 0 || dim->Indirect)) {
         if (dimind) {
            load->src[srcn] =
               ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
                                          NULL, NULL, NULL, false);
         } else {
            /* UBOs start at index 1 in TGSI: */
            load->src[srcn] =
               nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
         }
         srcn++;
      }

      nir_def *offset;
      if (op == nir_intrinsic_load_ubo) {
         /* UBO loads don't have a base offset. */
         offset = nir_imm_int(b, index);
         if (indirect) {
            offset = nir_iadd(b, offset, ttn_src_for_indirect(c, indirect));
         }
         /* UBO offsets are in bytes, but TGSI gives them to us in vec4's */
         offset = nir_ishl_imm(b, offset, 4);
         nir_intrinsic_set_align(load, 16, 0);

         /* Set a very conservative base/range of the access: 16 bytes if not
          * indirect at all, offset to the end of the UBO if the offset is
          * indirect, and totally unknown if the block number is indirect.
          */
         uint32_t base = index * 16;
         nir_intrinsic_set_range_base(load, base);
         if (dimind)
            nir_intrinsic_set_range(load, ~0);
         else if (indirect)
            nir_intrinsic_set_range(load, c->ubo_sizes[dim->Index] - base);
         else
            nir_intrinsic_set_range(load, base + 16);
      } else {
         nir_intrinsic_set_base(load, index);
         if (indirect) {
            offset = ttn_src_for_indirect(c, indirect);
            nir_intrinsic_set_range(load, c->build.shader->num_uniforms * 16 - index);
         } else {
            offset = nir_imm_int(b, 0);
            nir_intrinsic_set_range(load, 1);
         }
      }
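      /* Worked example: a direct CONST[5] access from a bound UBO ends up
       * with offset = 5 << 4 = 80 bytes, range_base = 80, and
       * range = 80 + 16 = 96 under the logic above; an indirect offset
       * instead widens the range to the remainder of the UBO.
       */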
      load->src[srcn++] = nir_src_for_ssa(offset);

      nir_def_init(&load->instr, &load->def, 4, 32);
      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->def);
      break;
   }

   default:
      unreachable("bad src file");
   }


   return src;
}

static nir_def *
ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
{
   nir_builder *b = &c->build;
   nir_alu_src src;
   memset(&src, 0, sizeof(src));
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = indirect->Swizzle;
   src.src = ttn_src_for_file_and_index(c,
                                        indirect->File,
                                        indirect->Index,
                                        NULL, NULL, NULL,
                                        false);
   return nir_mov_alu(b, src, 1);
}

static nir_variable *
ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
{
   struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
   unsigned index = tgsi_dst->Index;

   if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
      /* we should not have an indirect when there is no var! */
      if (!c->temp_regs[index].var)
         assert(!tgsi_dst->Indirect);
      return c->temp_regs[index].var;
   }

   return NULL;
}

static nir_def *
ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc,
            int src_idx)
{
   nir_builder *b = &c->build;
   struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
   enum tgsi_opcode opcode = c->token->FullInstruction.Instruction.Opcode;
   unsigned tgsi_src_type = tgsi_opcode_infer_src_type(opcode, src_idx);
   bool src_is_float = (tgsi_src_type == TGSI_TYPE_FLOAT ||
                        tgsi_src_type == TGSI_TYPE_DOUBLE ||
                        tgsi_src_type == TGSI_TYPE_UNTYPED);
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   if (tgsi_src->File == TGSI_FILE_NULL) {
      return nir_imm_float(b, 0.0);
   } else if (tgsi_src->File == TGSI_FILE_SAMPLER ||
              tgsi_src->File == TGSI_FILE_IMAGE ||
              tgsi_src->File == TGSI_FILE_BUFFER) {
      /* Only the index of the resource gets used in texturing, and it will
       * handle looking that up on its own instead of using the nir_alu_src.
       */
      assert(!tgsi_src->Indirect);
      return NULL;
   } else {
      struct tgsi_ind_register *ind = NULL;
      struct tgsi_dimension *dim = NULL;
      struct tgsi_ind_register *dimind = NULL;
      if (tgsi_src->Indirect)
         ind = &tgsi_fsrc->Indirect;
      if (tgsi_src->Dimension) {
         dim = &tgsi_fsrc->Dimension;
         if (dim->Indirect)
            dimind = &tgsi_fsrc->DimIndirect;
      }
      src.src = ttn_src_for_file_and_index(c,
                                           tgsi_src->File,
                                           tgsi_src->Index,
                                           ind, dim, dimind,
                                           src_is_float);
   }

   src.swizzle[0] = tgsi_src->SwizzleX;
   src.swizzle[1] = tgsi_src->SwizzleY;
   src.swizzle[2] = tgsi_src->SwizzleZ;
   src.swizzle[3] = tgsi_src->SwizzleW;

   nir_def *def = nir_mov_alu(b, src, 4);

   if (tgsi_type_is_64bit(tgsi_src_type))
      def = nir_bitcast_vector(b, def, 64);

   if (tgsi_src->Absolute) {
      assert(src_is_float);
      def = nir_fabs(b, def);
   }

   if (tgsi_src->Negate) {
      if (src_is_float)
         def = nir_fneg(b, def);
      else
         def = nir_ineg(b, def);
   }

   return def;
}

static nir_def *
ttn_alu(nir_builder *b, nir_op op, unsigned dest_bitsize, nir_def **src)
{
   nir_def *def = nir_build_alu_src_arr(b, op, src);
   if (def->bit_size == 1)
      def = nir_ineg(b, nir_b2iN(b, def, dest_bitsize));
   assert(def->bit_size == dest_bitsize);
   if (dest_bitsize == 64) {
      /* Replicate before bitcasting, so we end up with 4x32 at the end */
      if (def->num_components == 1)
         def = nir_replicate(b, def, 2);

      if (def->num_components > 2) {
         /* 32 -> 64 bit conversion ops are supposed to only convert the first
          * two components, and we need to truncate here to avoid creating a
          * vec8 after bitcasting the destination.
          */
         def = nir_trim_vector(b, def, 2);
      }
      def = nir_bitcast_vector(b, def, 32);
   }
   return def;
}

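/* Note: the 64-bit path above keeps every value in a 4x32 representation:
 * a one-component 64-bit result is replicated to vec2, wider results are
 * trimmed to two components, and the result is bitcast to 32-bit pairs;
 * ttn_get_src() reverses this with nir_bitcast_vector(b, def, 64) when a
 * 64-bit source is read back.
 */
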
/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 */
static nir_def *
ttn_exp(nir_builder *b, nir_def **src)
{
   nir_def *srcx = ttn_channel(b, src[0], X);

   return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
                      nir_fsub(b, srcx, nir_ffloor(b, srcx)),
                      nir_fexp2(b, srcx),
                      nir_imm_float(b, 1.0));
}

/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static nir_def *
ttn_log(nir_builder *b, nir_def **src)
{
   nir_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
   nir_def *log2 = nir_flog2(b, abs_srcx);

   return nir_vec4(b, nir_ffloor(b, log2),
                      nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
                      nir_flog2(b, abs_srcx),
                      nir_imm_float(b, 1.0));
}

/* DST - Distance Vector
 *   dst.x = 1.0
 *   dst.y = src0.y \times src1.y
 *   dst.z = src0.z
 *   dst.w = src1.w
 */
static nir_def *
ttn_dst(nir_builder *b, nir_def **src)
{
   return nir_vec4(b, nir_imm_float(b, 1.0),
                      nir_fmul(b, ttn_channel(b, src[0], Y),
                                  ttn_channel(b, src[1], Y)),
                      ttn_channel(b, src[0], Z),
                      ttn_channel(b, src[1], W));
}

/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
 *  dst.w = 1.0
 */
static nir_def *
ttn_lit(nir_builder *b, nir_def **src)
{
   nir_def *src0_y = ttn_channel(b, src[0], Y);
   nir_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
                                              nir_imm_float(b, 128.0)),
                                  nir_imm_float(b, -128.0));
   nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                               wclamp);
   nir_def *z = nir_bcsel(b, nir_flt_imm(b, ttn_channel(b, src[0], X), 0.0),
                                 nir_imm_float(b, 0.0), pow);

   return nir_vec4(b, nir_imm_float(b, 1.0),
                      nir_fmax(b, ttn_channel(b, src[0], X),
                                  nir_imm_float(b, 0.0)),
                      z, nir_imm_float(b, 1.0));
}

static void
ttn_barrier(nir_builder *b)
{
   nir_barrier(b, .execution_scope = SCOPE_WORKGROUP);
}

static void
ttn_kill(nir_builder *b)
{
   nir_discard(b);
   b->shader->info.fs.uses_discard = true;
}

static void
ttn_kill_if(nir_builder *b, nir_def **src)
{
   /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
   b->exact = true;
   nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
   b->exact = false;

   nir_discard_if(b, cmp);
   b->shader->info.fs.uses_discard = true;
}

static void
get_texture_info(unsigned texture,
                 enum glsl_sampler_dim *dim,
                 bool *is_shadow,
                 bool *is_array)
{
   assert(is_array);
   *is_array = false;

   if (is_shadow)
      *is_shadow = false;

   switch (texture) {
   case TGSI_TEXTURE_BUFFER:
      *dim = GLSL_SAMPLER_DIM_BUF;
      break;
   case TGSI_TEXTURE_1D:
      *dim = GLSL_SAMPLER_DIM_1D;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      *dim = GLSL_SAMPLER_DIM_1D;
      *is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW1D:
      *dim = GLSL_SAMPLER_DIM_1D;
      *is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      *dim = GLSL_SAMPLER_DIM_1D;
      *is_shadow = true;
      *is_array = true;
      break;
   case TGSI_TEXTURE_2D:
      *dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      *dim = GLSL_SAMPLER_DIM_2D;
      *is_array = true;
      break;
   case TGSI_TEXTURE_2D_MSAA:
      *dim = GLSL_SAMPLER_DIM_MS;
      break;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      *dim = GLSL_SAMPLER_DIM_MS;
      *is_array = true;
      break;
   case TGSI_TEXTURE_SHADOW2D:
      *dim = GLSL_SAMPLER_DIM_2D;
      *is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      *dim = GLSL_SAMPLER_DIM_2D;
      *is_shadow = true;
      *is_array = true;
      break;
   case TGSI_TEXTURE_3D:
      *dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TGSI_TEXTURE_CUBE:
      *dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      *dim = GLSL_SAMPLER_DIM_CUBE;
      *is_array = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      *dim = GLSL_SAMPLER_DIM_CUBE;
      *is_shadow = true;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      *dim = GLSL_SAMPLER_DIM_CUBE;
      *is_shadow = true;
      *is_array = true;
      break;
   case TGSI_TEXTURE_RECT:
      *dim = GLSL_SAMPLER_DIM_RECT;
      break;
   case TGSI_TEXTURE_SHADOWRECT:
      *dim = GLSL_SAMPLER_DIM_RECT;
      *is_shadow = true;
      break;
   default:
      fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
      abort();
   }
}

static enum glsl_base_type
base_type_for_alu_type(nir_alu_type type)
{
   type = nir_alu_type_get_base_type(type);

   switch (type) {
   case nir_type_float:
      return GLSL_TYPE_FLOAT;
   case nir_type_int:
      return GLSL_TYPE_INT;
   case nir_type_uint:
      return GLSL_TYPE_UINT;
   default:
      unreachable("invalid type");
   }
}

static nir_variable *
get_sampler_var(struct ttn_compile *c, int binding,
                enum glsl_sampler_dim dim,
                bool is_shadow,
                bool is_array,
                enum glsl_base_type base_type,
                nir_texop op)
{
   nir_variable *var = c->samplers[binding];
   if (!var) {
      const struct glsl_type *type =
         glsl_sampler_type(dim, is_shadow, is_array, base_type);
      var = nir_variable_create(c->build.shader, nir_var_uniform, type,
                                "sampler");
      var->data.binding = binding;
      var->data.explicit_binding = true;

      c->samplers[binding] = var;
      c->num_samplers = MAX2(c->num_samplers, binding + 1);

      /* Record textures used */
      BITSET_SET(c->build.shader->info.textures_used, binding);
      if (op == nir_texop_txf || op == nir_texop_txf_ms)
         BITSET_SET(c->build.shader->info.textures_used_by_txf, binding);
      BITSET_SET(c->build.shader->info.samplers_used, binding);
   }

   return var;
}

static nir_variable *
get_image_var(struct ttn_compile *c, int binding,
              enum glsl_sampler_dim dim,
              bool is_array,
              enum glsl_base_type base_type,
              enum gl_access_qualifier access,
              enum pipe_format format)
{
   nir_variable *var = c->images[binding];

   if (!var) {
      const struct glsl_type *type = glsl_image_type(dim, is_array, base_type);

      var = nir_variable_create(c->build.shader, nir_var_image, type, "image");
      var->data.binding = binding;
      var->data.explicit_binding = true;
      var->data.access = access;
      var->data.image.format = format;

      c->images[binding] = var;
      c->num_images = MAX2(c->num_images, binding + 1);
      if (dim == GLSL_SAMPLER_DIM_MS)
         c->num_msaa_images = c->num_images;
   }

   return var;
}

static void
add_ssbo_var(struct ttn_compile *c, int binding)
{
   nir_variable *var = c->ssbo[binding];

   if (!var) {
      /* A length of 0 is used to denote unsized arrays */
      const struct glsl_type *type = glsl_array_type(glsl_uint_type(), 0, 0);

      struct glsl_struct_field field = {
            .type = type,
            .name = "data",
            .location = -1,
      };

      var = nir_variable_create(c->build.shader, nir_var_mem_ssbo, type, "ssbo");
      var->data.binding = binding;
      var->interface_type =
         glsl_interface_type(&field, 1, GLSL_INTERFACE_PACKING_STD430,
                             false, "data");
      c->ssbo[binding] = var;
   }
}

static nir_def *
ttn_tex(struct ttn_compile *c, nir_def **src)
{
   nir_builder *b = &c->build;
   struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs, samp = 1, sview, i;

   switch (tgsi_inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TEX2:
      op = nir_texop_tex;
      num_srcs = 1;
      samp = 2;
      break;
   case TGSI_OPCODE_TXP:
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXB2:
      op = nir_texop_txb;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TEX_LZ:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXL2:
      op = nir_texop_txl;
      num_srcs = 2;
      samp = 2;
      break;
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TXF_LZ:
      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
         op = nir_texop_txf_ms;
      } else {
         op = nir_texop_txf;
      }
      num_srcs = 2;
      break;
   case TGSI_OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      samp = 3;
      break;
   case TGSI_OPCODE_LODQ:
      op = nir_texop_lod;
      num_srcs = 1;
      break;
   case TGSI_OPCODE_TG4:
      /* TODO: Shadow cube samplers unsupported. */
      assert(tgsi_inst->Texture.Texture != TGSI_TEXTURE_SHADOWCUBE_ARRAY);
      op = nir_texop_tg4;
      num_srcs = 1;
      samp = 2;
      break;

   default:
      fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
      abort();
   }

   if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
       tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      num_srcs++;
   }

   /* Deref sources */
   num_srcs += 2;

   num_srcs += tgsi_inst->Texture.NumOffsets;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;

   get_texture_info(tgsi_inst->Texture.Texture,
                    &instr->sampler_dim, &instr->is_shadow, &instr->is_array);

   instr->coord_components =
      glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);

   if (instr->is_array)
      instr->coord_components++;

   assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);

   /* TODO if we supported any opc's which take an explicit SVIEW
    * src, we would use that here instead.  But for the "legacy"
    * texture opc's the SVIEW index is the same as the SAMP index:
    */
   sview = tgsi_inst->Src[samp].Register.Index;

   nir_alu_type sampler_type =
      sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;

   if (op == nir_texop_lod) {
      instr->dest_type = nir_type_float32;
   } else {
      instr->dest_type = sampler_type;
   }

   nir_variable *var =
      get_sampler_var(c, sview, instr->sampler_dim,
                      instr->is_shadow,
                      instr->is_array,
                      base_type_for_alu_type(sampler_type),
                      op);

   nir_deref_instr *deref = nir_build_deref_var(b, var);

   unsigned src_number = 0;

   instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                                &deref->def);
   src_number++;
   instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
                                                &deref->def);
   src_number++;

   instr->src[src_number] =
      nir_tex_src_for_ssa(nir_tex_src_coord,
                          nir_trim_vector(b, src[0], instr->coord_components));
   src_number++;

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
                                                   ttn_channel(b, src[0], W));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
                                                   ttn_channel(b, src[0], W));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
                                                   ttn_channel(b, src[1], X));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
      if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ)
         instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
                                                   ttn_channel(b, src[1], X));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF ||
       tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ) {
      if (op == nir_texop_txf_ms) {
         instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
                                                      ttn_channel(b, src[0], W));
      } else {
         if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF_LZ)
            instr->src[src_number].src = nir_src_for_ssa(nir_imm_int(b, 0));
         else
            instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
         instr->src[src_number].src_type = nir_tex_src_lod;
      }
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
      instr->src[src_number] =
         nir_tex_src_for_ssa(nir_tex_src_ddx,
               nir_trim_vector(b, src[1], nir_tex_instr_src_size(instr, src_number)));
      src_number++;
      instr->src[src_number] =
         nir_tex_src_for_ssa(nir_tex_src_ddy,
               nir_trim_vector(b, src[2], nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
      if (c->cap_tg4_component_in_swizzle)
         instr->component = tgsi_inst->Src[samp].Register.SwizzleX;
      else
         instr->component = nir_scalar_as_uint(nir_scalar_resolved(src[1], 0));
   }

   if (instr->is_shadow) {
      if (instr->coord_components == 4)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
      else if (instr->coord_components == 3)
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
      else
         instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
      struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
      /* since TexOffset isn't using tgsi_full_src_register we get to
       * do some extra gymnastics:
       */
      nir_alu_src src;

      memset(&src, 0, sizeof(src));

      src.src = ttn_src_for_file_and_index(c,
                                           tex_offset->File,
                                           tex_offset->Index,
                                           NULL, NULL, NULL,
                                           true);

      src.swizzle[0] = tex_offset->SwizzleX;
      src.swizzle[1] = tex_offset->SwizzleY;
      src.swizzle[2] = tex_offset->SwizzleZ;
      src.swizzle[3] = TGSI_SWIZZLE_W;

      instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_offset,
                                                   nir_mov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
      src_number++;
   }

   assert(src_number == num_srcs);
   assert(src_number == instr->num_srcs);

   nir_def_init(&instr->instr, &instr->def,
                nir_tex_instr_dest_size(instr), 32);
   nir_builder_instr_insert(b, &instr->instr);
   return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
}

1425 /* TGSI_OPCODE_TXQ is actually two distinct operations:
1426  *
1427  *     dst.x = texture\_width(unit, lod)
1428  *     dst.y = texture\_height(unit, lod)
1429  *     dst.z = texture\_depth(unit, lod)
1430  *     dst.w = texture\_levels(unit)
1431  *
1432  * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
1433  */
1434 static nir_def *
ttn_txq(struct ttn_compile * c,nir_def ** src)1435 ttn_txq(struct ttn_compile *c, nir_def **src)
1436 {
1437    nir_builder *b = &c->build;
1438    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1439    nir_tex_instr *txs, *qlv;
1440 
1441    txs = nir_tex_instr_create(b->shader, 2);
1442    txs->op = nir_texop_txs;
1443    txs->dest_type = nir_type_uint32;
1444    get_texture_info(tgsi_inst->Texture.Texture,
1445                     &txs->sampler_dim, &txs->is_shadow, &txs->is_array);
1446 
1447    qlv = nir_tex_instr_create(b->shader, 1);
1448    qlv->op = nir_texop_query_levels;
1449    qlv->dest_type = nir_type_uint32;
1450    get_texture_info(tgsi_inst->Texture.Texture,
1451                     &qlv->sampler_dim, &qlv->is_shadow, &qlv->is_array);
1452 
1453    assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
1454    int sview = tgsi_inst->Src[1].Register.Index;
1455 
1456    nir_alu_type sampler_type =
1457       sview < c->num_samp_types ? c->samp_types[sview] : nir_type_float32;
1458 
1459    nir_variable *var =
1460       get_sampler_var(c, sview, txs->sampler_dim,
1461                       txs->is_shadow,
1462                       txs->is_array,
1463                       base_type_for_alu_type(sampler_type),
1464                       nir_texop_txs);
1465 
1466    nir_deref_instr *deref = nir_build_deref_var(b, var);
1467 
1468    txs->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
1469                                      &deref->def);
1470 
1471    qlv->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
1472                                      &deref->def);
1473 
1474    /* lod: */
1475    txs->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod,
1476                                      ttn_channel(b, src[0], X));
1477 
1478    nir_def_init(&txs->instr, &txs->def, nir_tex_instr_dest_size(txs), 32);
1479    nir_builder_instr_insert(b, &txs->instr);
1480 
1481    nir_def_init(&qlv->instr, &qlv->def, 1, 32);
1482    nir_builder_instr_insert(b, &qlv->instr);
1483 
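   /* Pack the txs result into .xyz of the returned vec4 and the query_levels
    * result into .w, matching the TGSI TXQ destination layout described above.
    */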
1484    return nir_vector_insert_imm(b,
1485                                 nir_pad_vector_imm_int(b, &txs->def, 0, 4),
1486                                 &qlv->def, 3);
1487 }
1488 
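/* Infer the GLSL base type for an image access from its TGSI memory format:
 * pure-integer formats map to GLSL_TYPE_INT/GLSL_TYPE_UINT, everything else
 * is treated as float.
 */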
1489 static enum glsl_base_type
1490 get_image_base_type(struct tgsi_full_instruction *tgsi_inst)
1491 {
1492    const struct util_format_description *desc =
1493       util_format_description(tgsi_inst->Memory.Format);
1494 
1495    if (desc->channel[0].pure_integer) {
1496       if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED)
1497          return GLSL_TYPE_INT;
1498       else
1499          return GLSL_TYPE_UINT;
1500    }
1501    return GLSL_TYPE_FLOAT;
1502 }
1503 
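/* Translate the TGSI memory qualifier bits on a LOAD/STORE into NIR's
 * gl_access_qualifier flags.
 */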
1504 static enum gl_access_qualifier
1505 get_mem_qualifier(struct tgsi_full_instruction *tgsi_inst)
1506 {
1507    enum gl_access_qualifier access = 0;
1508 
1509    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_COHERENT)
1510       access |= ACCESS_COHERENT;
1511    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT)
1512       access |= ACCESS_RESTRICT;
1513    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
1514       access |= ACCESS_VOLATILE;
1515    if (tgsi_inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY)
1516       access |= ACCESS_NON_TEMPORAL;
1517 
1518    return access;
1519 }
1520 
1521 static nir_def *
1522 ttn_mem(struct ttn_compile *c, nir_def **src)
1523 {
1524    nir_builder *b = &c->build;
1525    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1526    nir_intrinsic_instr *instr = NULL;
1527    unsigned resource_index, addr_src_index, file;
1528 
1529    switch (tgsi_inst->Instruction.Opcode) {
1530    case TGSI_OPCODE_LOAD:
1531       assert(!tgsi_inst->Src[0].Register.Indirect);
1532       resource_index = tgsi_inst->Src[0].Register.Index;
1533       file = tgsi_inst->Src[0].Register.File;
1534       addr_src_index = 1;
1535       break;
1536    case TGSI_OPCODE_STORE:
1537       assert(!tgsi_inst->Dst[0].Register.Indirect);
1538       resource_index = tgsi_inst->Dst[0].Register.Index;
1539       file = tgsi_inst->Dst[0].Register.File;
1540       addr_src_index = 0;
1541       break;
1542    default:
1543       unreachable("unexpected memory opcode");
1544    }
1545 
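   /* Buffer (SSBO) access: becomes load_ssbo/store_ssbo, addressed by the
    * buffer index and the offset taken from the .x channel of the address
    * operand.
    */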
1546    if (file == TGSI_FILE_BUFFER) {
1547       nir_intrinsic_op op;
1548 
1549       switch (tgsi_inst->Instruction.Opcode) {
1550       case TGSI_OPCODE_LOAD:
1551          op = nir_intrinsic_load_ssbo;
1552          break;
1553       case TGSI_OPCODE_STORE:
1554          op = nir_intrinsic_store_ssbo;
1555          break;
1556       default:
1557          unreachable("unexpected buffer opcode");
1558       }
1559 
1560       add_ssbo_var(c, resource_index);
1561 
1562       instr = nir_intrinsic_instr_create(b->shader, op);
1563       instr->num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1564       nir_intrinsic_set_access(instr, get_mem_qualifier(tgsi_inst));
1565       nir_intrinsic_set_align(instr, 4, 0);
1566 
1567       unsigned i = 0;
1568       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1569          instr->src[i++] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1570                                                        instr->num_components));
1571       instr->src[i++] = nir_src_for_ssa(nir_imm_int(b, resource_index));
1572       instr->src[i++] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], X));
1573 
1574       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE)
1575          nir_intrinsic_set_write_mask(instr, tgsi_inst->Dst[0].Register.WriteMask);
1576 
1577    } else if (file == TGSI_FILE_IMAGE) {
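      /* Image access: emit image_deref_load/store on a deref of the image
       * variable; the coordinate (and the sample index, for MSAA images)
       * comes from the TGSI address operand.
       */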
1578       nir_intrinsic_op op;
1579 
1580       switch (tgsi_inst->Instruction.Opcode) {
1581       case TGSI_OPCODE_LOAD:
1582          op = nir_intrinsic_image_deref_load;
1583          break;
1584       case TGSI_OPCODE_STORE:
1585          op = nir_intrinsic_image_deref_store;
1586          break;
1587       default:
1588          unreachable("unexpected file opcode");
1589       }
1590 
1591       instr = nir_intrinsic_instr_create(b->shader, op);
1592 
1593       /* Set the image variable dereference. */
1594       enum glsl_sampler_dim dim;
1595       bool is_array;
1596       get_texture_info(tgsi_inst->Memory.Texture, &dim, NULL, &is_array);
1597 
1598       enum glsl_base_type base_type = get_image_base_type(tgsi_inst);
1599       enum gl_access_qualifier access = get_mem_qualifier(tgsi_inst);
1600 
1601       nir_variable *image =
1602          get_image_var(c, resource_index,
1603                        dim, is_array, base_type, access,
1604                        tgsi_inst->Memory.Format);
1605       nir_deref_instr *image_deref = nir_build_deref_var(b, image);
1606       const struct glsl_type *type = image_deref->type;
1607 
1608       nir_intrinsic_set_access(instr, image_deref->var->data.access);
1609 
1610       instr->src[0] = nir_src_for_ssa(&image_deref->def);
1611       instr->src[1] = nir_src_for_ssa(src[addr_src_index]);
1612 
1613       /* Set the sample argument, which is undefined for single-sample images. */
1614       if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
1615          instr->src[2] = nir_src_for_ssa(ttn_channel(b, src[addr_src_index], W));
1616       } else {
1617          instr->src[2] = nir_src_for_ssa(nir_undef(b, 1, 32));
1618       }
1619 
1620       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1621          instr->src[3] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
1622       }
1623 
1624       unsigned num_components = util_last_bit(tgsi_inst->Dst[0].Register.WriteMask);
1625 
1626       if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_STORE) {
1627          instr->src[3] = nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
1628                                                      num_components));
1629          instr->src[4] = nir_src_for_ssa(nir_imm_int(b, 0)); /* LOD */
1630       }
1631 
1632       instr->num_components = num_components;
1633    } else {
1634       unreachable("unexpected file");
1635    }
1636 
1637 
1638    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_LOAD) {
1639       nir_def_init(&instr->instr, &instr->def, instr->num_components, 32);
1640       nir_builder_instr_insert(b, &instr->instr);
1641       return nir_pad_vector_imm_int(b, &instr->def, 0, 4);
1642    } else {
1643       nir_builder_instr_insert(b, &instr->instr);
1644       return NULL;
1645    }
1646 }
1647 
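/* Direct TGSI-to-NIR ALU opcode translation table. An entry of 0 means the
 * opcode has no one-to-one ALU mapping and is handled explicitly in
 * ttn_emit_instruction(); MOV is special-cased there so its entry can never
 * be mistaken for that sentinel.
 */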
1648 static const nir_op op_trans[TGSI_OPCODE_LAST] = {
1649    [TGSI_OPCODE_ARL] = 0,
1650    [TGSI_OPCODE_MOV] = nir_op_mov,
1651    [TGSI_OPCODE_FBFETCH] = nir_op_mov,
1652    [TGSI_OPCODE_LIT] = 0,
1653    [TGSI_OPCODE_RCP] = nir_op_frcp,
1654    [TGSI_OPCODE_RSQ] = nir_op_frsq,
1655    [TGSI_OPCODE_EXP] = 0,
1656    [TGSI_OPCODE_LOG] = 0,
1657    [TGSI_OPCODE_MUL] = nir_op_fmul,
1658    [TGSI_OPCODE_ADD] = nir_op_fadd,
1659    [TGSI_OPCODE_DP3] = 0,
1660    [TGSI_OPCODE_DP4] = 0,
1661    [TGSI_OPCODE_DST] = 0,
1662    [TGSI_OPCODE_MIN] = nir_op_fmin,
1663    [TGSI_OPCODE_MAX] = nir_op_fmax,
1664    [TGSI_OPCODE_SLT] = nir_op_slt,
1665    [TGSI_OPCODE_SGE] = nir_op_sge,
1666    [TGSI_OPCODE_MAD] = nir_op_ffma,
1667    [TGSI_OPCODE_TEX_LZ] = 0,
1668    [TGSI_OPCODE_LRP] = 0,
1669    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
1670    [TGSI_OPCODE_FRC] = nir_op_ffract,
1671    [TGSI_OPCODE_TXF_LZ] = 0,
1672    [TGSI_OPCODE_FLR] = nir_op_ffloor,
1673    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
1674    [TGSI_OPCODE_EX2] = nir_op_fexp2,
1675    [TGSI_OPCODE_LG2] = nir_op_flog2,
1676    [TGSI_OPCODE_POW] = nir_op_fpow,
1677    [TGSI_OPCODE_COS] = nir_op_fcos,
1678    [TGSI_OPCODE_KILL] = 0,
1679    [TGSI_OPCODE_PK2H] = 0, /* XXX */
1680    [TGSI_OPCODE_PK2US] = 0, /* XXX */
1681    [TGSI_OPCODE_PK4B] = 0, /* XXX */
1682    [TGSI_OPCODE_PK4UB] = 0, /* XXX */
1683    [TGSI_OPCODE_SEQ] = nir_op_seq,
1684    [TGSI_OPCODE_SGT] = 0,
1685    [TGSI_OPCODE_SIN] = nir_op_fsin,
1686    [TGSI_OPCODE_SNE] = nir_op_sne,
1687    [TGSI_OPCODE_SLE] = 0,
1688    [TGSI_OPCODE_TEX] = 0,
1689    [TGSI_OPCODE_TXD] = 0,
1690    [TGSI_OPCODE_TXP] = 0,
1691    [TGSI_OPCODE_UP2H] = 0, /* XXX */
1692    [TGSI_OPCODE_UP2US] = 0, /* XXX */
1693    [TGSI_OPCODE_UP4B] = 0, /* XXX */
1694    [TGSI_OPCODE_UP4UB] = 0, /* XXX */
1695    [TGSI_OPCODE_ARR] = 0,
1696 
1697    /* No function calls, yet. */
1698    [TGSI_OPCODE_CAL] = 0, /* XXX */
1699    [TGSI_OPCODE_RET] = 0, /* XXX */
1700 
1701    [TGSI_OPCODE_SSG] = nir_op_fsign,
1702    [TGSI_OPCODE_CMP] = 0,
1703    [TGSI_OPCODE_TXB] = 0,
1704    [TGSI_OPCODE_DIV] = nir_op_fdiv,
1705    [TGSI_OPCODE_DP2] = 0,
1706    [TGSI_OPCODE_TXL] = 0,
1707 
1708    [TGSI_OPCODE_BRK] = 0,
1709    [TGSI_OPCODE_IF] = 0,
1710    [TGSI_OPCODE_UIF] = 0,
1711    [TGSI_OPCODE_ELSE] = 0,
1712    [TGSI_OPCODE_ENDIF] = 0,
1713 
1714    [TGSI_OPCODE_CEIL] = nir_op_fceil,
1715    [TGSI_OPCODE_I2F] = nir_op_i2f32,
1716    [TGSI_OPCODE_NOT] = nir_op_inot,
1717    [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
1718    [TGSI_OPCODE_SHL] = nir_op_ishl,
1719    [TGSI_OPCODE_AND] = nir_op_iand,
1720    [TGSI_OPCODE_OR] = nir_op_ior,
1721    [TGSI_OPCODE_MOD] = nir_op_umod,
1722    [TGSI_OPCODE_XOR] = nir_op_ixor,
1723    [TGSI_OPCODE_TXF] = 0,
1724    [TGSI_OPCODE_TXQ] = 0,
1725 
1726    [TGSI_OPCODE_CONT] = 0,
1727 
1728    [TGSI_OPCODE_EMIT] = 0, /* XXX */
1729    [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */
1730 
1731    [TGSI_OPCODE_BGNLOOP] = 0,
1732    [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
1733    [TGSI_OPCODE_ENDLOOP] = 0,
1734    [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
1735 
1736    [TGSI_OPCODE_NOP] = 0,
1737    [TGSI_OPCODE_FSEQ] = nir_op_feq,
1738    [TGSI_OPCODE_FSGE] = nir_op_fge,
1739    [TGSI_OPCODE_FSLT] = nir_op_flt,
1740    [TGSI_OPCODE_FSNE] = nir_op_fneu,
1741 
1742    [TGSI_OPCODE_KILL_IF] = 0,
1743 
1744    [TGSI_OPCODE_END] = 0,
1745 
1746    [TGSI_OPCODE_F2I] = nir_op_f2i32,
1747    [TGSI_OPCODE_IDIV] = nir_op_idiv,
1748    [TGSI_OPCODE_IMAX] = nir_op_imax,
1749    [TGSI_OPCODE_IMIN] = nir_op_imin,
1750    [TGSI_OPCODE_INEG] = nir_op_ineg,
1751    [TGSI_OPCODE_ISGE] = nir_op_ige,
1752    [TGSI_OPCODE_ISHR] = nir_op_ishr,
1753    [TGSI_OPCODE_ISLT] = nir_op_ilt,
1754    [TGSI_OPCODE_F2U] = nir_op_f2u32,
1755    [TGSI_OPCODE_U2F] = nir_op_u2f32,
1756    [TGSI_OPCODE_UADD] = nir_op_iadd,
1757    [TGSI_OPCODE_UDIV] = nir_op_udiv,
1758    [TGSI_OPCODE_UMAD] = 0,
1759    [TGSI_OPCODE_UMAX] = nir_op_umax,
1760    [TGSI_OPCODE_UMIN] = nir_op_umin,
1761    [TGSI_OPCODE_UMOD] = nir_op_umod,
1762    [TGSI_OPCODE_UMUL] = nir_op_imul,
1763    [TGSI_OPCODE_USEQ] = nir_op_ieq,
1764    [TGSI_OPCODE_USGE] = nir_op_uge,
1765    [TGSI_OPCODE_USHR] = nir_op_ushr,
1766    [TGSI_OPCODE_USLT] = nir_op_ult,
1767    [TGSI_OPCODE_USNE] = nir_op_ine,
1768 
1769    [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1770    [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
1771    [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
1772    [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
1773 
1774    /* XXX: SAMPLE opcodes */
1775 
1776    [TGSI_OPCODE_UARL] = nir_op_mov,
1777    [TGSI_OPCODE_UCMP] = 0,
1778    [TGSI_OPCODE_IABS] = nir_op_iabs,
1779    [TGSI_OPCODE_ISSG] = nir_op_isign,
1780 
1781    [TGSI_OPCODE_LOAD] = 0,
1782    [TGSI_OPCODE_STORE] = 0,
1783 
1784    /* XXX: atomics */
1785 
1786    [TGSI_OPCODE_TEX2] = 0,
1787    [TGSI_OPCODE_TXB2] = 0,
1788    [TGSI_OPCODE_TXL2] = 0,
1789 
1790    [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
1791    [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
1792 
1793    [TGSI_OPCODE_TG4] = 0,
1794    [TGSI_OPCODE_LODQ] = 0,
1795 
1796    [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
1797    [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
1798    [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
1799    [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
1800    [TGSI_OPCODE_POPC] = nir_op_bit_count,
1801    [TGSI_OPCODE_LSB] = nir_op_find_lsb,
1802    [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
1803    [TGSI_OPCODE_UMSB] = nir_op_ufind_msb,
1804 
1805    [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
1806    [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
1807    [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
1808 
1809    [TGSI_OPCODE_F2D] = nir_op_f2f64,
1810    [TGSI_OPCODE_D2F] = nir_op_f2f32,
1811    [TGSI_OPCODE_DMUL] = nir_op_fmul,
1812    [TGSI_OPCODE_D2U] = nir_op_f2u32,
1813    [TGSI_OPCODE_U2D] = nir_op_u2f64,
1814 
1815    [TGSI_OPCODE_U64ADD] = nir_op_iadd,
1816    [TGSI_OPCODE_U64MUL] = nir_op_imul,
1817    [TGSI_OPCODE_U64DIV] = nir_op_udiv,
1818    [TGSI_OPCODE_U64SNE] = nir_op_ine,
1819    [TGSI_OPCODE_I64NEG] = nir_op_ineg,
1820    [TGSI_OPCODE_I64ABS] = nir_op_iabs,
1821 };
1822 
1823 static void
1824 ttn_emit_instruction(struct ttn_compile *c)
1825 {
1826    nir_builder *b = &c->build;
1827    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
1828    unsigned i;
1829    unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
1830    struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];
1831 
1832    if (tgsi_op == TGSI_OPCODE_END)
1833       return;
1834 
1835    nir_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
1836    for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
1837       src[i] = ttn_get_src(c, &tgsi_inst->Src[i], i);
1838    }
1839 
1840    unsigned tgsi_dst_type = tgsi_opcode_infer_dst_type(tgsi_op, 0);
1841 
1842    /* The destination bitsize of the NIR opcode (not TGSI, where it's always
1843     * 32 bits). This needs to be passed into ttn_alu() because it can't be
1844     * inferred for comparison opcodes.
1845     */
1846    unsigned dst_bitsize = tgsi_type_is_64bit(tgsi_dst_type) ? 64 : 32;
1847 
1848    /* If this is non-NULL after the switch, it will be written to the
1849     * corresponding register/variable/etc. afterwards.
1850     */
1851    nir_def *dst = NULL;
1852 
1853    switch (tgsi_op) {
1854    case TGSI_OPCODE_RSQ:
1855       dst = nir_frsq(b, ttn_channel(b, src[0], X));
1856       break;
1857 
1858    case TGSI_OPCODE_SQRT:
1859       dst = nir_fsqrt(b, ttn_channel(b, src[0], X));
1860       break;
1861 
1862    case TGSI_OPCODE_RCP:
1863       dst = nir_frcp(b, ttn_channel(b, src[0], X));
1864       break;
1865 
1866    case TGSI_OPCODE_EX2:
1867       dst = nir_fexp2(b, ttn_channel(b, src[0], X));
1868       break;
1869 
1870    case TGSI_OPCODE_LG2:
1871       dst = nir_flog2(b, ttn_channel(b, src[0], X));
1872       break;
1873 
1874    case TGSI_OPCODE_POW:
1875       dst = nir_fpow(b, ttn_channel(b, src[0], X), ttn_channel(b, src[1], X));
1876       break;
1877 
1878    case TGSI_OPCODE_COS:
1879       dst = nir_fcos(b, ttn_channel(b, src[0], X));
1880       break;
1881 
1882    case TGSI_OPCODE_SIN:
1883       dst = nir_fsin(b, ttn_channel(b, src[0], X));
1884       break;
1885 
1886    case TGSI_OPCODE_ARL:
1887       dst = nir_f2i32(b, nir_ffloor(b, src[0]));
1888       break;
1889 
1890    case TGSI_OPCODE_EXP:
1891       dst = ttn_exp(b, src);
1892       break;
1893 
1894    case TGSI_OPCODE_LOG:
1895       dst = ttn_log(b, src);
1896       break;
1897 
1898    case TGSI_OPCODE_DST:
1899       dst = ttn_dst(b, src);
1900       break;
1901 
1902    case TGSI_OPCODE_LIT:
1903       dst = ttn_lit(b, src);
1904       break;
1905 
1906    case TGSI_OPCODE_DP2:
1907       dst = nir_fdot2(b, src[0], src[1]);
1908       break;
1909 
1910    case TGSI_OPCODE_DP3:
1911       dst = nir_fdot3(b, src[0], src[1]);
1912       break;
1913 
1914    case TGSI_OPCODE_DP4:
1915       dst = nir_fdot4(b, src[0], src[1]);
1916       break;
1917 
1918    case TGSI_OPCODE_UMAD:
1919       dst = nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]);
1920       break;
1921 
1922    case TGSI_OPCODE_LRP:
1923       dst = nir_flrp(b, src[2], src[1], src[0]);
1924       break;
1925 
1926    case TGSI_OPCODE_KILL:
1927       ttn_kill(b);
1928       break;
1929 
1930    case TGSI_OPCODE_ARR:
1931       dst = nir_f2i32(b, nir_fround_even(b, src[0]));
1932       break;
1933 
1934    case TGSI_OPCODE_CMP:
1935       dst = nir_bcsel(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)),
1936                       src[1], src[2]);
1937       break;
1938 
1939    case TGSI_OPCODE_UCMP:
1940       dst = nir_bcsel(b, nir_ine(b, src[0], nir_imm_int(b, 0)),
1941                       src[1], src[2]);
1942       break;
1943 
1944    case TGSI_OPCODE_SGT:
1945       dst = nir_slt(b, src[1], src[0]);
1946       break;
1947 
1948    case TGSI_OPCODE_SLE:
1949       dst = nir_sge(b, src[1], src[0]);
1950       break;
1951 
1952    case TGSI_OPCODE_KILL_IF:
1953       ttn_kill_if(b, src);
1954       break;
1955 
1956    case TGSI_OPCODE_TEX:
1957    case TGSI_OPCODE_TEX_LZ:
1958    case TGSI_OPCODE_TXP:
1959    case TGSI_OPCODE_TXL:
1960    case TGSI_OPCODE_TXB:
1961    case TGSI_OPCODE_TXD:
1962    case TGSI_OPCODE_TEX2:
1963    case TGSI_OPCODE_TXL2:
1964    case TGSI_OPCODE_TXB2:
1965    case TGSI_OPCODE_TXF:
1966    case TGSI_OPCODE_TXF_LZ:
1967    case TGSI_OPCODE_TG4:
1968    case TGSI_OPCODE_LODQ:
1969       dst = ttn_tex(c, src);
1970       break;
1971 
1972    case TGSI_OPCODE_TXQ:
1973       dst = ttn_txq(c, src);
1974       break;
1975 
1976    case TGSI_OPCODE_LOAD:
1977    case TGSI_OPCODE_STORE:
1978       dst = ttn_mem(c, src);
1979       break;
1980 
1981    case TGSI_OPCODE_NOP:
1982       break;
1983 
1984    case TGSI_OPCODE_IF:
1985       nir_push_if(b, nir_fneu_imm(b, nir_channel(b, src[0], 0), 0.0));
1986       break;
1987 
1988    case TGSI_OPCODE_UIF:
1989       nir_push_if(b, nir_ine_imm(b, nir_channel(b, src[0], 0), 0));
1990       break;
1991 
1992    case TGSI_OPCODE_ELSE:
1993       nir_push_else(&c->build, NULL);
1994       break;
1995 
1996    case TGSI_OPCODE_ENDIF:
1997       nir_pop_if(&c->build, NULL);
1998       break;
1999 
2000    case TGSI_OPCODE_BGNLOOP:
2001       nir_push_loop(&c->build);
2002       break;
2003 
2004    case TGSI_OPCODE_BRK:
2005       nir_jump(b, nir_jump_break);
2006       break;
2007 
2008    case TGSI_OPCODE_CONT:
2009       nir_jump(b, nir_jump_continue);
2010       break;
2011 
2012    case TGSI_OPCODE_ENDLOOP:
2013       nir_pop_loop(&c->build, NULL);
2014       break;
2015 
2016    case TGSI_OPCODE_BARRIER:
2017       ttn_barrier(b);
2018       break;
2019 
2020    case TGSI_OPCODE_DDX:
2021       dst = nir_ddx(b, src[0]);
2022       break;
2023 
2024    case TGSI_OPCODE_DDX_FINE:
2025       dst = nir_ddx_fine(b, src[0]);
2026       break;
2027 
2028    case TGSI_OPCODE_DDY:
2029       dst = nir_ddy(b, src[0]);
2030       break;
2031 
2032    case TGSI_OPCODE_DDY_FINE:
2033       dst = nir_ddy_fine(b, src[0]);
2034       break;
2035 
2036    default:
2037       if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
2038          dst = ttn_alu(b, op_trans[tgsi_op], dst_bitsize, src);
2039       } else {
2040          fprintf(stderr, "unknown TGSI opcode: %s\n",
2041                  tgsi_get_opcode_name(tgsi_op));
2042          abort();
2043       }
2044       break;
2045    }
2046 
2047    if (dst == NULL)
2048       return;
2049 
2050    if (tgsi_inst->Instruction.Saturate)
2051       dst = nir_fsat(b, dst);
2052 
2053    if (dst->num_components == 1)
2054       dst = nir_replicate(b, dst, 4);
2055    else if (dst->num_components == 2)
2056       dst = nir_pad_vector_imm_int(b, dst, 0, 4); /* for 64->32 conversions */
2057 
2058    assert(dst->num_components == 4);
2059 
2060    /* Finally, copy the SSA def to the NIR variable/register */
2061    nir_variable *var = ttn_get_var(c, tgsi_dst);
2062    if (var) {
2063       unsigned index = tgsi_dst->Register.Index;
2064       unsigned offset = c->temp_regs[index].offset;
2065       struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
2066                                            &tgsi_dst->Indirect : NULL;
2067       nir_store_deref(b, ttn_array_deref(c, var, offset, indirect), dst,
2068                       tgsi_dst->Register.WriteMask);
2069    } else {
2070       unsigned index = tgsi_dst->Register.Index;
2071       nir_def *reg = NULL;
2072       unsigned base_offset = 0;
2073 
2074       if (tgsi_dst->Register.File == TGSI_FILE_TEMPORARY) {
2075          assert(!c->temp_regs[index].var && "handled above");
2076          assert(!tgsi_dst->Register.Indirect);
2077 
2078          reg = c->temp_regs[index].reg;
2079          base_offset = c->temp_regs[index].offset;
2080       } else if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) {
2081          reg = c->output_regs[index].reg;
2082          base_offset = c->output_regs[index].offset;
2083       } else if (tgsi_dst->Register.File == TGSI_FILE_ADDRESS) {
2084          assert(index == 0);
2085          reg = c->addr_reg;
2086       }
2087 
2088       if (tgsi_dst->Register.Indirect) {
2089          nir_def *indirect = ttn_src_for_indirect(c, &tgsi_dst->Indirect);
2090          nir_store_reg_indirect(b, dst, reg, indirect, .base = base_offset,
2091                                 .write_mask = tgsi_dst->Register.WriteMask);
2092       } else {
2093          nir_build_store_reg(b, dst, reg, .base = base_offset,
2094                              .write_mask = tgsi_dst->Register.WriteMask);
2095       }
2096    }
2097 }
2098 
2099 /**
2100  * Emits a NIR store intrinsic for each TGSI_FILE_OUTPUT value to the output
2101  * variables at the end of the shader.
2102  *
2103  * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
2104  * written, because there's no output load intrinsic, which means we couldn't
2105  * handle writemasks.
2106  */
2107 static void
2108 ttn_add_output_stores(struct ttn_compile *c)
2109 {
2110    nir_builder *b = &c->build;
2111 
2112    for (int i = 0; i < c->build.shader->num_outputs; i++) {
2113       nir_variable *var = c->outputs[i];
2114       if (!var)
2115          continue;
2116 
2117       nir_def *store_value =
2118          nir_build_load_reg(b, 4, 32, c->output_regs[i].reg,
2119                             .base = c->output_regs[i].offset);
2120 
2121       uint32_t store_mask = BITFIELD_MASK(store_value->num_components);
2122       if (c->build.shader->info.stage == MESA_SHADER_FRAGMENT) {
2123          /* TGSI uses TGSI_SEMANTIC_POSITION.z for the depth output
2124           * and TGSI_SEMANTIC_STENCIL.y for the stencil output,
2125           * while NIR uses a single-component output.
2126           */
2127          if (var->data.location == FRAG_RESULT_DEPTH)
2128             store_value = nir_channel(b, store_value, 2);
2129          else if (var->data.location == FRAG_RESULT_STENCIL)
2130             store_value = nir_channel(b, store_value, 1);
2131          else if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
2132             store_value = nir_channel(b, store_value, 0);
2133       } else {
2134          /* FOGC, LAYER, and PSIZ are scalar values */
2135          if (var->data.location == VARYING_SLOT_FOGC ||
2136              var->data.location == VARYING_SLOT_LAYER ||
2137              var->data.location == VARYING_SLOT_PSIZ) {
2138             store_value = nir_channel(b, store_value, 0);
2139          }
2140          if (var->data.location == VARYING_SLOT_CLIP_DIST0)
2141             store_mask = BITFIELD_MASK(MIN2(c->build.shader->info.clip_distance_array_size, 4));
2142          else if (var->data.location == VARYING_SLOT_CLIP_DIST1) {
2143             if (c->build.shader->info.clip_distance_array_size > 4)
2144                store_mask = BITFIELD_MASK(c->build.shader->info.clip_distance_array_size - 4);
2145             else
2146                store_mask = 0;
2147          }
2148       }
2149 
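      /* With compact_arrays, clip distances live in a single float[] variable
       * (c->clipdist), so scatter this vec4 store into per-component stores;
       * CLIP_DIST1 starts at array element 4.
       */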
2150       if (b->shader->options->compact_arrays &&
2151           (var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2152            var->data.location == VARYING_SLOT_CLIP_DIST1)) {
2153          if (!store_mask)
2154             continue;
2155 
2156          nir_deref_instr *deref = nir_build_deref_var(b, c->clipdist);
2157          nir_def *zero = nir_imm_zero(b, 1, 32);
2158          unsigned offset = var->data.location == VARYING_SLOT_CLIP_DIST1 ? 4 : 0;
2159          unsigned size = var->data.location == VARYING_SLOT_CLIP_DIST1 ?
2160                           b->shader->info.clip_distance_array_size :
2161                           MIN2(4, b->shader->info.clip_distance_array_size);
2162          for (unsigned i = offset; i < size; i++) {
2163             /* deref the array member and store each component */
2164             nir_deref_instr *component_deref = nir_build_deref_array_imm(b, deref, i);
2165             nir_def *val = zero;
2166             if (store_mask & BITFIELD_BIT(i - offset))
2167                val = nir_channel(b, store_value, i - offset);
2168             nir_store_deref(b, component_deref, val, 0x1);
2169          }
2170       } else {
2171          nir_store_deref(b, nir_build_deref_var(b, var), store_value, store_mask);
2172       }
2173    }
2174 }
2175 
2176 /**
2177  * Parses the given TGSI tokens.
2178  */
2179 static void
2180 ttn_parse_tgsi(struct ttn_compile *c, const void *tgsi_tokens)
2181 {
2182    struct tgsi_parse_context parser;
2183    ASSERTED int ret;
2184 
2185    ret = tgsi_parse_init(&parser, tgsi_tokens);
2186    assert(ret == TGSI_PARSE_OK);
2187 
2188    while (!tgsi_parse_end_of_tokens(&parser)) {
2189       tgsi_parse_token(&parser);
2190       c->token = &parser.FullToken;
2191 
2192       switch (parser.FullToken.Token.Type) {
2193       case TGSI_TOKEN_TYPE_DECLARATION:
2194          ttn_emit_declaration(c);
2195          break;
2196 
2197       case TGSI_TOKEN_TYPE_INSTRUCTION:
2198          ttn_emit_instruction(c);
2199          break;
2200 
2201       case TGSI_TOKEN_TYPE_IMMEDIATE:
2202          ttn_emit_immediate(c);
2203          break;
2204       }
2205    }
2206 
2207    tgsi_parse_free(&parser);
2208 }
2209 
2210 static void
2211 ttn_read_pipe_caps(struct ttn_compile *c,
2212                    struct pipe_screen *screen)
2213 {
2214    c->cap_samplers_as_deref = screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF);
2215    c->cap_face_is_sysval = screen->get_param(screen, PIPE_CAP_FS_FACE_IS_INTEGER_SYSVAL);
2216    c->cap_position_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POSITION_IS_SYSVAL);
2217    c->cap_point_is_sysval = screen->get_param(screen, PIPE_CAP_FS_POINT_IS_SYSVAL);
2218    c->cap_integers = screen->get_shader_param(screen, c->scan->processor, PIPE_SHADER_CAP_INTEGERS);
2219    c->cap_tg4_component_in_swizzle =
2220        screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE);
2221 }
2222 
2223 #define BITSET_SET32(bitset, u32_mask) do { \
2224    STATIC_ASSERT(sizeof((bitset)[0]) >= sizeof(u32_mask)); \
2225    BITSET_ZERO(bitset); \
2226    (bitset)[0] = (u32_mask); \
2227 } while (0)
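/* A minimal usage sketch: copy a 32-bit "declared" mask from tgsi_scan_shader
 * into one of the (possibly wider) nir_shader_info bitsets, e.g.
 *
 *    BITSET_SET32(s->info.images_used, scan.images_declared);
 */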
2228 
2229 /**
2230  * Initializes a TGSI-to-NIR compiler.
2231  */
2232 static struct ttn_compile *
2233 ttn_compile_init(const void *tgsi_tokens,
2234                  const nir_shader_compiler_options *options,
2235                  struct pipe_screen *screen)
2236 {
2237    struct ttn_compile *c;
2238    struct nir_shader *s;
2239    struct tgsi_shader_info scan;
2240 
2241    assert(options || screen);
2242    c = rzalloc(NULL, struct ttn_compile);
2243 
2244    tgsi_scan_shader(tgsi_tokens, &scan);
2245    c->scan = &scan;
2246 
2247    if (!options) {
2248       options =
2249          screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, scan.processor);
2250    }
2251 
2252    c->build = nir_builder_init_simple_shader(tgsi_processor_to_shader_stage(scan.processor),
2253                                              options, "TTN");
2254 
2255    s = c->build.shader;
2256 
2257    if (screen) {
2258       ttn_read_pipe_caps(c, screen);
2259    } else {
2260       /* TTN used to be hard-coded to always make FACE a sysval,
2261        * so preserve that behavior to avoid breaking existing users. */
2262       c->cap_face_is_sysval = true;
2263    }
2264 
2265    s->info.subgroup_size = SUBGROUP_SIZE_UNIFORM;
2266 
2267    if (s->info.stage == MESA_SHADER_FRAGMENT)
2268       s->info.fs.untyped_color_outputs = true;
2269 
2270    s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
2271    s->num_uniforms = scan.const_file_max[0] + 1;
2272    s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
2273    s->info.num_ssbos = util_last_bit(scan.shader_buffers_declared);
2274    s->info.num_ubos = util_last_bit(scan.const_buffers_declared >> 1);
2275    s->info.num_images = util_last_bit(scan.images_declared);
2276    BITSET_SET32(s->info.images_used, scan.images_declared);
2277    BITSET_SET32(s->info.image_buffers, scan.images_buffers);
2278    BITSET_SET32(s->info.msaa_images, scan.msaa_images_declared);
2279    s->info.num_textures = util_last_bit(scan.samplers_declared);
2280    BITSET_SET32(s->info.textures_used, scan.samplers_declared);
2281    BITSET_ZERO(s->info.textures_used_by_txf); /* No scan information yet */
2282    BITSET_SET32(s->info.samplers_used, scan.samplers_declared);
2283    s->info.internal = false;
2284 
2285    /* Default for TGSI is separate, this is assumed throughout the tree */
2286    s->info.separate_shader = true;
2287 
2288    for (unsigned i = 0; i < TGSI_PROPERTY_COUNT; i++) {
2289       unsigned value = scan.properties[i];
2290 
2291       switch (i) {
2292       case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2293          break; /* handled in ttn_emit_declaration */
2294       case TGSI_PROPERTY_FS_COORD_ORIGIN:
2295          if (s->info.stage == MESA_SHADER_FRAGMENT)
2296             s->info.fs.origin_upper_left = value == TGSI_FS_COORD_ORIGIN_UPPER_LEFT;
2297          break;
2298       case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
2299          if (s->info.stage == MESA_SHADER_FRAGMENT)
2300             s->info.fs.pixel_center_integer = value == TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
2301          break;
2302       case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
2303          if (s->info.stage == MESA_SHADER_FRAGMENT)
2304             s->info.fs.depth_layout = ttn_get_depth_layout(value);
2305          break;
2306       case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
2307          if (s->info.stage == MESA_SHADER_VERTEX)
2308             s->info.vs.window_space_position = value;
2309          break;
2310       case TGSI_PROPERTY_NEXT_SHADER:
2311          s->info.next_stage = tgsi_processor_to_shader_stage(value);
2312          break;
2313       case TGSI_PROPERTY_VS_BLIT_SGPRS_AMD:
2314          if (s->info.stage == MESA_SHADER_VERTEX)
2315             s->info.vs.blit_sgprs_amd = value;
2316          break;
2317       case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
2318          if (s->info.stage == MESA_SHADER_COMPUTE)
2319             s->info.workgroup_size[0] = value;
2320          break;
2321       case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
2322          if (s->info.stage == MESA_SHADER_COMPUTE)
2323             s->info.workgroup_size[1] = value;
2324          break;
2325       case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
2326          if (s->info.stage == MESA_SHADER_COMPUTE)
2327             s->info.workgroup_size[2] = value;
2328          break;
2329       case TGSI_PROPERTY_CS_USER_DATA_COMPONENTS_AMD:
2330          if (s->info.stage == MESA_SHADER_COMPUTE)
2331             s->info.cs.user_data_components_amd = value;
2332          break;
2333       case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2334          s->info.clip_distance_array_size = value;
2335          break;
2336       case TGSI_PROPERTY_LEGACY_MATH_RULES:
2337          s->info.use_legacy_math_rules = value;
2338          break;
2339       default:
2340          if (value) {
2341             fprintf(stderr, "tgsi_to_nir: unhandled TGSI property %u = %u\n",
2342                     i, value);
2343             unreachable("unhandled TGSI property");
2344          }
2345       }
2346    }
2347 
2348    if (s->info.stage == MESA_SHADER_COMPUTE &&
2349        (!s->info.workgroup_size[0] ||
2350         !s->info.workgroup_size[1] ||
2351         !s->info.workgroup_size[2]))
2352       s->info.workgroup_size_variable = true;
2353 
2354    c->inputs = rzalloc_array(c, struct nir_variable *, s->num_inputs);
2355    c->outputs = rzalloc_array(c, struct nir_variable *, s->num_outputs);
2356 
2357    c->output_regs = rzalloc_array(c, struct ttn_reg_info,
2358                                   scan.file_max[TGSI_FILE_OUTPUT] + 1);
2359    c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
2360                                 scan.file_max[TGSI_FILE_TEMPORARY] + 1);
2361    c->imm_defs = rzalloc_array(c, nir_def *,
2362                                scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
2363 
2364    c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
2365    c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);
2366 
2367    ttn_parse_tgsi(c, tgsi_tokens);
2368    ttn_add_output_stores(c);
2369 
2370    nir_validate_shader(c->build.shader, "TTN: after parsing TGSI and creating the NIR shader");
2371 
2372    return c;
2373 }
2374 
2375 static void
2376 ttn_optimize_nir(nir_shader *nir)
2377 {
2378    bool progress;
2379 
2380    do {
2381       progress = false;
2382 
2383       NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2384 
2385       /* Linking deals with unused inputs/outputs, but here we can remove
2386        * things local to the shader in the hopes that we can cleanup other
2387        * things. This pass will also remove variables with only stores, so we
2388        * might be able to make progress after it.
2389        */
2390       NIR_PASS(progress, nir, nir_remove_dead_variables,
2391                nir_var_function_temp | nir_var_shader_temp |
2392                nir_var_mem_shared,
2393                NULL);
2394 
2395       NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
2396       NIR_PASS(progress, nir, nir_opt_dead_write_vars);
2397 
2398       if (nir->options->lower_to_scalar) {
2399          NIR_PASS_V(nir, nir_lower_alu_to_scalar,
2400                     nir->options->lower_to_scalar_filter, NULL);
2401          NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);
2402       }
2403 
2404       NIR_PASS_V(nir, nir_lower_alu);
2405       NIR_PASS_V(nir, nir_lower_pack);
2406       NIR_PASS(progress, nir, nir_copy_prop);
2407       NIR_PASS(progress, nir, nir_opt_remove_phis);
2408       NIR_PASS(progress, nir, nir_opt_dce);
2409       if (nir_opt_loop(nir)) {
2410          progress = true;
2411          NIR_PASS(progress, nir, nir_copy_prop);
2412          NIR_PASS(progress, nir, nir_opt_dce);
2413       }
2414       NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
2415       NIR_PASS(progress, nir, nir_opt_dead_cf);
2416       NIR_PASS(progress, nir, nir_opt_cse);
2417       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
2418 
2419       NIR_PASS(progress, nir, nir_opt_phi_precision);
2420       NIR_PASS(progress, nir, nir_opt_algebraic);
2421       NIR_PASS(progress, nir, nir_opt_constant_folding);
2422 
2423       if (!nir->info.flrp_lowered) {
2424          unsigned lower_flrp =
2425             (nir->options->lower_flrp16 ? 16 : 0) |
2426             (nir->options->lower_flrp32 ? 32 : 0) |
2427             (nir->options->lower_flrp64 ? 64 : 0);
2428 
2429          if (lower_flrp) {
2430             bool lower_flrp_progress = false;
2431 
2432             NIR_PASS(lower_flrp_progress, nir, nir_lower_flrp,
2433                      lower_flrp,
2434                      false /* always_precise */);
2435             if (lower_flrp_progress) {
2436                NIR_PASS(progress, nir,
2437                         nir_opt_constant_folding);
2438                progress = true;
2439             }
2440          }
2441 
2442          /* Nothing should rematerialize any flrps, so we only need to do this
2443           * lowering once.
2444           */
2445          nir->info.flrp_lowered = true;
2446       }
2447 
2448       NIR_PASS(progress, nir, nir_opt_undef);
2449       NIR_PASS(progress, nir, nir_opt_conditional_discard);
2450       if (nir->options->max_unroll_iterations) {
2451          NIR_PASS(progress, nir, nir_opt_loop_unroll);
2452       }
2453    } while (progress);
2454 }
2455 
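/* Rewrites vec4 stores to VARYING_SLOT_CLIP_DIST0/1 into per-component stores
 * of a single float[clip_distance_array_size] array on CLIP_DIST0, which is
 * the layout that compact_arrays drivers expect.
 */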
2456 static bool
2457 lower_clipdistance_to_array(nir_shader *nir)
2458 {
2459    bool progress = false;
2460    nir_variable *dist0 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST0);
2461    nir_variable *dist1 = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_CLIP_DIST1);
2462    /* resize VARYING_SLOT_CLIP_DIST0 to the full array size */
2463    dist0->type = glsl_array_type(glsl_float_type(), nir->info.clip_distance_array_size, sizeof(float));
2464    struct set *deletes = _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
2465    nir_foreach_function_impl(impl, nir) {
2466       bool func_progress = false;
2467       nir_builder b = nir_builder_at(nir_before_impl(impl));
2468       /* create a new deref for the arrayed clipdistance variable at the start of the function */
2469       nir_deref_instr *clipdist_deref = nir_build_deref_var(&b, dist0);
2470       nir_def *zero = nir_imm_zero(&b, 1, 32);
2471       nir_foreach_block(block, impl) {
2472          nir_foreach_instr_safe(instr, block) {
2473             /* filter through until a clipdistance store is reached */
2474             if (instr->type != nir_instr_type_intrinsic)
2475                continue;
2476             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2477             if (intr->intrinsic != nir_intrinsic_store_deref)
2478                continue;
2479             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
2480             nir_variable *var = nir_deref_instr_get_variable(deref);
2481             if (var != dist0 && (!dist1 || var != dist1))
2482                continue;
2483             b.cursor = nir_before_instr(instr);
2484             uint32_t wrmask = nir_intrinsic_write_mask(intr);
2485             unsigned offset = var == dist1 ? 4 : 0;
2486             /* iterate over the store's writemask for components */
2487             for (unsigned i = 0; i < nir->info.clip_distance_array_size; i++) {
2488                /* deref the array member and store each component */
2489                nir_deref_instr *component_deref = nir_build_deref_array_imm(&b, clipdist_deref, i);
2490                nir_def *val = zero;
2491                if (wrmask & BITFIELD_BIT(i - offset))
2492                   val = nir_channel(&b, intr->src[1].ssa, i - offset);
2493                nir_store_deref(&b, component_deref, val, 0x1);
2494             }
2495             func_progress = true;
2496             /* immediately remove the old store, save the original deref */
2497             nir_instr_remove(instr);
2498             _mesa_set_add(deletes, deref);
2499          }
2500       }
2501       if (func_progress)
2502          nir_metadata_preserve(impl, nir_metadata_none);
2503       /* derefs must be queued for deletion to avoid deleting the same deref repeatedly */
2504       set_foreach_remove(deletes, he)
2505          nir_instr_remove((void*)he->key);
2506    }
2507    /* VARYING_SLOT_CLIP_DIST1 is no longer used and can be removed */
2508    if (dist1)
2509       exec_node_remove(&dist1->node);
2510    return progress;
2511 }
2512 
2513 /**
2514  * Finalizes the NIR in a similar way as st_glsl_to_nir does.
2515  *
2516  * Drivers expect that these passes are already performed,
2517  * so we have to do it here too.
2518  */
2519 static void
2520 ttn_finalize_nir(struct ttn_compile *c, struct pipe_screen *screen)
2521 {
2522    struct nir_shader *nir = c->build.shader;
2523 
2524    MESA_TRACE_FUNC();
2525 
2526    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
2527    NIR_PASS_V(nir, nir_lower_reg_intrinsics_to_ssa);
2528 
2529    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
2530    NIR_PASS_V(nir, nir_split_var_copies);
2531    NIR_PASS_V(nir, nir_lower_var_copies);
2532    NIR_PASS_V(nir, nir_lower_system_values);
2533    NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
2534 
2535    if (!screen->get_param(screen, PIPE_CAP_TEXRECT)) {
2536       const struct nir_lower_tex_options opts = { .lower_rect = true, };
2537       NIR_PASS_V(nir, nir_lower_tex, &opts);
2538    }
2539 
2540    /* driver needs clipdistance as array<float> */
2541    if ((nir->info.outputs_written &
2542         (BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0) | BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1))) &&
2543         nir->options->compact_arrays) {
2544       NIR_PASS_V(nir, lower_clipdistance_to_array);
2545    }
2546 
2547    if (nir->options->lower_uniforms_to_ubo)
2548       NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, !c->cap_integers);
2549 
2550    if (nir->options->lower_int64_options)
2551       NIR_PASS_V(nir, nir_lower_int64);
2552 
2553    if (!c->cap_samplers_as_deref)
2554       NIR_PASS_V(nir, nir_lower_samplers);
2555 
2556    if (screen->finalize_nir) {
2557       char *msg = screen->finalize_nir(screen, nir);
2558       free(msg);
2559    } else {
2560       ttn_optimize_nir(nir);
2561    }
2562    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2563 
2564    nir->info.num_images = c->num_images;
2565    nir->info.num_textures = c->num_samplers;
2566 
2567    nir_validate_shader(nir, "TTN: after all optimizations");
2568 }
2569 
2570 static void save_nir_to_disk_cache(struct disk_cache *cache,
2571                                    uint8_t key[CACHE_KEY_SIZE],
2572                                    const nir_shader *s)
2573 {
2574    struct blob blob = {0};
2575 
2576    blob_init(&blob);
2577    /* Because we cannot fully trust disk_cache_put
2578     * (EGL_ANDROID_blob_cache) we add the shader size,
2579     * which we'll check after disk_cache_get().
2580     */
2581    if (blob_reserve_uint32(&blob) != 0) {
2582       blob_finish(&blob);
2583       return;
2584    }
2585 
2586    nir_serialize(&blob, s, true);
2587    *(uint32_t *)blob.data = blob.size;
2588 
2589    disk_cache_put(cache, key, blob.data, blob.size, NULL);
2590    blob_finish(&blob);
2591 }
2592 
2593 static nir_shader *
2594 load_nir_from_disk_cache(struct disk_cache *cache,
2595                          struct pipe_screen *screen,
2596                          uint8_t key[CACHE_KEY_SIZE],
2597                          unsigned processor)
2598 {
2599    const nir_shader_compiler_options *options =
2600       screen->get_compiler_options(screen, PIPE_SHADER_IR_NIR, processor);
2601    struct blob_reader blob_reader;
2602    size_t size;
2603    nir_shader *s;
2604 
2605    uint32_t *buffer = (uint32_t *)disk_cache_get(cache, key, &size);
2606    if (!buffer)
2607       return NULL;
2608 
2609    /* Match found. No need to check crc32 or other things.
2610     * disk_cache_get is supposed to do that for us.
2611     * However we do still check if the first element is indeed the size,
2612     * as we cannot fully trust disk_cache_get (EGL_ANDROID_blob_cache) */
2613    if (buffer[0] != size) {
2614       free(buffer);
2615       return NULL;
2616    }
2617 
2618    size -= 4;
2619    blob_reader_init(&blob_reader, buffer + 1, size);
2620    s = nir_deserialize(NULL, options, &blob_reader);
2621    free(buffer); /* buffer was malloc-ed */
2622    return s;
2623 }
2624 
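/* Driver-facing entry point: translates TGSI tokens into a finalized NIR
 * shader, consulting the screen's disk shader cache when allow_disk_cache is
 * set. A hypothetical gallium caller might look like:
 *
 *    if (cso->type == PIPE_SHADER_IR_TGSI)
 *       nir = tgsi_to_nir(cso->tokens, pscreen, true);
 *
 * where cso is a struct pipe_shader_state; the snippet is illustrative, not
 * taken from any particular driver.
 */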
2625 struct nir_shader *
2626 tgsi_to_nir(const void *tgsi_tokens,
2627             struct pipe_screen *screen,
2628             bool allow_disk_cache)
2629 {
2630    struct disk_cache *cache = NULL;
2631    struct ttn_compile *c;
2632    struct nir_shader *s = NULL;
2633    uint8_t key[CACHE_KEY_SIZE];
2634    unsigned processor;
2635 
2636    if (allow_disk_cache)
2637       cache = screen->get_disk_shader_cache(screen);
2638 
2639    /* Look first in the cache */
2640    if (cache) {
2641       disk_cache_compute_key(cache,
2642                              tgsi_tokens,
2643                              tgsi_num_tokens(tgsi_tokens) * sizeof(struct tgsi_token),
2644                              key);
2645       processor = tgsi_get_processor_type(tgsi_tokens);
2646       s = load_nir_from_disk_cache(cache, screen, key, processor);
2647    }
2648 
2649    if (s)
2650       return s;
2651 
2652 #ifndef NDEBUG
2653    nir_process_debug_variable();
2654 #endif
2655 
2656    if (NIR_DEBUG(TGSI)) {
2657       fprintf(stderr, "TGSI before translation to NIR:\n");
2658       tgsi_dump(tgsi_tokens, 0);
2659    }
2660 
2661    /* Not in the cache */
2662 
2663    c = ttn_compile_init(tgsi_tokens, NULL, screen);
2664    s = c->build.shader;
2665    ttn_finalize_nir(c, screen);
2666    ralloc_free(c);
2667 
2668    if (NIR_DEBUG(TGSI)) {
2669       mesa_logi("NIR after translation from TGSI:\n");
2670       nir_log_shaderi(s);
2671    }
2672 
2673    if (cache)
2674       save_nir_to_disk_cache(cache, key, s);
2675 
2676    return s;
2677 }
2678 
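/* Variant for callers without a pipe_screen: translates the tokens with the
 * given compiler options but skips the cap queries, finalization passes, and
 * disk cache handling done in tgsi_to_nir().
 */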
2679 struct nir_shader *
2680 tgsi_to_nir_noscreen(const void *tgsi_tokens,
2681                      const nir_shader_compiler_options *options)
2682 {
2683    struct ttn_compile *c;
2684    struct nir_shader *s;
2685 
2686    c = ttn_compile_init(tgsi_tokens, options, NULL);
2687    s = c->build.shader;
2688    ralloc_free(c);
2689 
2690    return s;
2691 }
2692