/*
 * Copyright © 2014-2015 Broadcom
 * SPDX-License-Identifier: MIT
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_deref.h"
#include "compiler/nir/nir_legacy.h"
#include "compiler/nir/nir_worklist.h"
#include "nir_to_rc.h"
#include "r300_nir.h"
#include "r300_screen.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_util.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_dynarray.h"

struct ntr_insn {
   enum tgsi_opcode opcode;
   struct ureg_dst dst[2];
   struct ureg_src src[4];
   enum tgsi_texture_type tex_target;
   enum tgsi_return_type tex_return_type;
   struct tgsi_texture_offset tex_offset[4];

   unsigned mem_qualifier;
   enum pipe_format mem_format;

   bool is_tex : 1;
   bool precise : 1;
};

struct ntr_block {
   /* Array of struct ntr_insn */
   struct util_dynarray insns;
   int start_ip;
   int end_ip;
};

struct ntr_reg_interval {
   uint32_t start, end;
};

struct ntr_compile {
   nir_shader *s;
   nir_function_impl *impl;
   struct pipe_screen *screen;
   struct ureg_program *ureg;

   /* Options */
   bool lower_fabs;

   bool addr_declared[3];
   struct ureg_dst addr_reg[3];

   /* if condition set up at the end of a block, for ntr_emit_if(). */
   struct ureg_src if_cond;

   /* TGSI temps for our NIR SSA and register values. */
   struct ureg_dst *reg_temp;
   struct ureg_src *ssa_temp;

   struct ntr_reg_interval *liveness;

   /* Map from nir_block to ntr_block */
   struct hash_table *blocks;
   struct ntr_block *cur_block;
   unsigned current_if_else;
   unsigned cf_label;

   /* Whether we're currently emitting instructions for a precise NIR instruction. */
   bool precise;

   unsigned num_temps;
   unsigned first_non_array_temp;

   /* Mappings from driver_location to TGSI input/output number.
    *
    * We'll be declaring TGSI input/outputs in an arbitrary order, and they get
    * their numbers assigned incrementally, unlike inputs or constants.
    */
   struct ureg_src *input_index_map;
   uint64_t centroid_inputs;

   uint32_t first_ubo;
};

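/* Allocates a new temporary in our virtual temp space; the virtual index gets
 * mapped to a real ureg temporary later, in ntr_allocate_regs().
 */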
static struct ureg_dst
ntr_temp(struct ntr_compile *c)
{
   return ureg_dst_register(TGSI_FILE_TEMPORARY, c->num_temps++);
}

static struct ntr_block *
ntr_block_from_nir(struct ntr_compile *c, struct nir_block *block)
{
   struct hash_entry *entry = _mesa_hash_table_search(c->blocks, block);
   return entry->data;
}

static void ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list);
static void ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list);

static struct ntr_insn *
ntr_insn(struct ntr_compile *c, enum tgsi_opcode opcode,
         struct ureg_dst dst,
         struct ureg_src src0, struct ureg_src src1,
         struct ureg_src src2, struct ureg_src src3)
{
   struct ntr_insn insn = {
      .opcode = opcode,
      .dst = { dst, ureg_dst_undef() },
      .src = { src0, src1, src2, src3 },
      .precise = c->precise,
   };
   util_dynarray_append(&c->cur_block->insns, struct ntr_insn, insn);
   return util_dynarray_top_ptr(&c->cur_block->insns, struct ntr_insn);
}

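/* OPmn defines an emit helper taking m destinations and n sources.  The
 * tgsi_opcode_tmp.h include below instantiates one ntr_OPCODE() wrapper per
 * TGSI opcode using these macros.
 */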
#define OP00( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c)                                     \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}

#define OP01( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_src src0)                                             \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}


#define OP10( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_dst dst)                                              \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}

#define OP11( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_dst dst,                                              \
                     struct ureg_src src0)                                             \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(), ureg_src_undef()); \
}

#define OP12( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_dst dst,                                              \
                     struct ureg_src src0,                                             \
                     struct ureg_src src1)                                             \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef()); \
}

#define OP13( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_dst dst,                                              \
                     struct ureg_src src0,                                             \
                     struct ureg_src src1,                                             \
                     struct ureg_src src2)                                             \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef());             \
}

#define OP14( op )                                                                     \
static inline void ntr_##op(struct ntr_compile *c,                                     \
                     struct ureg_dst dst,                                              \
                     struct ureg_src src0,                                             \
                     struct ureg_src src1,                                             \
                     struct ureg_src src2,                                             \
                     struct ureg_src src3)                                             \
{                                                                                      \
   ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3);                         \
}

/* We hand-craft our tex instructions */
#define OP12_TEX(op)
#define OP14_TEX(op)

/* Use a template include to generate a correctly-typed ntr_OP()
 * function for each TGSI opcode:
 */
#include "gallium/auxiliary/tgsi/tgsi_opcode_tmp.h"

/**
 * Interprets a nir_load_const used as a NIR src as a uint.
 *
 * For non-native-integers drivers, nir_load_const_instrs used by an integer ALU
 * instruction (or in a phi-web used by an integer ALU instruction) were
 * converted to floats and the ALU instruction swapped to the float equivalent.
 * However, this means that integer load_consts used by intrinsics (which don't
 * normally get that conversion) may have been reformatted to be floats.  Given
 * that all of our intrinsic nir_src_as_uint() calls are expected to be small,
 * we can just look and see if they look like floats and convert them back to
 * ints.
 */
static uint32_t
ntr_src_as_uint(struct ntr_compile *c, nir_src src)
{
   uint32_t val = nir_src_as_uint(src);
   if (val >= fui(1.0))
      val = (uint32_t)uif(val);
   return val;
}

/* Per-channel masks of def/use within the block, and the per-channel
 * livein/liveout for the block as a whole.
 */
struct ntr_live_reg_block_state {
   uint8_t *def, *use, *livein, *liveout, *defin, *defout;
};

struct ntr_live_reg_state {
   unsigned bitset_words;

   struct ntr_reg_interval *regs;

   /* Used in propagate_across_edge() */
   BITSET_WORD *tmp_live;

   struct ntr_live_reg_block_state *blocks;

   nir_block_worklist worklist;
};

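/* Records a read of the given temp's channels: any channel not already
 * screened off by a def in this block becomes a use, and the temp's live
 * interval is extended to cover this instruction.
 */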
static void
ntr_live_reg_mark_use(struct ntr_compile *c, struct ntr_live_reg_block_state *bs,
                      int ip, unsigned index, unsigned used_mask)
{
   bs->use[index] |= used_mask & ~bs->def[index];

   c->liveness[index].start = MIN2(c->liveness[index].start, ip);
   c->liveness[index].end = MAX2(c->liveness[index].end, ip);

}
static void
ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl, struct ntr_live_reg_state *state)
{
   for (int i = 0; i < impl->num_blocks; i++) {
      state->blocks[i].def = rzalloc_array(state->blocks, uint8_t, c->num_temps);
      state->blocks[i].defin = rzalloc_array(state->blocks, uint8_t, c->num_temps);
      state->blocks[i].defout = rzalloc_array(state->blocks, uint8_t, c->num_temps);
      state->blocks[i].use = rzalloc_array(state->blocks, uint8_t, c->num_temps);
      state->blocks[i].livein = rzalloc_array(state->blocks, uint8_t, c->num_temps);
      state->blocks[i].liveout = rzalloc_array(state->blocks, uint8_t, c->num_temps);
   }

   int ip = 0;
   nir_foreach_block(block, impl) {
      struct ntr_live_reg_block_state *bs = &state->blocks[block->index];
      struct ntr_block *ntr_block = ntr_block_from_nir(c, block);

      ntr_block->start_ip = ip;

      util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) {
         const struct tgsi_opcode_info *opcode_info =
            tgsi_get_opcode_info(insn->opcode);

         /* Set up use[] for the srcs.
          *
          * Uses are the channels of the reg read in the block that don't have a
          * preceding def to screen them off.  Note that we don't do per-element
          * tracking of array regs, so they're never screened off.
          */
         for (int i = 0; i < opcode_info->num_src; i++) {
            if (insn->src[i].File != TGSI_FILE_TEMPORARY)
               continue;
            int index = insn->src[i].Index;

            uint32_t used_mask = tgsi_util_get_src_usage_mask(insn->opcode, i,
                                                              insn->dst->WriteMask,
                                                              insn->src[i].SwizzleX,
                                                              insn->src[i].SwizzleY,
                                                              insn->src[i].SwizzleZ,
                                                              insn->src[i].SwizzleW,
                                                              insn->tex_target,
                                                              insn->tex_target);

            assert(!insn->src[i].Indirect || index < c->first_non_array_temp);
            ntr_live_reg_mark_use(c, bs, ip, index, used_mask);
         }

         if (insn->is_tex) {
            for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
               if (insn->tex_offset[i].File == TGSI_FILE_TEMPORARY)
                  ntr_live_reg_mark_use(c, bs, ip, insn->tex_offset[i].Index, 0xf);
            }
         }

         /* Set up def[] for the dsts.
          *
          * Defs are the unconditionally-written (not R/M/W) channels of the reg in
          * the block that don't have a preceding use.
          */
         for (int i = 0; i < opcode_info->num_dst; i++) {
            if (insn->dst[i].File != TGSI_FILE_TEMPORARY)
               continue;
            int index = insn->dst[i].Index;
            uint32_t writemask = insn->dst[i].WriteMask;

            bs->def[index] |= writemask & ~bs->use[index];
            bs->defout[index] |= writemask;

            assert(!insn->dst[i].Indirect || index < c->first_non_array_temp);
            c->liveness[index].start = MIN2(c->liveness[index].start, ip);
            c->liveness[index].end = MAX2(c->liveness[index].end, ip);
         }
         ip++;
      }

      ntr_block->end_ip = ip;
   }
}

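/* Computes a [start_ip, end_ip] live interval for every virtual temp using a
 * worklist dataflow pass over the NIR CFG, so that ntr_allocate_regs() can
 * reuse TGSI temporaries once their intervals end.
 */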
static void
ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl)
{
   nir_metadata_require(impl, nir_metadata_block_index);

   c->liveness = rzalloc_array(c, struct ntr_reg_interval, c->num_temps);

   struct ntr_live_reg_state state = {
       .blocks = rzalloc_array(impl, struct ntr_live_reg_block_state, impl->num_blocks),
   };

   /* The intervals start out with start > end (indicating unused) */
   for (int i = 0; i < c->num_temps; i++)
      c->liveness[i].start = ~0;

   ntr_live_reg_setup_def_use(c, impl, &state);

   /* Make a forward-order worklist of all the blocks. */
   nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
   nir_foreach_block(block, impl) {
      nir_block_worklist_push_tail(&state.worklist, block);
   }

   /* Propagate defin/defout down the CFG to calculate the live variables
    * potentially defined along any possible control flow path.  We'll use this
    * to keep things like conditional defs of the reg (or array regs where we
    * don't track defs!) from making the reg's live range extend back to the
    * start of the program.
    */
   while (!nir_block_worklist_is_empty(&state.worklist)) {
      nir_block *block = nir_block_worklist_pop_head(&state.worklist);
      for (int j = 0; j < ARRAY_SIZE(block->successors); j++) {
         nir_block *succ = block->successors[j];
         if (!succ || succ->index == impl->num_blocks)
            continue;

         for (int i = 0; i < c->num_temps; i++) {
            uint8_t new_def = state.blocks[block->index].defout[i] & ~state.blocks[succ->index].defin[i];

            if (new_def) {
               state.blocks[succ->index].defin[i] |= new_def;
               state.blocks[succ->index].defout[i] |= new_def;
               nir_block_worklist_push_tail(&state.worklist, succ);
            }
         }
      }
   }

   /* Make a reverse-order worklist of all the blocks. */
   nir_foreach_block(block, impl) {
      nir_block_worklist_push_head(&state.worklist, block);
   }

   /* We're now ready to work through the worklist and update the liveness sets
    * of each of the blocks.  As long as we keep the worklist up-to-date as we
    * go, everything will get covered.
    */
   while (!nir_block_worklist_is_empty(&state.worklist)) {
      /* We pop them off in the reverse order we pushed them on.  This way
       * the first walk of the instructions is backwards so we only walk
       * once in the case of no control flow.
       */
      nir_block *block = nir_block_worklist_pop_head(&state.worklist);
      struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
      struct ntr_live_reg_block_state *bs = &state.blocks[block->index];

      for (int i = 0; i < c->num_temps; i++) {
         /* Collect livein from our successors to include in our liveout. */
         for (int j = 0; j < ARRAY_SIZE(block->successors); j++) {
            nir_block *succ = block->successors[j];
            if (!succ || succ->index == impl->num_blocks)
               continue;
            struct ntr_live_reg_block_state *sbs = &state.blocks[succ->index];

            uint8_t new_liveout = sbs->livein[i] & ~bs->liveout[i];
            if (new_liveout) {
               if (state.blocks[block->index].defout[i])
                  c->liveness[i].end = MAX2(c->liveness[i].end, ntr_block->end_ip);
               bs->liveout[i] |= sbs->livein[i];
            }
         }

         /* Propagate use requests from either our block's uses or our
          * non-screened-off liveout up to our predecessors.
          */
         uint8_t new_livein = ((bs->use[i] | (bs->liveout[i] & ~bs->def[i])) &
                               ~bs->livein[i]);
         if (new_livein) {
            bs->livein[i] |= new_livein;
            set_foreach(block->predecessors, entry) {
               nir_block *pred = (void *)entry->key;
               nir_block_worklist_push_tail(&state.worklist, pred);
            }

            if (new_livein & state.blocks[block->index].defin[i])
               c->liveness[i].start = MIN2(c->liveness[i].start, ntr_block->start_ip);
         }
      }
   }

   ralloc_free(state.blocks);
   nir_block_worklist_fini(&state.worklist);
}

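/* Greedy allocation step for one temp at one instruction pointer: declare a
 * real ureg temporary when the temp's live interval starts and release it when
 * the interval ends.  Array temps (below first_non_array_temp) are left alone.
 */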
static void
ntr_ra_check(struct ntr_compile *c, unsigned *ra_map, BITSET_WORD *released, int ip, unsigned index)
{
   if (index < c->first_non_array_temp)
      return;

   if (c->liveness[index].start == ip && ra_map[index] == ~0)
      ra_map[index] = ureg_DECL_temporary(c->ureg).Index;

   if (c->liveness[index].end == ip && !BITSET_TEST(released, index)) {
      ureg_release_temporary(c->ureg, ureg_dst_register(TGSI_FILE_TEMPORARY, ra_map[index]));
      BITSET_SET(released, index);
   }
}

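/* Rewrites every TEMP index in the instruction stream from our virtual temp
 * numbering to the ureg temporaries handed out by ntr_ra_check(), walking the
 * blocks in emission order with the same ip counter as the liveness pass.
 */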
static void
ntr_allocate_regs(struct ntr_compile *c, nir_function_impl *impl)
{
   ntr_live_regs(c, impl);

   unsigned *ra_map = ralloc_array(c, unsigned, c->num_temps);
   unsigned *released = rzalloc_array(c, BITSET_WORD, BITSET_WORDS(c->num_temps));

   /* No RA on NIR array regs */
   for (int i = 0; i < c->first_non_array_temp; i++)
      ra_map[i] = i;

   for (int i = c->first_non_array_temp; i < c->num_temps; i++)
      ra_map[i] = ~0;

   int ip = 0;
   nir_foreach_block(block, impl) {
      struct ntr_block *ntr_block = ntr_block_from_nir(c, block);

      for (int i = 0; i < c->num_temps; i++)
         ntr_ra_check(c, ra_map, released, ip, i);

      util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) {
         const struct tgsi_opcode_info *opcode_info =
            tgsi_get_opcode_info(insn->opcode);

         for (int i = 0; i < opcode_info->num_src; i++) {
            if (insn->src[i].File == TGSI_FILE_TEMPORARY) {
               ntr_ra_check(c, ra_map, released, ip, insn->src[i].Index);
               insn->src[i].Index = ra_map[insn->src[i].Index];
            }
         }

         if (insn->is_tex) {
            for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
               if (insn->tex_offset[i].File == TGSI_FILE_TEMPORARY) {
                  ntr_ra_check(c, ra_map, released, ip, insn->tex_offset[i].Index);
                  insn->tex_offset[i].Index = ra_map[insn->tex_offset[i].Index];
               }
            }
         }

         for (int i = 0; i < opcode_info->num_dst; i++) {
            if (insn->dst[i].File == TGSI_FILE_TEMPORARY) {
               ntr_ra_check(c, ra_map, released, ip, insn->dst[i].Index);
               insn->dst[i].Index = ra_map[insn->dst[i].Index];
            }
         }
         ip++;
      }

      for (int i = 0; i < c->num_temps; i++)
         ntr_ra_check(c, ra_map, released, ip, i);
   }
}

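/* Simple path: every non-array virtual temp gets its own ureg temporary, with
 * no liveness-based reuse.
 */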
static void
ntr_allocate_regs_unoptimized(struct ntr_compile *c, nir_function_impl *impl)
{
   for (int i = c->first_non_array_temp; i < c->num_temps; i++)
      ureg_DECL_temporary(c->ureg);
}

/* TGSI varying declarations have a component usage mask associated (used by
 * r600 and svga).
 */
static uint32_t
ntr_tgsi_var_usage_mask(const struct nir_variable *var)
{
   const struct glsl_type *type_without_array =
      glsl_without_array(var->type);
   unsigned num_components = glsl_get_vector_elements(type_without_array);
   if (num_components == 0) /* structs */
      num_components = 4;

   return u_bit_consecutive(var->data.location_frac, num_components);
}

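/* Declares the TGSI output for a store_output-style intrinsic and returns it
 * with the write mask applied; *frac is set to the starting component within
 * the output slot.
 */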
static struct ureg_dst
ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *frac)
{
   nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
   int base = nir_intrinsic_base(instr);
   *frac = nir_intrinsic_component(instr);

   struct ureg_dst out;
   if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
      unsigned semantic_name, semantic_index;
      tgsi_get_gl_frag_result_semantic(semantics.location,
                                       &semantic_name, &semantic_index);
      semantic_index += semantics.dual_source_blend_index;

      switch (semantics.location) {
      case FRAG_RESULT_DEPTH:
         *frac = 2; /* z write is to the .z channel in TGSI */
         break;
      case FRAG_RESULT_STENCIL:
         *frac = 1;
         break;
      default:
         break;
      }

      out = ureg_DECL_output(c->ureg, semantic_name, semantic_index);
   } else {
      unsigned semantic_name, semantic_index;

      tgsi_get_gl_varying_semantic(semantics.location, true,
                                   &semantic_name, &semantic_index);

      uint32_t usage_mask = u_bit_consecutive(*frac, instr->num_components);
      uint32_t gs_streams = semantics.gs_streams;
      for (int i = 0; i < 4; i++) {
         if (!(usage_mask & (1 << i)))
            gs_streams &= ~(0x3 << 2 * i);
      }

      /* No driver appears to use array_id of outputs. */
      unsigned array_id = 0;

      /* This bit is lost in the i/o semantics, but it's unused in in-tree
       * drivers.
       */
      bool invariant = semantics.invariant;

      out = ureg_DECL_output_layout(c->ureg,
                                    semantic_name, semantic_index,
                                    gs_streams,
                                    base,
                                    usage_mask,
                                    array_id,
                                    semantics.num_slots,
                                    invariant);
   }

   unsigned write_mask;
   if (nir_intrinsic_has_write_mask(instr))
      write_mask = nir_intrinsic_write_mask(instr);
   else
      write_mask = ((1 << instr->num_components) - 1) << *frac;

   write_mask = write_mask << *frac;
   return ureg_writemask(out, write_mask);
}

static bool
ntr_try_store_in_tgsi_output_with_use(struct ntr_compile *c,
                                      struct ureg_dst *dst,
                                      nir_src *src)
{
   *dst = ureg_dst_undef();

   if (nir_src_is_if(src))
      return false;

   if (nir_src_parent_instr(src)->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(nir_src_parent_instr(src));
   if (intr->intrinsic != nir_intrinsic_store_output ||
       !nir_src_is_const(intr->src[1])) {
      return false;
   }

   uint32_t frac;
   *dst = ntr_output_decl(c, intr, &frac);
   dst->Index += ntr_src_as_uint(c, intr->src[1]);

   return frac == 0;
}

/* If this reg is used only for storing an output, then in the simple
 * cases we can write directly to the TGSI output instead of having
 * store_output emit its own MOV.
 */
static bool
ntr_try_store_reg_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst,
                                 nir_intrinsic_instr *reg_decl)
{
   assert(reg_decl->intrinsic == nir_intrinsic_decl_reg);

   *dst = ureg_dst_undef();

   /* Look for a single use for try_store_in_tgsi_output */
   nir_src *use = NULL;
   nir_foreach_reg_load(src, reg_decl) {
      nir_intrinsic_instr *load = nir_instr_as_intrinsic(nir_src_parent_instr(src));
      nir_foreach_use_including_if(load_use, &load->def) {
         /* We can only have one use */
         if (use != NULL)
            return false;

         use = load_use;
      }
   }

   if (use == NULL)
      return false;

   return ntr_try_store_in_tgsi_output_with_use(c, dst, use);
}

/* If this SSA def is used only for storing an output, then in the simple
 * cases we can write directly to the TGSI output instead of having
 * store_output emit its own MOV.
 */
static bool
ntr_try_store_ssa_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst,
                                 nir_def *def)
{
   *dst = ureg_dst_undef();

   if (!list_is_singular(&def->uses))
      return false;

   nir_foreach_use_including_if(use, def) {
      return ntr_try_store_in_tgsi_output_with_use(c, dst, use);
   }
   unreachable("We have one use");
}

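/* Declares the TGSI fragment shader inputs from the NIR shader_in variables and
 * fills input_index_map so that load_input intrinsics can look up their
 * declaration by driver_location.
 */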
static void
ntr_setup_inputs(struct ntr_compile *c)
{
   if (c->s->info.stage != MESA_SHADER_FRAGMENT)
      return;

   unsigned num_inputs = 0;
   int num_input_arrays = 0;

   nir_foreach_shader_in_variable(var, c->s) {
      const struct glsl_type *type = var->type;
      unsigned array_len =
         glsl_count_attribute_slots(type, false);

      num_inputs = MAX2(num_inputs, var->data.driver_location + array_len);
   }

   c->input_index_map = ralloc_array(c, struct ureg_src, num_inputs);

   nir_foreach_shader_in_variable(var, c->s) {
      const struct glsl_type *type = var->type;
      unsigned array_len =
         glsl_count_attribute_slots(type, false);

      unsigned interpolation = TGSI_INTERPOLATE_CONSTANT;
      unsigned sample_loc;
      struct ureg_src decl;

      if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
         interpolation =
            tgsi_get_interp_mode(var->data.interpolation,
                                 var->data.location == VARYING_SLOT_COL0 ||
                                 var->data.location == VARYING_SLOT_COL1);

         if (var->data.location == VARYING_SLOT_POS)
            interpolation = TGSI_INTERPOLATE_LINEAR;
      }

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_varying_semantic(var->data.location, true,
                                   &semantic_name, &semantic_index);

      if (var->data.sample) {
         sample_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
      } else if (var->data.centroid) {
         sample_loc = TGSI_INTERPOLATE_LOC_CENTROID;
         c->centroid_inputs |= (BITSET_MASK(array_len) <<
                                var->data.driver_location);
      } else {
         sample_loc = TGSI_INTERPOLATE_LOC_CENTER;
      }

      unsigned array_id = 0;
      if (glsl_type_is_array(type))
         array_id = ++num_input_arrays;

      uint32_t usage_mask = ntr_tgsi_var_usage_mask(var);

      decl = ureg_DECL_fs_input_centroid_layout(c->ureg,
                                                semantic_name,
                                                semantic_index,
                                                interpolation,
                                                sample_loc,
                                                var->data.driver_location,
                                                usage_mask,
                                                array_id, array_len);

      if (semantic_name == TGSI_SEMANTIC_FACE) {
         struct ureg_dst temp = ntr_temp(c);
         /* tgsi docs say that floating point FACE will be positive for
          * frontface and negative for backface, but realistically
          * GLSL-to-TGSI had been doing MOV_SAT to turn it into 0.0 vs 1.0.
          * Copy that behavior, since some drivers (r300) have been doing a
          * 0.0 vs 1.0 backface (and I don't think anybody has a non-1.0
          * front face).
          */
         temp.Saturate = true;
         ntr_MOV(c, temp, decl);
         decl = ureg_src(temp);
      }

      for (unsigned i = 0; i < array_len; i++) {
         c->input_index_map[var->data.driver_location + i] = decl;
         c->input_index_map[var->data.driver_location + i].Index += i;
      }
   }
}

static int
ntr_sort_by_location(const nir_variable *a, const nir_variable *b)
{
   return a->data.location - b->data.location;
}

/**
 * Workaround for virglrenderer requiring that TGSI FS output color variables
 * are declared in order.  Besides, it's a lot nicer to read the TGSI this way.
 */
static void
ntr_setup_outputs(struct ntr_compile *c)
{
   if (c->s->info.stage != MESA_SHADER_FRAGMENT)
      return;

   nir_sort_variables_with_modes(c->s, ntr_sort_by_location, nir_var_shader_out);

   nir_foreach_shader_out_variable(var, c->s) {
      if (var->data.location == FRAG_RESULT_COLOR)
         ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);

      unsigned semantic_name, semantic_index;
      tgsi_get_gl_frag_result_semantic(var->data.location,
                                       &semantic_name, &semantic_index);

      (void)ureg_DECL_output(c->ureg, semantic_name, semantic_index);
   }
}

static enum tgsi_texture_type
tgsi_texture_type_from_sampler_dim(enum glsl_sampler_dim dim, bool is_array, bool is_shadow)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_1D:
      if (is_shadow)
         return is_array ? TGSI_TEXTURE_SHADOW1D_ARRAY : TGSI_TEXTURE_SHADOW1D;
      else
         return is_array ? TGSI_TEXTURE_1D_ARRAY : TGSI_TEXTURE_1D;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      if (is_shadow)
         return is_array ? TGSI_TEXTURE_SHADOW2D_ARRAY : TGSI_TEXTURE_SHADOW2D;
      else
         return is_array ? TGSI_TEXTURE_2D_ARRAY : TGSI_TEXTURE_2D;
   case GLSL_SAMPLER_DIM_3D:
      return TGSI_TEXTURE_3D;
   case GLSL_SAMPLER_DIM_CUBE:
      if (is_shadow)
         return is_array ? TGSI_TEXTURE_SHADOWCUBE_ARRAY : TGSI_TEXTURE_SHADOWCUBE;
      else
         return is_array ? TGSI_TEXTURE_CUBE_ARRAY : TGSI_TEXTURE_CUBE;
   case GLSL_SAMPLER_DIM_RECT:
      if (is_shadow)
         return TGSI_TEXTURE_SHADOWRECT;
      else
         return TGSI_TEXTURE_RECT;
   case GLSL_SAMPLER_DIM_MS:
      return is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : TGSI_TEXTURE_2D_MSAA;
   case GLSL_SAMPLER_DIM_BUF:
      return TGSI_TEXTURE_BUFFER;
   default:
      unreachable("unknown sampler dim");
   }
}

static enum tgsi_return_type
tgsi_return_type_from_base_type(enum glsl_base_type type)
{
   switch (type) {
   case GLSL_TYPE_INT:
      return TGSI_RETURN_TYPE_SINT;
   case GLSL_TYPE_UINT:
      return TGSI_RETURN_TYPE_UINT;
   case GLSL_TYPE_FLOAT:
      return TGSI_RETURN_TYPE_FLOAT;
   default:
      unreachable("unexpected texture type");
   }
}

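/* Declares TGSI sampler views and samplers for sampler/texture uniforms, then
 * sizes the constant buffers: non-sampler uniforms were already lowered to UBOs
 * (see the comment below), and each used UBO gets a CONST[n] declaration.
 */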
static void
ntr_setup_uniforms(struct ntr_compile *c)
{
   nir_foreach_uniform_variable(var, c->s) {
      if (glsl_type_is_sampler(glsl_without_array(var->type)) ||
          glsl_type_is_texture(glsl_without_array(var->type))) {
         /* Don't use this size for the check for samplers -- arrays of structs
          * containing samplers should be ignored, and just the separate lowered
          * sampler uniform decl used.
          */
         int size = glsl_type_get_sampler_count(var->type) +
                    glsl_type_get_texture_count(var->type);

         const struct glsl_type *stype = glsl_without_array(var->type);
         enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(glsl_get_sampler_dim(stype),
                                                                            glsl_sampler_type_is_array(stype),
                                                                            glsl_sampler_type_is_shadow(stype));
         enum tgsi_return_type ret_type = tgsi_return_type_from_base_type(glsl_get_sampler_result_type(stype));
         for (int i = 0; i < size; i++) {
            ureg_DECL_sampler_view(c->ureg, var->data.binding + i,
               target, ret_type, ret_type, ret_type, ret_type);
            ureg_DECL_sampler(c->ureg, var->data.binding + i);
         }

      /* lower_uniforms_to_ubo lowered non-sampler uniforms to UBOs, so CB0
       * size declaration happens with other UBOs below.
       */
      }
   }

   c->first_ubo = ~0;

   unsigned ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS] = {0};
   nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ubo) {
      int ubo = var->data.driver_location;
      if (ubo == -1)
         continue;

      if (!(ubo == 0 && c->s->info.first_ubo_is_default_ubo))
         c->first_ubo = MIN2(c->first_ubo, ubo);

      unsigned size = glsl_get_explicit_size(var->interface_type, false);
      ubo_sizes[ubo] = size;
   }

   for (int i = 0; i < ARRAY_SIZE(ubo_sizes); i++) {
      if (ubo_sizes[i])
         ureg_DECL_constant2D(c->ureg, 0, DIV_ROUND_UP(ubo_sizes[i], 16) - 1, i);
   }
}

static void
ntr_setup_registers(struct ntr_compile *c)
{
   assert(c->num_temps == 0);

   nir_foreach_reg_decl_safe(nir_reg, nir_shader_get_entrypoint(c->s)) {
      /* Permanently allocate all the array regs at the start. */
      unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg);
      unsigned index = nir_reg->def.index;

      if (num_array_elems != 0) {
         struct ureg_dst decl = ureg_DECL_array_temporary(c->ureg, num_array_elems, true);
         c->reg_temp[index] = decl;
         assert(c->num_temps == decl.Index);
         c->num_temps += num_array_elems;
      }
   }
   c->first_non_array_temp = c->num_temps;

   /* After that, allocate non-array regs in our virtual space that we'll
    * register-allocate before ureg emit.
    */
   nir_foreach_reg_decl_safe(nir_reg, nir_shader_get_entrypoint(c->s)) {
      unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg);
      unsigned num_components = nir_intrinsic_num_components(nir_reg);
      unsigned index = nir_reg->def.index;

      /* We already handled arrays */
      if (num_array_elems == 0) {
         struct ureg_dst decl;
         uint32_t write_mask = BITFIELD_MASK(num_components);

         if (!ntr_try_store_reg_in_tgsi_output(c, &decl, nir_reg)) {
            decl = ureg_writemask(ntr_temp(c), write_mask);
         }
         c->reg_temp[index] = decl;
      }
   }
}

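/* Turns a NIR load_const into a TGSI immediate.  The 32-bit constant bits are
 * reinterpreted as floats for ureg_DECL_immediate(), matching the
 * non-native-integers handling described above ntr_src_as_uint().
 */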
static struct ureg_src
ntr_get_load_const_src(struct ntr_compile *c, nir_load_const_instr *instr)
{
   int num_components = instr->def.num_components;

   float values[4];
   assert(instr->def.bit_size == 32);
   for (int i = 0; i < num_components; i++)
      values[i] = uif(instr->value[i].u32);

   return ureg_DECL_immediate(c->ureg, values, num_components);
}

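/* Loads an indirect index into one of the ADDR registers (declaring it on
 * first use) with ARL and returns ADDR[addr_index].x for use as a relative
 * addressing source.
 */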
static struct ureg_src
ntr_reladdr(struct ntr_compile *c, struct ureg_src addr, int addr_index)
{
   assert(addr_index < ARRAY_SIZE(c->addr_reg));

   for (int i = 0; i <= addr_index; i++) {
      if (!c->addr_declared[i]) {
         c->addr_reg[i] = ureg_writemask(ureg_DECL_address(c->ureg),
                                             TGSI_WRITEMASK_X);
         c->addr_declared[i] = true;
      }
   }

   ntr_ARL(c, c->addr_reg[addr_index], addr);
   return ureg_scalar(ureg_src(c->addr_reg[addr_index]), 0);
}

/* Forward declare for recursion with indirects */
static struct ureg_src
ntr_get_src(struct ntr_compile *c, nir_src src);

static struct ureg_src
ntr_get_chased_src(struct ntr_compile *c, nir_legacy_src *src)
{
   if (src->is_ssa) {
      if (src->ssa->parent_instr->type == nir_instr_type_load_const)
         return ntr_get_load_const_src(c, nir_instr_as_load_const(src->ssa->parent_instr));

      return c->ssa_temp[src->ssa->index];
   } else {
      struct ureg_dst reg_temp = c->reg_temp[src->reg.handle->index];
      reg_temp.Index += src->reg.base_offset;

      if (src->reg.indirect) {
         struct ureg_src offset = ntr_get_src(c, nir_src_for_ssa(src->reg.indirect));
         return ureg_src_indirect(ureg_src(reg_temp),
                                  ntr_reladdr(c, offset, 0));
      } else {
         return ureg_src(reg_temp);
      }
   }
}

static struct ureg_src
ntr_get_src(struct ntr_compile *c, nir_src src)
{
   nir_legacy_src chased = nir_legacy_chase_src(&src);
   return ntr_get_chased_src(c, &chased);
}

static struct ureg_src
ntr_get_alu_src(struct ntr_compile *c, nir_alu_instr *instr, int i)
{
   /* We only support 32-bit float modifiers.  The only other modifier type
    * officially supported by TGSI is 32-bit integer negates, but even those are
    * broken on virglrenderer, so skip lowering all integer and f64 float mods.
    *
    * The lower_fabs requests that we not have native source modifiers
    * for fabs, and instead emit MAX(a,-a) for nir_op_fabs.
    */
   nir_legacy_alu_src src =
      nir_legacy_chase_alu_src(&instr->src[i], !c->lower_fabs);
   struct ureg_src usrc = ntr_get_chased_src(c, &src.src);

   usrc = ureg_swizzle(usrc,
                       src.swizzle[0],
                       src.swizzle[1],
                       src.swizzle[2],
                       src.swizzle[3]);

   if (src.fabs)
      usrc = ureg_abs(usrc);
   if (src.fneg)
      usrc = ureg_negate(usrc);

   return usrc;
}

/* Reswizzles a source so that the unset channels in the write mask still refer
 * to one of the channels present in the write mask.
 */
static struct ureg_src
ntr_swizzle_for_write_mask(struct ureg_src src, uint32_t write_mask)
{
   assert(write_mask);
   int first_chan = ffs(write_mask) - 1;
   return ureg_swizzle(src,
                       (write_mask & TGSI_WRITEMASK_X) ? TGSI_SWIZZLE_X : first_chan,
                       (write_mask & TGSI_WRITEMASK_Y) ? TGSI_SWIZZLE_Y : first_chan,
                       (write_mask & TGSI_WRITEMASK_Z) ? TGSI_SWIZZLE_Z : first_chan,
                       (write_mask & TGSI_WRITEMASK_W) ? TGSI_SWIZZLE_W : first_chan);
}

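/* Picks the destination for an SSA def: either a TGSI output (when the def
 * feeds only a store_output) or a fresh temp, records the source to use for
 * later reads in ssa_temp[], and returns the dest with its write mask applied.
 */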
static struct ureg_dst
ntr_get_ssa_def_decl(struct ntr_compile *c, nir_def *ssa)
{
   uint32_t writemask;
   /* Fix writemask for nir_intrinsic_load_ubo_vec4 according to uses. */
   if (ssa->parent_instr->type == nir_instr_type_intrinsic &&
       nir_instr_as_intrinsic(ssa->parent_instr)->intrinsic == nir_intrinsic_load_ubo_vec4)
      writemask = nir_def_components_read(ssa);
   else
      writemask = BITSET_MASK(ssa->num_components);

   struct ureg_dst dst;
   if (!ntr_try_store_ssa_in_tgsi_output(c, &dst, ssa))
      dst = ntr_temp(c);

   c->ssa_temp[ssa->index] = ntr_swizzle_for_write_mask(ureg_src(dst), writemask);

   return ureg_writemask(dst, writemask);
}

static struct ureg_dst
ntr_get_chased_dest_decl(struct ntr_compile *c, nir_legacy_dest *dest)
{
   if (dest->is_ssa)
      return ntr_get_ssa_def_decl(c, dest->ssa);
   else
      return c->reg_temp[dest->reg.handle->index];
}

static struct ureg_dst
ntr_get_chased_dest(struct ntr_compile *c, nir_legacy_dest *dest)
{
   struct ureg_dst dst = ntr_get_chased_dest_decl(c, dest);

   if (!dest->is_ssa) {
      dst.Index += dest->reg.base_offset;

      if (dest->reg.indirect) {
         struct ureg_src offset = ntr_get_src(c, nir_src_for_ssa(dest->reg.indirect));
         dst = ureg_dst_indirect(dst, ntr_reladdr(c, offset, 0));
      }
   }

   return dst;
}

static struct ureg_dst
ntr_get_dest(struct ntr_compile *c, nir_def *def)
{
   nir_legacy_dest chased = nir_legacy_chase_dest(def);
   return ntr_get_chased_dest(c, &chased);
}

static struct ureg_dst
ntr_get_alu_dest(struct ntr_compile *c, nir_def *def)
{
   nir_legacy_alu_dest chased = nir_legacy_chase_alu_dest(def);
   struct ureg_dst dst = ntr_get_chased_dest(c, &chased.dest);

   if (chased.fsat)
      dst.Saturate = true;

   /* Only registers get write masks */
   if (chased.dest.is_ssa)
      return dst;

   return ureg_writemask(dst, chased.write_mask);
}

/* For an SSA dest being populated by a constant src, replace the storage with
 * a copy of the ureg_src.
 */
static void
ntr_store_def(struct ntr_compile *c, nir_def *def, struct ureg_src src)
{
   if (!src.Indirect && !src.DimIndirect) {
      switch (src.File) {
      case TGSI_FILE_IMMEDIATE:
      case TGSI_FILE_INPUT:
      case TGSI_FILE_CONSTANT:
      case TGSI_FILE_SYSTEM_VALUE:
         c->ssa_temp[def->index] = src;
         return;
      }
   }

   ntr_MOV(c, ntr_get_ssa_def_decl(c, def), src);
}

static void
ntr_store(struct ntr_compile *c, nir_def *def, struct ureg_src src)
{
   nir_legacy_dest chased = nir_legacy_chase_dest(def);

   if (chased.is_ssa)
      ntr_store_def(c, chased.ssa, src);
   else {
      struct ureg_dst dst = ntr_get_chased_dest(c, &chased);
      ntr_MOV(c, dst, src);
   }
}

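/* TGSI scalar opcodes (RCP, RSQ, EX2, LG2, SIN, COS, POW) read a single source
 * channel and replicate the result, so emit one instruction per written
 * channel with the sources swizzled down to that channel.
 */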
static void
ntr_emit_scalar(struct ntr_compile *c, unsigned tgsi_op,
                struct ureg_dst dst,
                struct ureg_src src0,
                struct ureg_src src1)
{
   unsigned i;

   /* POW is the only 2-operand scalar op. */
   if (tgsi_op != TGSI_OPCODE_POW)
      src1 = src0;

   for (i = 0; i < 4; i++) {
      if (dst.WriteMask & (1 << i)) {
         ntr_insn(c, tgsi_op,
                  ureg_writemask(dst, 1 << i),
                  ureg_scalar(src0, i),
                  ureg_scalar(src1, i),
                  ureg_src_undef(), ureg_src_undef());
      }
   }
}

static void
ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr)
{
   struct ureg_src src[4];
   struct ureg_dst dst;
   unsigned i;
   int num_srcs = nir_op_infos[instr->op].num_inputs;

   /* Don't try to translate folded fsat since their source won't be valid */
   if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr))
      return;

   c->precise = instr->exact;

   assert(num_srcs <= ARRAY_SIZE(src));
   for (i = 0; i < num_srcs; i++)
      src[i] = ntr_get_alu_src(c, instr, i);
   for (; i < ARRAY_SIZE(src); i++)
      src[i] = ureg_src_undef();

   dst = ntr_get_alu_dest(c, &instr->def);

   static enum tgsi_opcode op_map[] = {
      [nir_op_mov] = TGSI_OPCODE_MOV,

      [nir_op_fdot2_replicated] = TGSI_OPCODE_DP2,
      [nir_op_fdot3_replicated] = TGSI_OPCODE_DP3,
      [nir_op_fdot4_replicated] = TGSI_OPCODE_DP4,
      [nir_op_ffloor] = TGSI_OPCODE_FLR,
      [nir_op_ffract] = TGSI_OPCODE_FRC,
      [nir_op_fceil] = TGSI_OPCODE_CEIL,
      [nir_op_fround_even] = TGSI_OPCODE_ROUND,

      [nir_op_slt] = TGSI_OPCODE_SLT,
      [nir_op_sge] = TGSI_OPCODE_SGE,
      [nir_op_seq] = TGSI_OPCODE_SEQ,
      [nir_op_sne] = TGSI_OPCODE_SNE,

      [nir_op_ftrunc] = TGSI_OPCODE_TRUNC,
      [nir_op_fadd] = TGSI_OPCODE_ADD,
      [nir_op_fmul] = TGSI_OPCODE_MUL,

      [nir_op_fmin] = TGSI_OPCODE_MIN,
      [nir_op_fmax] = TGSI_OPCODE_MAX,
      [nir_op_ffma] = TGSI_OPCODE_MAD,
   };

   if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) {
      /* The normal path for NIR to TGSI ALU op translation */
      ntr_insn(c, op_map[instr->op],
                dst, src[0], src[1], src[2], src[3]);
   } else {
      /* Special cases for NIR to TGSI ALU op translation. */

      /* TODO: Use something like the ntr_store() path for the MOV calls so we
       * don't emit extra MOVs for swizzles/srcmods of inputs/const/imm.
       */

      switch (instr->op) {
      case nir_op_fabs:
         /* Try to eliminate */
         if (!c->lower_fabs && nir_legacy_float_mod_folds(instr))
            break;

         if (c->lower_fabs)
            ntr_MAX(c, dst, src[0], ureg_negate(src[0]));
         else
            ntr_MOV(c, dst, ureg_abs(src[0]));
         break;

      case nir_op_fsat:
         ntr_MOV(c, ureg_saturate(dst), src[0]);
         break;

      case nir_op_fneg:
         /* Try to eliminate */
         if (nir_legacy_float_mod_folds(instr))
            break;

         ntr_MOV(c, dst, ureg_negate(src[0]));
         break;

         /* NOTE: TGSI 32-bit math ops have the old "one source channel
          * replicated to all dst channels" behavior, while 64 is normal mapping
          * of src channels to dst.
          */
      case nir_op_frcp:
         ntr_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], ureg_src_undef());
         break;

      case nir_op_frsq:
         ntr_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], ureg_src_undef());
         break;

      case nir_op_fexp2:
         ntr_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], ureg_src_undef());
         break;

      case nir_op_flog2:
         ntr_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], ureg_src_undef());
         break;

      case nir_op_fsin:
         ntr_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], ureg_src_undef());
         break;

      case nir_op_fcos:
         ntr_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], ureg_src_undef());
         break;

      case nir_op_fsub:
         ntr_ADD(c, dst, src[0], ureg_negate(src[1]));
         break;

      case nir_op_fmod:
         unreachable("should be handled by .lower_fmod = true");
         break;

      case nir_op_fpow:
         ntr_emit_scalar(c, TGSI_OPCODE_POW, dst, src[0], src[1]);
         break;

      case nir_op_flrp:
         ntr_LRP(c, dst, src[2], src[1], src[0]);
         break;

      case nir_op_fcsel:
         /* Implement this as CMP(-abs(src0), src1, src2). */
         ntr_CMP(c, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]);
         break;

      case nir_op_fcsel_gt:
         ntr_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
         break;

      case nir_op_fcsel_ge:
         /* Implement this as if !(src0 < 0.0) was identical to src0 >= 0.0. */
         ntr_CMP(c, dst, src[0], src[2], src[1]);
         break;

      case nir_op_vec4:
      case nir_op_vec3:
      case nir_op_vec2:
         unreachable("covered by nir_lower_vec_to_movs()");

      default:
         fprintf(stderr, "Unknown NIR opcode: %s\n", nir_op_infos[instr->op].name);
         unreachable("Unknown NIR opcode");
      }
   }

   c->precise = false;
}

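/* Applies a NIR offset source to a ureg src: constant offsets are folded into
 * the register Index, anything else goes through an ADDR register via
 * ntr_reladdr().
 */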
1311 static struct ureg_src
ntr_ureg_src_indirect(struct ntr_compile * c,struct ureg_src usrc,nir_src src,int addr_reg)1312 ntr_ureg_src_indirect(struct ntr_compile *c, struct ureg_src usrc,
1313                       nir_src src, int addr_reg)
1314 {
1315    if (nir_src_is_const(src)) {
1316       usrc.Index += ntr_src_as_uint(c, src);
1317       return usrc;
1318    } else {
1319       return ureg_src_indirect(usrc, ntr_reladdr(c, ntr_get_src(c, src), addr_reg));
1320    }
1321 }
1322 
1323 static struct ureg_dst
ntr_ureg_dst_indirect(struct ntr_compile * c,struct ureg_dst dst,nir_src src)1324 ntr_ureg_dst_indirect(struct ntr_compile *c, struct ureg_dst dst,
1325                       nir_src src)
1326 {
1327    if (nir_src_is_const(src)) {
1328       dst.Index += ntr_src_as_uint(c, src);
1329       return dst;
1330    } else {
1331       return ureg_dst_indirect(dst, ntr_reladdr(c, ntr_get_src(c, src), 0));
1332    }
1333 }
1334 
1335 static struct ureg_dst
ntr_ureg_dst_dimension_indirect(struct ntr_compile * c,struct ureg_dst udst,nir_src src)1336 ntr_ureg_dst_dimension_indirect(struct ntr_compile *c, struct ureg_dst udst,
1337                                 nir_src src)
1338 {
1339    if (nir_src_is_const(src)) {
1340       return ureg_dst_dimension(udst, ntr_src_as_uint(c, src));
1341    } else {
1342       return ureg_dst_dimension_indirect(udst,
1343                                          ntr_reladdr(c, ntr_get_src(c, src), 1),
1344                                          0);
1345    }
1346 }
1347 /* Some load operations in NIR will have a fractional offset that we need to
1348  * swizzle down before storing to the result register.
1349  */
1350 static struct ureg_src
ntr_shift_by_frac(struct ureg_src src,unsigned frac,unsigned num_components)1351 ntr_shift_by_frac(struct ureg_src src, unsigned frac, unsigned num_components)
1352 {
1353    return ureg_swizzle(src,
1354                        frac,
1355                        frac + MIN2(num_components - 1, 1),
1356                        frac + MIN2(num_components - 1, 2),
1357                        frac + MIN2(num_components - 1, 3));
1358 }
1359 
1360 
1361 static void
ntr_emit_load_ubo(struct ntr_compile * c,nir_intrinsic_instr * instr)1362 ntr_emit_load_ubo(struct ntr_compile *c, nir_intrinsic_instr *instr)
1363 {
1364    struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
1365 
1366    struct ureg_dst addr_temp = ureg_dst_undef();
1367 
1368    if (nir_src_is_const(instr->src[0])) {
1369       src = ureg_src_dimension(src, ntr_src_as_uint(c, instr->src[0]));
1370    } else {
1371       /* virglrenderer requires that indirect UBO references have the UBO
1372        * array's base index in the Index field, not added to the indirect
1373        * address.
1374        *
1375        * Many nir intrinsics have a base address const value for the start of
1376        * their array indirection, but load_ubo doesn't.  We fake it by
1377        * subtracting it off here.
1378        */
1379       addr_temp = ntr_temp(c);
1380       ntr_UADD(c, addr_temp, ntr_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
1381       src = ureg_src_dimension_indirect(src,
1382                                          ntr_reladdr(c, ureg_src(addr_temp), 1),
1383                                          c->first_ubo);
1384    }
1385 
1386    /* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const
1387     * file.
1388     */
1389    src.Index = nir_intrinsic_base(instr);
1390 
1391    if (nir_src_is_const(instr->src[1])) {
1392       src.Index += ntr_src_as_uint(c, instr->src[1]);
1393    } else {
1394       src = ureg_src_indirect(src, ntr_reladdr(c, ntr_get_src(c, instr->src[1]), 0));
1395    }
1396 
1397    int start_component = nir_intrinsic_component(instr);
1398 
1399    src = ntr_shift_by_frac(src, start_component, instr->num_components);
1400 
1401    ntr_store(c, &instr->def, src);
1402 }
1403 
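/* Loads a shader input, either directly or via the TGSI interpolation opcodes
 * when the barycentric source of load_interpolated_input requires them.
 */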
1404 static void
1405 ntr_emit_load_input(struct ntr_compile *c, nir_intrinsic_instr *instr)
1406 {
1407    uint32_t frac = nir_intrinsic_component(instr);
1408    uint32_t num_components = instr->num_components;
1409    unsigned base = nir_intrinsic_base(instr);
1410    struct ureg_src input;
1411    nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
1412 
1413    if (c->s->info.stage == MESA_SHADER_VERTEX) {
1414       input = ureg_DECL_vs_input(c->ureg, base);
1415       for (int i = 1; i < semantics.num_slots; i++)
1416          ureg_DECL_vs_input(c->ureg, base + i);
1417    } else {
1418       input = c->input_index_map[base];
1419    }
1420 
1421    input = ntr_shift_by_frac(input, frac, num_components);
1422 
1423    switch (instr->intrinsic) {
1424    case nir_intrinsic_load_input:
1425       input = ntr_ureg_src_indirect(c, input, instr->src[0], 0);
1426       ntr_store(c, &instr->def, input);
1427       break;
1428 
1429    case nir_intrinsic_load_interpolated_input: {
1430       input = ntr_ureg_src_indirect(c, input, instr->src[1], 0);
1431 
1432       nir_intrinsic_instr *bary_instr =
1433          nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
1434 
1435       switch (bary_instr->intrinsic) {
1436       case nir_intrinsic_load_barycentric_pixel:
1437       case nir_intrinsic_load_barycentric_sample:
1438          /* For these, we know that the barycentric load matches the
1439           * interpolation on the input declaration, so we can use it directly.
1440           */
1441          ntr_store(c, &instr->def, input);
1442          break;
1443 
1444       case nir_intrinsic_load_barycentric_centroid:
1445          /* If the input was declared centroid, then there's no need to
1446           * emit the extra TGSI interp instruction, we can just read the
1447           * input.
1448           */
1449          if (c->centroid_inputs & (1ull << nir_intrinsic_base(instr))) {
1450             ntr_store(c, &instr->def, input);
1451          } else {
1452             ntr_INTERP_CENTROID(c, ntr_get_dest(c, &instr->def), input);
1453          }
1454          break;
1455 
1456       case nir_intrinsic_load_barycentric_at_sample:
1457          /* We stored the sample in the fake "bary" dest. */
1458          ntr_INTERP_SAMPLE(c, ntr_get_dest(c, &instr->def), input,
1459                             ntr_get_src(c, instr->src[0]));
1460          break;
1461 
1462       case nir_intrinsic_load_barycentric_at_offset:
1463          /* We stored the offset in the fake "bary" dest. */
1464          ntr_INTERP_OFFSET(c, ntr_get_dest(c, &instr->def), input,
1465                             ntr_get_src(c, instr->src[0]));
1466          break;
1467 
1468       default:
1469          unreachable("bad barycentric interp intrinsic\n");
1470       }
1471       break;
1472    }
1473 
1474    default:
1475       unreachable("bad load input intrinsic\n");
1476    }
1477 }
1478 
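/* Stores a value to a declared TGSI output, swizzling the source so that its
 * components line up with the output's write mask.
 */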
1479 static void
1480 ntr_emit_store_output(struct ntr_compile *c, nir_intrinsic_instr *instr)
1481 {
1482    struct ureg_src src = ntr_get_src(c, instr->src[0]);
1483 
1484    if (src.File == TGSI_FILE_OUTPUT) {
1485       /* If our src is the output file, that's an indication that we were able
1486        * to emit the output stores in the generating instructions and we have
1487        * nothing to do here.
1488        */
1489       return;
1490    }
1491 
1492    uint32_t frac;
1493    struct ureg_dst out = ntr_output_decl(c, instr, &frac);
1494 
1495    if (instr->intrinsic == nir_intrinsic_store_per_vertex_output) {
1496       out = ntr_ureg_dst_indirect(c, out, instr->src[2]);
1497       out = ntr_ureg_dst_dimension_indirect(c, out, instr->src[1]);
1498    } else {
1499       out = ntr_ureg_dst_indirect(c, out, instr->src[1]);
1500    }
1501 
1502    uint8_t swizzle[4] = { 0, 0, 0, 0 };
1503    for (int i = frac; i < 4; i++) {
1504       if (out.WriteMask & (1 << i))
1505          swizzle[i] = i - frac;
1506    }
1507 
1508    src = ureg_swizzle(src, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1509 
1510    ntr_MOV(c, out, src);
1511 }
1512 
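/* Reads back a shader output (e.g. for framebuffer fetch), taking care not to
 * swizzle in channels outside the destination's write mask.
 */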
1513 static void
1514 ntr_emit_load_output(struct ntr_compile *c, nir_intrinsic_instr *instr)
1515 {
1516    nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
1517 
1518    /* ntr_try_store_in_tgsi_output() optimization is not valid if normal
1519     * load_output is present.
1520     */
1521    assert(c->s->info.stage != MESA_SHADER_VERTEX &&
1522           (c->s->info.stage != MESA_SHADER_FRAGMENT || semantics.fb_fetch_output));
1523 
1524    uint32_t frac;
1525    struct ureg_dst out = ntr_output_decl(c, instr, &frac);
1526 
1527    if (instr->intrinsic == nir_intrinsic_load_per_vertex_output) {
1528       out = ntr_ureg_dst_indirect(c, out, instr->src[1]);
1529       out = ntr_ureg_dst_dimension_indirect(c, out, instr->src[0]);
1530    } else {
1531       out = ntr_ureg_dst_indirect(c, out, instr->src[0]);
1532    }
1533 
1534    struct ureg_dst dst = ntr_get_dest(c, &instr->def);
1535    struct ureg_src out_src = ureg_src(out);
1536 
1537    /* Don't swizzle unavailable channels of the output into the writemasked-out
1538     * components. Avoids compile failures in virglrenderer with
1539     * TESS_LEVEL_INNER.
1540     */
1541    int fill_channel = ffs(dst.WriteMask) - 1;
1542    uint8_t swizzles[4] = { 0, 1, 2, 3 };
1543    for (int i = 0; i < 4; i++)
1544       if (!(dst.WriteMask & (1 << i)))
1545          swizzles[i] = fill_channel;
1546    out_src = ureg_swizzle(out_src, swizzles[0], swizzles[1], swizzles[2], swizzles[3]);
1547 
1548    if (semantics.fb_fetch_output)
1549       ntr_FBFETCH(c, dst, out_src);
1550    else
1551       ntr_MOV(c, dst, out_src);
1552 }
1553 
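/* Loads a system value by declaring the corresponding TGSI system value
 * register, converting to float where integers have been lowered away.
 */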
1554 static void
1555 ntr_emit_load_sysval(struct ntr_compile *c, nir_intrinsic_instr *instr)
1556 {
1557    gl_system_value sysval = nir_system_value_from_intrinsic(instr->intrinsic);
1558    enum tgsi_semantic semantic = tgsi_get_sysval_semantic(sysval);
1559    struct ureg_src sv = ureg_DECL_system_value(c->ureg, semantic, 0);
1560 
1561    /* virglrenderer doesn't like references to channels of the sysval that
1562     * aren't defined, even if they aren't really read.  (GLSL compile fails on
1563     * gl_NumWorkGroups.w, for example).
1564     */
1565    uint32_t write_mask = BITSET_MASK(instr->def.num_components);
1566    sv = ntr_swizzle_for_write_mask(sv, write_mask);
1567 
1568    /* TGSI and NIR define these intrinsics as always loading ints, but they can
1569     * still appear on hardware with non-native-integers fragment shaders using
1570     * the draw path (i915g).  In that case, having called nir_lower_int_to_float
1571     * means that we actually want floats instead.
1572     */
1573    switch (instr->intrinsic) {
1574    case nir_intrinsic_load_vertex_id:
1575    case nir_intrinsic_load_instance_id:
1576       ntr_U2F(c, ntr_get_dest(c, &instr->def), sv);
1577       return;
1578 
1579    default:
1580       break;
1581    }
1582 
1583    ntr_store(c, &instr->def, sv);
1584 }
1585 
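/* Dispatches NIR intrinsics to the load/store/sysval helpers above; unknown
 * intrinsics are reported on stderr instead of aborting.
 */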
1586 static void
1587 ntr_emit_intrinsic(struct ntr_compile *c, nir_intrinsic_instr *instr)
1588 {
1589    switch (instr->intrinsic) {
1590    case nir_intrinsic_load_ubo:
1591    case nir_intrinsic_load_ubo_vec4:
1592       ntr_emit_load_ubo(c, instr);
1593       break;
1594 
1595       /* System values */
1596    case nir_intrinsic_load_draw_id:
1597    case nir_intrinsic_load_invocation_id:
1598    case nir_intrinsic_load_frag_coord:
1599    case nir_intrinsic_load_point_coord:
1600    case nir_intrinsic_load_front_face:
1601       ntr_emit_load_sysval(c, instr);
1602       break;
1603 
1604    case nir_intrinsic_load_input:
1605    case nir_intrinsic_load_per_vertex_input:
1606    case nir_intrinsic_load_interpolated_input:
1607       ntr_emit_load_input(c, instr);
1608       break;
1609 
1610    case nir_intrinsic_store_output:
1611    case nir_intrinsic_store_per_vertex_output:
1612       ntr_emit_store_output(c, instr);
1613       break;
1614 
1615    case nir_intrinsic_load_output:
1616    case nir_intrinsic_load_per_vertex_output:
1617       ntr_emit_load_output(c, instr);
1618       break;
1619 
1620    case nir_intrinsic_terminate:
1621       ntr_KILL(c);
1622       break;
1623 
1624    case nir_intrinsic_terminate_if: {
1625       struct ureg_src cond = ureg_scalar(ntr_get_src(c, instr->src[0]), 0);
1626       /* For !native_integers, the bool got lowered to 1.0 or 0.0. */
1627       ntr_KILL_IF(c, ureg_negate(cond));
1628       break;
1629    }
1630       /* In TGSI we don't actually generate the barycentric coords, and emit
1631        * interp intrinsics later.  However, we do need to store the
1632        * load_barycentric_at_* argument so that we can use it at that point.
1633        */
1634    case nir_intrinsic_load_barycentric_pixel:
1635    case nir_intrinsic_load_barycentric_centroid:
1636    case nir_intrinsic_load_barycentric_sample:
1637       break;
1638    case nir_intrinsic_load_barycentric_at_sample:
1639    case nir_intrinsic_load_barycentric_at_offset:
1640       ntr_store(c, &instr->def, ntr_get_src(c, instr->src[0]));
1641       break;
1642 
1643    case nir_intrinsic_ddx:
1644    case nir_intrinsic_ddx_coarse:
1645       ntr_DDX(c, ntr_get_dest(c, &instr->def), ntr_get_src(c, instr->src[0]));
1646       return;
1647    case nir_intrinsic_ddy:
1648    case nir_intrinsic_ddy_coarse:
1649       ntr_DDY(c, ntr_get_dest(c, &instr->def), ntr_get_src(c, instr->src[0]));
1650       return;
1651 
1652    case nir_intrinsic_decl_reg:
1653    case nir_intrinsic_load_reg:
1654    case nir_intrinsic_load_reg_indirect:
1655    case nir_intrinsic_store_reg:
1656    case nir_intrinsic_store_reg_indirect:
1657       /* fully consumed */
1658       break;
1659 
1660    default:
1661       fprintf(stderr, "Unknown intrinsic: ");
1662       nir_print_instr(&instr->instr, stderr);
1663       fprintf(stderr, "\n");
1664       break;
1665    }
1666 }
1667 
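/* Accumulator for the up-to-four ureg sources of a texture instruction. */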
1668 struct ntr_tex_operand_state {
1669    struct ureg_src srcs[4];
1670    unsigned i;
1671 };
1672 
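/* Appends the given NIR texture source, if present, to the operand state. */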
1673 static void
1674 ntr_push_tex_arg(struct ntr_compile *c,
1675                  nir_tex_instr *instr,
1676                  nir_tex_src_type tex_src_type,
1677                  struct ntr_tex_operand_state *s)
1678 {
1679    int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
1680    if (tex_src < 0)
1681       return;
1682 
1683    nir_src *src = &instr->src[tex_src].src;
1684    s->srcs[s->i++] = ntr_get_src(c, *src);
1685 }
1686 
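/* Translates a nir_tex_instr into a TGSI texture opcode, packing the backend
 * coordinate vec4s, derivatives, gather component, and sampler into the
 * instruction's source slots and recording any texel offsets.
 */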
1687 static void
1688 ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr)
1689 {
1690    struct ureg_dst dst = ntr_get_dest(c, &instr->def);
1691    enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(instr->sampler_dim, instr->is_array, instr->is_shadow);
1692    unsigned tex_opcode;
1693 
1694    int tex_handle_src = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
1695    int sampler_handle_src = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
1696 
1697    struct ureg_src sampler;
1698    if (tex_handle_src >= 0 && sampler_handle_src >= 0) {
1699       /* It seems we can't get separate tex/sampler on GL, so just use one of the handles */
1700       sampler = ntr_get_src(c, instr->src[tex_handle_src].src);
1701       assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1);
1702    } else {
1703       assert(tex_handle_src == -1 && sampler_handle_src == -1);
1704       sampler = ureg_DECL_sampler(c->ureg, instr->sampler_index);
1705       int sampler_src = nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset);
1706       if (sampler_src >= 0) {
1707          struct ureg_src reladdr = ntr_get_src(c, instr->src[sampler_src].src);
1708          sampler = ureg_src_indirect(sampler, ntr_reladdr(c, reladdr, 2));
1709       }
1710    }
1711 
1712    switch (instr->op) {
1713    case nir_texop_tex:
1714       if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) >
1715          MAX2(instr->coord_components, 2) + instr->is_shadow)
1716          tex_opcode = TGSI_OPCODE_TXP;
1717       else
1718          tex_opcode = TGSI_OPCODE_TEX;
1719       break;
1720    case nir_texop_txl:
1721       tex_opcode = TGSI_OPCODE_TXL;
1722       break;
1723    case nir_texop_txb:
1724       tex_opcode = TGSI_OPCODE_TXB;
1725       break;
1726    case nir_texop_txd:
1727       tex_opcode = TGSI_OPCODE_TXD;
1728       break;
1729    case nir_texop_txs:
1730       tex_opcode = TGSI_OPCODE_TXQ;
1731       break;
1732    case nir_texop_tg4:
1733       tex_opcode = TGSI_OPCODE_TG4;
1734       break;
1735    case nir_texop_query_levels:
1736       tex_opcode = TGSI_OPCODE_TXQ;
1737       break;
1738    case nir_texop_lod:
1739       tex_opcode = TGSI_OPCODE_LODQ;
1740       break;
1741    case nir_texop_texture_samples:
1742       tex_opcode = TGSI_OPCODE_TXQS;
1743       break;
1744    default:
1745       unreachable("unsupported tex op");
1746    }
1747 
1748    struct ntr_tex_operand_state s = { .i = 0 };
1749    ntr_push_tex_arg(c, instr, nir_tex_src_backend1, &s);
1750    ntr_push_tex_arg(c, instr, nir_tex_src_backend2, &s);
1751 
1752    /* non-coord arg for TXQ */
1753    if (tex_opcode == TGSI_OPCODE_TXQ) {
1754       ntr_push_tex_arg(c, instr, nir_tex_src_lod, &s);
1755       /* virglrenderer mistakenly looks at .w instead of .x, so make sure it's
1756        * scalar
1757        */
1758       s.srcs[s.i - 1] = ureg_scalar(s.srcs[s.i - 1], 0);
1759    }
1760 
1761    if (s.i > 1) {
1762       if (tex_opcode == TGSI_OPCODE_TEX)
1763          tex_opcode = TGSI_OPCODE_TEX2;
1764       if (tex_opcode == TGSI_OPCODE_TXB)
1765          tex_opcode = TGSI_OPCODE_TXB2;
1766       if (tex_opcode == TGSI_OPCODE_TXL)
1767          tex_opcode = TGSI_OPCODE_TXL2;
1768    }
1769 
1770    if (instr->op == nir_texop_txd) {
1771       /* Derivs appear in their own src args */
1772       int ddx = nir_tex_instr_src_index(instr, nir_tex_src_ddx);
1773       int ddy = nir_tex_instr_src_index(instr, nir_tex_src_ddy);
1774       s.srcs[s.i++] = ntr_get_src(c, instr->src[ddx].src);
1775       s.srcs[s.i++] = ntr_get_src(c, instr->src[ddy].src);
1776    }
1777 
1778    if (instr->op == nir_texop_tg4 && target != TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1779       if (c->screen->get_param(c->screen,
1780                                PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE)) {
1781          sampler = ureg_scalar(sampler, instr->component);
1782          s.srcs[s.i++] = ureg_src_undef();
1783       } else {
1784          s.srcs[s.i++] = ureg_imm1u(c->ureg, instr->component);
1785       }
1786    }
1787 
1788    s.srcs[s.i++] = sampler;
1789 
1790    enum tgsi_return_type tex_type;
1791    switch (instr->dest_type) {
1792    case nir_type_float32:
1793       tex_type = TGSI_RETURN_TYPE_FLOAT;
1794       break;
1795    case nir_type_int32:
1796       tex_type = TGSI_RETURN_TYPE_SINT;
1797       break;
1798    case nir_type_uint32:
1799       tex_type = TGSI_RETURN_TYPE_UINT;
1800       break;
1801    default:
1802       unreachable("unknown texture type");
1803    }
1804 
1805    struct ureg_dst tex_dst;
1806    if (instr->op == nir_texop_query_levels)
1807       tex_dst = ureg_writemask(ntr_temp(c), TGSI_WRITEMASK_W);
1808    else
1809       tex_dst = dst;
1810 
1811    while (s.i < 4)
1812       s.srcs[s.i++] = ureg_src_undef();
1813 
1814    struct ntr_insn *insn = ntr_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]);
1815    insn->tex_target = target;
1816    insn->tex_return_type = tex_type;
1817    insn->is_tex = true;
1818 
1819    int tex_offset_src = nir_tex_instr_src_index(instr, nir_tex_src_offset);
1820    if (tex_offset_src >= 0) {
1821       struct ureg_src offset = ntr_get_src(c, instr->src[tex_offset_src].src);
1822 
1823       insn->tex_offset[0].File = offset.File;
1824       insn->tex_offset[0].Index = offset.Index;
1825       insn->tex_offset[0].SwizzleX = offset.SwizzleX;
1826       insn->tex_offset[0].SwizzleY = offset.SwizzleY;
1827       insn->tex_offset[0].SwizzleZ = offset.SwizzleZ;
1828       insn->tex_offset[0].Padding = 0;
1829    }
1830 
1831    if (nir_tex_instr_has_explicit_tg4_offsets(instr)) {
1832       for (uint8_t i = 0; i < 4; ++i) {
1833          struct ureg_src imm = ureg_imm2i(c->ureg, instr->tg4_offsets[i][0], instr->tg4_offsets[i][1]);
1834          insn->tex_offset[i].File = imm.File;
1835          insn->tex_offset[i].Index = imm.Index;
1836          insn->tex_offset[i].SwizzleX = imm.SwizzleX;
1837          insn->tex_offset[i].SwizzleY = imm.SwizzleY;
1838          insn->tex_offset[i].SwizzleZ = imm.SwizzleZ;
1839       }
1840    }
1841 
1842    if (instr->op == nir_texop_query_levels)
1843       ntr_MOV(c, dst, ureg_scalar(ureg_src(tex_dst), 3));
1844 }
1845 
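/* Emits TGSI BRK/CONT for NIR break/continue jumps. */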
1846 static void
1847 ntr_emit_jump(struct ntr_compile *c, nir_jump_instr *jump)
1848 {
1849    switch (jump->type) {
1850    case nir_jump_break:
1851       ntr_BRK(c);
1852       break;
1853 
1854    case nir_jump_continue:
1855       ntr_CONT(c);
1856       break;
1857 
1858    default:
1859       fprintf(stderr, "Unknown jump instruction: ");
1860       nir_print_instr(&jump->instr, stderr);
1861       fprintf(stderr, "\n");
1862       abort();
1863    }
1864 }
1865 
1866 static void
1867 ntr_emit_ssa_undef(struct ntr_compile *c, nir_undef_instr *instr)
1868 {
1869    /* Nothing to do but make sure that we have some storage to deref. */
1870    (void)ntr_get_ssa_def_decl(c, &instr->def);
1871 }
1872 
1873 static void
1874 ntr_emit_instr(struct ntr_compile *c, nir_instr *instr)
1875 {
1876    switch (instr->type) {
1877    case nir_instr_type_deref:
1878       /* ignored, will be walked by nir_intrinsic_image_*_deref. */
1879       break;
1880 
1881    case nir_instr_type_alu:
1882       ntr_emit_alu(c, nir_instr_as_alu(instr));
1883       break;
1884 
1885    case nir_instr_type_intrinsic:
1886       ntr_emit_intrinsic(c, nir_instr_as_intrinsic(instr));
1887       break;
1888 
1889    case nir_instr_type_load_const:
1890       /* Nothing to do here, as load consts are done directly from
1891        * ntr_get_src() (since many constant NIR srcs will often get folded
1892        * directly into a register file index instead of as a TGSI src).
1893        */
1894       break;
1895 
1896    case nir_instr_type_tex:
1897       ntr_emit_texture(c, nir_instr_as_tex(instr));
1898       break;
1899 
1900    case nir_instr_type_jump:
1901       ntr_emit_jump(c, nir_instr_as_jump(instr));
1902       break;
1903 
1904    case nir_instr_type_undef:
1905       ntr_emit_ssa_undef(c, nir_instr_as_undef(instr));
1906       break;
1907 
1908    default:
1909       fprintf(stderr, "Unknown NIR instr type: ");
1910       nir_print_instr(instr, stderr);
1911       fprintf(stderr, "\n");
1912       abort();
1913    }
1914 }
1915 
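/* Emits IF/ELSE/ENDIF around the then/else lists, using the condition stashed
 * in c->if_cond by ntr_emit_block().
 */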
1916 static void
1917 ntr_emit_if(struct ntr_compile *c, nir_if *if_stmt)
1918 {
1919    ntr_IF(c, c->if_cond);
1920 
1921    ntr_emit_cf_list(c, &if_stmt->then_list);
1922 
1923    if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
1924       ntr_ELSE(c);
1925       ntr_emit_cf_list(c, &if_stmt->else_list);
1926    }
1927 
1928    ntr_ENDIF(c);
1929 }
1930 
1931 static void
1932 ntr_emit_loop(struct ntr_compile *c, nir_loop *loop)
1933 {
1934    assert(!nir_loop_has_continue_construct(loop));
1935    ntr_BGNLOOP(c);
1936    ntr_emit_cf_list(c, &loop->body);
1937    ntr_ENDLOOP(c);
1938 }
1939 
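/* Emits the instructions of one NIR block into its ntr_block, and records the
 * condition of a following if, if any, for ntr_emit_if().
 */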
1940 static void
1941 ntr_emit_block(struct ntr_compile *c, nir_block *block)
1942 {
1943    struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
1944    c->cur_block = ntr_block;
1945 
1946    nir_foreach_instr(instr, block) {
1947       ntr_emit_instr(c, instr);
1948 
1949       /* Sanity check that we didn't accidentally ureg_OPCODE() instead of ntr_OPCODE(). */
1950       if (ureg_get_instruction_number(c->ureg) != 0) {
1951          fprintf(stderr, "Emitted ureg insn during: ");
1952          nir_print_instr(instr, stderr);
1953          fprintf(stderr, "\n");
1954          unreachable("emitted ureg insn");
1955       }
1956    }
1957 
1958    /* Set up the if condition for ntr_emit_if(), which we have to do before
1959     * freeing up the temps (the "if" is treated as inside the block for liveness
1960     * purposes, despite not being an instruction)
1961     *
1962     * Note that, while IF and UIF are supposed to look at only .x, virglrenderer
1963     * looks at all of .xyzw.  No harm in working around the bug.
1964     */
1965    nir_if *nif = nir_block_get_following_if(block);
1966    if (nif)
1967       c->if_cond = ureg_scalar(ntr_get_src(c, nif->condition), TGSI_SWIZZLE_X);
1968 }
1969 
1970 static void
1971 ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list)
1972 {
1973    foreach_list_typed(nir_cf_node, node, node, list) {
1974       switch (node->type) {
1975       case nir_cf_node_block:
1976          ntr_emit_block(c, nir_cf_node_as_block(node));
1977          break;
1978 
1979       case nir_cf_node_if:
1980          ntr_emit_if(c, nir_cf_node_as_if(node));
1981          break;
1982 
1983       case nir_cf_node_loop:
1984          ntr_emit_loop(c, nir_cf_node_as_loop(node));
1985          break;
1986 
1987       default:
1988          unreachable("unknown CF type");
1989       }
1990    }
1991 }
1992 
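/* Second pass: turns the buffered ntr_insns of a block into real ureg
 * instructions, fixing up IF/ELSE labels as they are emitted.
 */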
1993 static void
1994 ntr_emit_block_ureg(struct ntr_compile *c, struct nir_block *block)
1995 {
1996    struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
1997 
1998    /* Emit the ntr insns to tgsi_ureg. */
1999    util_dynarray_foreach(&ntr_block->insns, struct ntr_insn, insn) {
2000       const struct tgsi_opcode_info *opcode_info =
2001          tgsi_get_opcode_info(insn->opcode);
2002 
2003       switch (insn->opcode) {
2004       case TGSI_OPCODE_IF:
2005          ureg_IF(c->ureg, insn->src[0], &c->cf_label);
2006          break;
2007 
2008       case TGSI_OPCODE_ELSE:
2009          ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
2010          ureg_ELSE(c->ureg, &c->cf_label);
2011          c->current_if_else = c->cf_label;
2012          break;
2013 
2014       case TGSI_OPCODE_ENDIF:
2015          ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
2016          ureg_ENDIF(c->ureg);
2017          break;
2018 
2019       case TGSI_OPCODE_BGNLOOP:
2020          /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
2021           * does reference BGNLOOP's.  Follow the former behavior unless something comes up
2022           * with a need.
2023           */
2024          ureg_BGNLOOP(c->ureg, &c->cf_label);
2025          break;
2026 
2027       case TGSI_OPCODE_ENDLOOP:
2028          ureg_ENDLOOP(c->ureg, &c->cf_label);
2029          break;
2030 
2031       default:
2032          if (insn->is_tex) {
2033             int num_offsets = 0;
2034             for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
2035                if (insn->tex_offset[i].File != TGSI_FILE_NULL)
2036                   num_offsets = i + 1;
2037             }
2038             ureg_tex_insn(c->ureg, insn->opcode,
2039                           insn->dst, opcode_info->num_dst,
2040                           insn->tex_target, insn->tex_return_type,
2041                           insn->tex_offset,
2042                           num_offsets,
2043                           insn->src, opcode_info->num_src);
2044          } else {
2045             ureg_insn(c->ureg, insn->opcode,
2046                      insn->dst, opcode_info->num_dst,
2047                      insn->src, opcode_info->num_src,
2048                      insn->precise);
2049          }
2050       }
2051    }
2052 }
2053 
2054 static void
2055 ntr_emit_if_ureg(struct ntr_compile *c, nir_if *if_stmt)
2056 {
2057    /* Note: the last block emitted our IF opcode. */
2058 
2059    int if_stack = c->current_if_else;
2060    c->current_if_else = c->cf_label;
2061 
2062    /* Either the then or else block includes the ENDIF, which will fix up the
2063     * IF(/ELSE)'s label for jumping
2064     */
2065    ntr_emit_cf_list_ureg(c, &if_stmt->then_list);
2066    ntr_emit_cf_list_ureg(c, &if_stmt->else_list);
2067 
2068    c->current_if_else = if_stack;
2069 }
2070 
2071 static void
2072 ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list)
2073 {
2074    foreach_list_typed(nir_cf_node, node, node, list) {
2075       switch (node->type) {
2076       case nir_cf_node_block:
2077          ntr_emit_block_ureg(c, nir_cf_node_as_block(node));
2078          break;
2079 
2080       case nir_cf_node_if:
2081          ntr_emit_if_ureg(c, nir_cf_node_as_if(node));
2082          break;
2083 
2084       case nir_cf_node_loop:
2085          /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
2086           * does reference BGNLOOP's.  Follow the former behavior unless something comes up
2087           * with a need.
2088           */
2089          ntr_emit_cf_list_ureg(c, &nir_cf_node_as_loop(node)->body);
2090          break;
2091 
2092       default:
2093          unreachable("unknown CF type");
2094       }
2095    }
2096 }
2097 
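/* Compiles a nir_function_impl: sets up temporaries, inputs, outputs and
 * uniforms, buffers ntr_insns for the whole CF list, allocates TGSI temps,
 * and finally emits the ureg instructions.
 */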
2098 static void
2099 ntr_emit_impl(struct ntr_compile *c, nir_function_impl *impl)
2100 {
2101    c->impl = impl;
2102 
2103    c->ssa_temp = rzalloc_array(c, struct ureg_src, impl->ssa_alloc);
2104    c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->ssa_alloc);
2105 
2106    /* Set up the struct ntr_blocks to put insns in */
2107    c->blocks = _mesa_pointer_hash_table_create(c);
2108    nir_foreach_block(block, impl) {
2109       struct ntr_block *ntr_block = rzalloc(c->blocks, struct ntr_block);
2110       util_dynarray_init(&ntr_block->insns, ntr_block);
2111       _mesa_hash_table_insert(c->blocks, block, ntr_block);
2112    }
2113 
2114 
2115    ntr_setup_registers(c);
2116 
2117    c->cur_block = ntr_block_from_nir(c, nir_start_block(impl));
2118    ntr_setup_inputs(c);
2119    ntr_setup_outputs(c);
2120    ntr_setup_uniforms(c);
2121 
2122    /* Emit the ntr insns */
2123    ntr_emit_cf_list(c, &impl->body);
2124 
2125    if (c->s->info.stage == MESA_SHADER_FRAGMENT)
2126       ntr_allocate_regs(c, impl);
2127    else
2128       ntr_allocate_regs_unoptimized(c, impl);
2129 
2130    /* Turn the ntr insns into actual TGSI tokens */
2131    ntr_emit_cf_list_ureg(c, &impl->body);
2132 
2133    ralloc_free(c->liveness);
2134    c->liveness = NULL;
2135 
2136 }
2137 
2138 static int
2139 type_size(const struct glsl_type *type, bool bindless)
2140 {
2141    return glsl_count_attribute_slots(type, false);
2142 }
2143 
2144 /* Allow vectorizing of ALU instructions.
2145  */
2146 static uint8_t
2147 ntr_should_vectorize_instr(const nir_instr *instr, const void *data)
2148 {
2149    if (instr->type != nir_instr_type_alu)
2150       return 0;
2151 
2152    return 4;
2153 }
2154 
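/* I/O vectorization callback: only merge 32-bit accesses that are aligned
 * well enough to stay within a single TGSI vec4 register.
 */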
2155 static bool
2156 ntr_should_vectorize_io(unsigned align, unsigned bit_size,
2157                         unsigned num_components, unsigned high_offset,
2158                         nir_intrinsic_instr *low, nir_intrinsic_instr *high,
2159                         void *data)
2160 {
2161    if (bit_size != 32)
2162       return false;
2163 
2164    /* Our offset alignment should always be at least 4 bytes */
2165    if (align < 4)
2166       return false;
2167 
2168    /* No wrapping off the end of a TGSI reg.  We could do a bit better by
2169     * looking at low's actual offset.  XXX: With LOAD_CONSTBUF maybe we don't
2170     * need this restriction.
2171     */
2172    unsigned worst_start_component = align == 4 ? 3 : align / 4;
2173    if (worst_start_component + num_components > 4)
2174       return false;
2175 
2176    return true;
2177 }
2178 
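/* Builds the mask of variable modes that can't be addressed indirectly,
 * based on the screen's PIPE_SHADER_CAP_INDIRECT_* support.
 */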
2179 static nir_variable_mode
2180 ntr_no_indirects_mask(nir_shader *s, struct pipe_screen *screen)
2181 {
2182    unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
2183    unsigned indirect_mask = 0;
2184 
2185    if (!screen->get_shader_param(screen, pipe_stage,
2186                                  PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR)) {
2187       indirect_mask |= nir_var_shader_in;
2188    }
2189 
2190    if (!screen->get_shader_param(screen, pipe_stage,
2191                                  PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR)) {
2192       indirect_mask |= nir_var_shader_out;
2193    }
2194 
2195    if (!screen->get_shader_param(screen, pipe_stage,
2196                                  PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR)) {
2197       indirect_mask |= nir_var_function_temp;
2198    }
2199 
2200    return indirect_mask;
2201 }
2202 
2203 struct ntr_lower_tex_state {
2204    nir_scalar channels[8];
2205    unsigned i;
2206 };
2207 
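/* Collects the scalar channels of one texture source into the lowering state
 * and removes that source from the instruction.
 */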
2208 static void
2209 nir_to_rc_lower_tex_instr_arg(nir_builder *b,
2210                                 nir_tex_instr *instr,
2211                                 nir_tex_src_type tex_src_type,
2212                                 struct ntr_lower_tex_state *s)
2213 {
2214    int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
2215    if (tex_src < 0)
2216       return;
2217 
2218    nir_def *def = instr->src[tex_src].src.ssa;
2219    for (int i = 0; i < def->num_components; i++) {
2220       s->channels[s->i++] = nir_get_scalar(def, i);
2221    }
2222 
2223    nir_tex_instr_remove_src(instr, tex_src);
2224 }
2225 
2226 /**
2227  * Merges together a vec4 of tex coordinate/compare/bias/lod into a backend tex
2228  * src.  This lets NIR handle the coalescing of the vec4 rather than trying to
2229  * manage it on our own, and may lead to more vectorization.
2230  */
2231 static bool
2232 nir_to_rc_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data)
2233 {
2234    if (instr->type != nir_instr_type_tex)
2235       return false;
2236 
2237    nir_tex_instr *tex = nir_instr_as_tex(instr);
2238 
2239    if (nir_tex_instr_src_index(tex, nir_tex_src_coord) < 0)
2240       return false;
2241 
2242    b->cursor = nir_before_instr(instr);
2243 
2244    struct ntr_lower_tex_state s = {0};
2245 
2246    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_coord, &s);
2247    /* We always have at least two slots for the coordinate, even on 1D. */
2248    s.i = MAX2(s.i, 2);
2249 
2250    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_comparator, &s);
2251    s.i = MAX2(s.i, 3);
2252 
2253    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_bias, &s);
2254 
2255    /* XXX: LZ */
2256    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_lod, &s);
2257    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_projector, &s);
2258    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_ms_index, &s);
2259 
2260    /* No need to pack undefs in unused channels of the tex instr */
2261    while (!s.channels[s.i - 1].def)
2262       s.i--;
2263 
2264    /* Instead of putting undefs in the unused slots of the vecs, just put in
2265     * another used channel.  Otherwise, we'll get unnecessary moves into
2266     * registers.
2267     */
2268    assert(s.channels[0].def != NULL);
2269    for (int i = 1; i < s.i; i++) {
2270       if (!s.channels[i].def)
2271          s.channels[i] = s.channels[0];
2272    }
2273 
2274    nir_tex_instr_add_src(tex, nir_tex_src_backend1,
2275                          nir_vec_scalars(b, s.channels, MIN2(s.i, 4)));
2276    if (s.i > 4)
2277       nir_tex_instr_add_src(tex, nir_tex_src_backend2,
2278                             nir_vec_scalars(b, &s.channels[4], s.i - 4));
2279 
2280    return true;
2281 }
2282 
2283 static bool
2284 nir_to_rc_lower_tex(nir_shader *s)
2285 {
2286    return nir_shader_instructions_pass(s,
2287                                        nir_to_rc_lower_tex_instr,
2288                                        nir_metadata_control_flow,
2289                                        NULL);
2290 }
2291 
2292 /* Lowers texture projectors if we can't do them as TGSI_OPCODE_TXP. */
2293 static void
2294 nir_to_rc_lower_txp(nir_shader *s)
2295 {
2296    nir_lower_tex_options lower_tex_options = {
2297        .lower_txp = 0,
2298    };
2299 
2300    nir_foreach_block(block, nir_shader_get_entrypoint(s)) {
2301       nir_foreach_instr(instr, block) {
2302          if (instr->type != nir_instr_type_tex)
2303             continue;
2304          nir_tex_instr *tex = nir_instr_as_tex(instr);
2305 
2306          if (nir_tex_instr_src_index(tex, nir_tex_src_projector) < 0)
2307             continue;
2308 
2309          bool has_compare = nir_tex_instr_src_index(tex, nir_tex_src_comparator) >= 0;
2310          bool has_lod = nir_tex_instr_src_index(tex, nir_tex_src_lod) >= 0 || s->info.stage != MESA_SHADER_FRAGMENT;
2311          bool has_offset = nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
2312 
2313          /* We can do TXP for any tex (not txg) where we can fit all the
2314           * coordinates and comparator and projector in one vec4 without any
2315           * other modifiers to add on.
2316           *
2317           * nir_lower_tex() only handles the lowering on a sampler-dim basis, so
2318           * if we get any funny projectors then we just blow them all away.
2319           */
2320          if (tex->op != nir_texop_tex || has_lod || has_offset || (tex->coord_components >= 3 && has_compare))
2321             lower_tex_options.lower_txp |= 1 << tex->sampler_dim;
2322       }
2323    }
2324 
2325    /* nir_lower_tex must be run even if no options are set, because we need the
2326     * LOD to be set for query_levels and for non-fragment shaders.
2327     */
2328    NIR_PASS_V(s, nir_lower_tex, &lower_tex_options);
2329 }
2330 
2331 /**
2332  * Translates the NIR shader to TGSI.
2333  *
2334  * This requires some lowering of the NIR shader to prepare it for translation.
2335  * We take ownership of the NIR shader passed, returning a reference to the new
2336  * TGSI tokens instead.  If you need to keep the NIR, then pass us a clone.
2337  */
2338 const void *nir_to_rc(struct nir_shader *s,
2339                       struct pipe_screen *screen)
2340 {
2341    struct ntr_compile *c;
2342    const void *tgsi_tokens;
2343    bool is_r500 = r300_screen(screen)->caps.is_r500;
2344    c = rzalloc(NULL, struct ntr_compile);
2345    c->screen = screen;
2346    c->lower_fabs = !is_r500 && s->info.stage == MESA_SHADER_VERTEX;
2347 
2348    /* Lower array indexing on FS inputs.  Since we don't set
2349     * ureg->supports_any_inout_decl_range, the TGSI input decls will be split to
2350     * elements by ureg, and so dynamically indexing them would be invalid.
2351     * Ideally we would set that ureg flag based on
2352     * PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE, but can't due to mesa/st
2353     * splitting NIR VS outputs to elements even if the FS doesn't get the
2354     * corresponding splitting, and virgl depends on TGSI across link boundaries
2355     * having matching declarations.
2356     */
2357    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2358       NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
2359       NIR_PASS_V(s, nir_remove_dead_variables, nir_var_shader_in, NULL);
2360    }
2361 
2362    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
2363               type_size, (nir_lower_io_options)0);
2364 
2365    nir_to_rc_lower_txp(s);
2366    NIR_PASS_V(s, nir_to_rc_lower_tex);
2367 
2368    bool progress;
2369 
2370    NIR_PASS_V(s, nir_opt_constant_folding);
2371 
2372    do {
2373       progress = false;
2374       NIR_PASS(progress, s, nir_opt_algebraic_late);
2375       if (progress) {
2376          NIR_PASS_V(s, nir_copy_prop);
2377          NIR_PASS_V(s, nir_opt_dce);
2378          NIR_PASS_V(s, nir_opt_cse);
2379       }
2380    } while (progress);
2381 
2382    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2383       NIR_PASS_V(s, r300_nir_prepare_presubtract);
2384    }
2385 
2386    NIR_PASS_V(s, nir_lower_int_to_float);
2387    NIR_PASS_V(s, nir_copy_prop);
2388    NIR_PASS_V(s, r300_nir_post_integer_lowering);
2389    NIR_PASS_V(s, nir_lower_bool_to_float,
2390               is_r500 || s->info.stage == MESA_SHADER_FRAGMENT);
2391    /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
2392    NIR_PASS_V(s, nir_copy_prop);
2393    /* CSE cleanup after late ftrunc lowering. */
2394    NIR_PASS_V(s, nir_opt_cse);
2395    /* At this point we need to clean up:
2396     *  a) fcsel_gt that come from the ftrunc lowering on R300,
2397     *  b) all flavours of fcsels that read three different temp sources on R500.
2398     */
2399    if (s->info.stage == MESA_SHADER_VERTEX) {
2400       if (is_r500)
2401          NIR_PASS_V(s, r300_nir_lower_fcsel_r500);
2402       else
2403          NIR_PASS_V(s, r300_nir_lower_fcsel_r300);
2404       NIR_PASS_V(s, r300_nir_lower_flrp);
2405    } else {
2406       NIR_PASS_V(s, r300_nir_lower_comparison_fs);
2407    }
2408    NIR_PASS_V(s, r300_nir_opt_algebraic_late);
2409    NIR_PASS_V(s, nir_opt_dce);
2410 
2411    nir_move_options move_all =
2412        nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
2413        nir_move_comparisons | nir_move_copies | nir_move_load_ssbo;
2414 
2415    NIR_PASS_V(s, nir_opt_move, move_all);
2416    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, true);
2417    /* Late vectorizing after nir_move_vec_src_uses_to_dest helps instructions but
2418     * increases register usage. Testing shows this is beneficial only in VS.
2419     */
2420    if (s->info.stage == MESA_SHADER_VERTEX)
2421       NIR_PASS_V(s, nir_opt_vectorize, ntr_should_vectorize_instr, NULL);
2422 
2423    NIR_PASS_V(s, nir_convert_from_ssa, true);
2424    NIR_PASS_V(s, nir_lower_vec_to_regs, NULL, NULL);
2425 
2426    /* locals_to_reg_intrinsics will leave dead derefs that are good to clean up.
2427     */
2428    NIR_PASS_V(s, nir_lower_locals_to_regs, 32);
2429    NIR_PASS_V(s, nir_opt_dce);
2430 
2431    /* See comment in ntr_get_alu_src for supported modifiers */
2432    NIR_PASS_V(s, nir_legacy_trivialize, !c->lower_fabs);
2433 
2434    if (NIR_DEBUG(TGSI)) {
2435       fprintf(stderr, "NIR before translation to TGSI:\n");
2436       nir_print_shader(s, stderr);
2437    }
2438 
2439    c->s = s;
2440    c->ureg = ureg_create(pipe_shader_type_from_mesa(s->info.stage));
2441    ureg_setup_shader_info(c->ureg, &s->info);
2442    if (s->info.use_legacy_math_rules && screen->get_param(screen, PIPE_CAP_LEGACY_MATH_RULES))
2443       ureg_property(c->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
2444 
2445    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2446       /* The draw module's polygon stipple layer doesn't respect the chosen
2447        * coordinate mode, so leave it as unspecified unless we're actually
2448        * reading the position in the shader already.  See
2449        * gl-2.1-polygon-stipple-fs on softpipe.
2450        */
2451       if ((s->info.inputs_read & VARYING_BIT_POS) ||
2452           BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
2453          ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
2454                        s->info.fs.origin_upper_left ?
2455                        TGSI_FS_COORD_ORIGIN_UPPER_LEFT :
2456                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
2457 
2458          ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
2459                        s->info.fs.pixel_center_integer ?
2460                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER :
2461                        TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
2462       }
2463    }
2464    /* Emit the main function */
2465    nir_function_impl *impl = nir_shader_get_entrypoint(c->s);
2466    ntr_emit_impl(c, impl);
2467    ureg_END(c->ureg);
2468 
2469    tgsi_tokens = ureg_get_tokens(c->ureg, NULL);
2470 
2471    if (NIR_DEBUG(TGSI)) {
2472       fprintf(stderr, "TGSI after translation from NIR:\n");
2473       tgsi_dump(tgsi_tokens, 0);
2474    }
2475 
2476    ureg_destroy(c->ureg);
2477 
2478    ralloc_free(c);
2479    ralloc_free(s);
2480 
2481    return tgsi_tokens;
2482 }
2483