/*
 * Copyright (c) 2020 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <[email protected]>
 */

#ifndef H_ETNAVIV_COMPILER_NIR
#define H_ETNAVIV_COMPILER_NIR

#include "compiler/nir/nir.h"
#include "etna_core_info.h"
#include "etnaviv_asm.h"
#include "etnaviv_compiler.h"
#include "util/compiler.h"
#include "util/log.h"
#include "util/macros.h"

struct etna_compile {
   nir_shader *nir;
   nir_function_impl *impl;
#define is_fs(c) ((c)->nir->info.stage == MESA_SHADER_FRAGMENT)
   const struct etna_core_info *info;
   const struct etna_specs *specs;
   struct etna_shader_variant *variant;

   /* block # to instr index */
   unsigned *block_ptr;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   struct etna_inst code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* constants */
   uint64_t consts[ETNA_MAX_IMM];
   unsigned const_count;

   /* ra state */
   struct ra_graph *g;
   unsigned *live_map;
   unsigned num_nodes;

   /* There was an error during compilation */
   bool error;
};
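
/*
 * A hedged sketch of the intended lifecycle (the driving code lives in
 * etnaviv_compiler_nir.c, not in this header): the compiler allocates an
 * etna_compile per shader variant, lowers and optimizes c->nir, then walks
 * c->impl emitting etna_inst records via emit_inst() while block_ptr[]
 * records the instruction index of each NIR block for jump fixup; c->error
 * is checked at the end for failure.
 */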

#define compile_error(ctx, args...) ({ \
   mesa_loge(args); \
   ctx->error = true; \
   abort(); \
})
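
/*
 * Illustrative use only (etna_opc_supported() is a hypothetical helper):
 *
 *    if (!etna_opc_supported(op))
 *       compile_error(c, "Unhandled ALU op: %s\n", nir_op_infos[op].name);
 *
 * Note that the macro both sets ctx->error and calls abort(), so it does
 * not return to the caller.
 */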

enum etna_pass_flags {
   BYPASS_DST = BITFIELD_BIT(0),
   BYPASS_SRC = BITFIELD_BIT(1),

   /* source modifier */
   SRC0_MOD_NEG = BITFIELD_BIT(2),
   SRC1_MOD_NEG = BITFIELD_BIT(3),
   SRC2_MOD_NEG = BITFIELD_BIT(4),
   SRC0_MOD_ABS = BITFIELD_BIT(5),
   SRC1_MOD_ABS = BITFIELD_BIT(6),
   SRC2_MOD_ABS = BITFIELD_BIT(7),
};

#define PASS_FLAGS_IS_DEAD_MASK     BITFIELD_RANGE(0, 2)
#define PASS_FLAGS_SRC_MOD_NEG_MASK BITFIELD_RANGE(2, 3)
#define PASS_FLAGS_SRC_MOD_ABS_MASK BITFIELD_RANGE(5, 3)

static_assert(PASS_FLAGS_IS_DEAD_MASK == (BYPASS_DST | BYPASS_SRC), "is_dead_mask is wrong");
static_assert(PASS_FLAGS_SRC_MOD_NEG_MASK == (SRC0_MOD_NEG | SRC1_MOD_NEG | SRC2_MOD_NEG), "src_mod_neg_mask is wrong");
static_assert(PASS_FLAGS_SRC_MOD_ABS_MASK == (SRC0_MOD_ABS | SRC1_MOD_ABS | SRC2_MOD_ABS), "src_mod_abs_mask is wrong");
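
/*
 * Resulting layout of nir_instr::pass_flags, derived from the definitions
 * above:
 *
 *    bit 0: BYPASS_DST          bits 2-4: SRC{0,1,2}_MOD_NEG
 *    bit 1: BYPASS_SRC          bits 5-7: SRC{0,1,2}_MOD_ABS
 *
 * For example, a source 1 read as -abs(src1) carries
 * SRC1_MOD_NEG | SRC1_MOD_ABS == 0x48.
 */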

static inline bool is_dead_instruction(nir_instr *instr)
{
   return instr->pass_flags & PASS_FLAGS_IS_DEAD_MASK;
}

static inline void set_src_mod_abs(nir_instr *instr, unsigned idx)
{
   assert(idx < 3);
   instr->pass_flags |= (SRC0_MOD_ABS << idx);
}

static inline void set_src_mod_neg(nir_instr *instr, unsigned idx)
{
   assert(idx < 3);
   instr->pass_flags |= (SRC0_MOD_NEG << idx);
}

static inline void toggle_src_mod_neg(nir_instr *instr, unsigned idx)
{
   assert(idx < 3);
   instr->pass_flags ^= (SRC0_MOD_NEG << idx);
}

static inline bool is_src_mod_abs(nir_instr *instr, unsigned idx)
{
   if (idx < 3)
      return instr->pass_flags & (SRC0_MOD_ABS << idx);

   return false;
}

static inline bool is_src_mod_neg(nir_instr *instr, unsigned idx)
{
   if (idx < 3)
      return instr->pass_flags & (SRC0_MOD_NEG << idx);

   return false;
}
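
/*
 * These helpers let lowering passes fold fneg/fabs into hardware source
 * modifiers instead of emitting extra instructions. A minimal sketch,
 * assuming a pass that rewrites an ALU source to skip a parent fneg:
 *
 *    // fold y = fneg(x): read x directly and flip the NEG modifier
 *    alu->src[i].src = nir_src_for_ssa(x);
 *    toggle_src_mod_neg(&alu->instr, i);
 */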

static inline bool is_sysval(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   return intr->intrinsic == nir_intrinsic_load_front_face ||
          intr->intrinsic == nir_intrinsic_load_frag_coord;
}

/* get unique ssa/reg index for nir_src */
static inline unsigned
src_index(nir_function_impl *impl, nir_src *src)
{
   nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa);

   if (load) {
      nir_def *reg = load->src[0].ssa;
      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
      assert(nir_intrinsic_base(load) == 0);
      assert(nir_intrinsic_num_array_elems(decl) == 0);

      return reg->index;
   }

   return src->ssa->index;
}

/* get unique ssa/reg index for nir_def */
static inline unsigned
def_index(nir_function_impl *impl, nir_def *def)
{
   nir_intrinsic_instr *store = nir_store_reg_for_def(def);

   if (store) {
      nir_def *reg = store->src[1].ssa;
      ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
      assert(nir_intrinsic_base(store) == 0);
      assert(nir_intrinsic_num_array_elems(decl) == 0);

      return reg->index;
   }

   return def->index;
}
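
/*
 * Example: a register-backed value maps every access to the index of the
 * underlying decl_reg def, so all of them share one RA node:
 *
 *    r = decl_reg            (r->index == 5)
 *    store_reg(val, r)       -> def_index(impl, val) == 5
 *    v = load_reg(r)         -> src_index(impl, &use_of_v) == 5
 */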

static inline void
update_swiz_mask(nir_alu_instr *alu, nir_def *def, unsigned *swiz, unsigned *mask)
{
   if (!swiz)
      return;

   bool is_vec = def != NULL;
   unsigned swizzle = 0, write_mask = 0;
   for (unsigned i = 0; i < alu->def.num_components; i++) {
      /* src is different (only check for vecN) */
      if (is_vec && alu->src[i].src.ssa != def)
         continue;

      unsigned src_swiz = is_vec ? alu->src[i].swizzle[0] : alu->src[0].swizzle[i];
      swizzle |= (*swiz >> src_swiz * 2 & 3) << i * 2;
      /* this channel isn't written through this chain */
      if (*mask & (1 << src_swiz))
         write_mask |= 1 << i;
   }
   *swiz = swizzle;
   *mask = write_mask;
}
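
/*
 * Worked example (mov case, def == NULL): with an incoming *swiz of .xyzw
 * (identity) and *mask == 0x3 (only x and y written), a mov whose source
 * swizzle is .yyxx yields *swiz == .yyxx and *mask == 0xf, since every
 * result channel now reads a channel (x or y) that the chain does write.
 */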

static nir_def *
real_def(nir_def *def, unsigned *swiz, unsigned *mask)
{
   if (!def)
      return def;

   bool can_bypass_src = !nir_def_used_by_if(def);
   nir_instr *p_instr = def->parent_instr;

   /* if used by a vecN, the "real" destination becomes the vecN destination
    * lower_alu guarantees that values used by a vecN are only used by that vecN
    * we can apply the same logic to movs in some cases too
    */
   nir_foreach_use(use_src, def) {
      nir_instr *instr = nir_src_parent_instr(use_src);

      /* src bypass check: for now only deal with tex src mov case
       * note: for alu don't bypass mov for multiple uniform sources
       */
      switch (instr->type) {
      case nir_instr_type_tex:
         if (p_instr->type == nir_instr_type_alu &&
             nir_instr_as_alu(p_instr)->op == nir_op_mov) {
            break;
         }
         FALLTHROUGH;
      default:
         can_bypass_src = false;
         break;
      }

      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *alu = nir_instr_as_alu(instr);

      switch (alu->op) {
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         assert(!nir_def_used_by_if(def));
         nir_foreach_use(use_src, def)
            assert(nir_src_parent_instr(use_src) == instr);

         update_swiz_mask(alu, def, swiz, mask);
         break;
      case nir_op_mov: {
         switch (def->parent_instr->type) {
         case nir_instr_type_alu:
         case nir_instr_type_tex:
            break;
         default:
            continue;
         }
         if (nir_def_used_by_if(def) || list_length(&def->uses) > 1)
            continue;

         update_swiz_mask(alu, NULL, swiz, mask);
         break;
      }
      default:
         continue;
      }

      assert(!(instr->pass_flags & BYPASS_SRC));
      instr->pass_flags |= BYPASS_DST;
      return real_def(&alu->def, swiz, mask);
   }

   if (can_bypass_src && !(p_instr->pass_flags & BYPASS_DST)) {
      p_instr->pass_flags |= BYPASS_SRC;
      return NULL;
   }

   return def;
}
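
/*
 * In short: real_def() chases a value through vecN/mov copies to the def
 * that will actually occupy a register, folding the copies' swizzles and
 * write masks into *swiz/*mask along the way, and tags the skipped
 * instructions with BYPASS_DST/BYPASS_SRC so no code is emitted for them.
 */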

/* if instruction dest needs a register, return nir_def for it */
static inline nir_def *
def_for_instr(nir_instr *instr)
{
   nir_def *def = NULL;

   switch (instr->type) {
   case nir_instr_type_alu:
      def = &nir_instr_as_alu(instr)->def;
      break;
   case nir_instr_type_tex:
      def = &nir_instr_as_tex(instr)->def;
      break;
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic == nir_intrinsic_load_uniform ||
          intr->intrinsic == nir_intrinsic_load_ubo ||
          intr->intrinsic == nir_intrinsic_load_input ||
          intr->intrinsic == nir_intrinsic_load_instance_id ||
          intr->intrinsic == nir_intrinsic_load_vertex_id ||
          intr->intrinsic == nir_intrinsic_load_texture_scale ||
          intr->intrinsic == nir_intrinsic_load_texture_size_etna)
         def = &intr->def;
   } break;
   case nir_instr_type_deref:
      return NULL;
   default:
      break;
   }
   return real_def(def, NULL, NULL);
}
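
/* Any instruction not matched above (e.g. store intrinsics or control flow)
 * produces no register-allocated value, so def_for_instr() returns NULL
 * for it.
 */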

struct live_def {
   nir_instr *instr;
   nir_def *def; /* cached def_for_instr */
   unsigned live_start, live_end; /* live range */
};

unsigned
etna_live_defs(nir_function_impl *impl, struct live_def *defs, unsigned *live_map);
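
/*
 * Usage sketch (hedged; allocation sizes and the real call site live in
 * the RA code, not in this header):
 *
 *    struct live_def *defs = ...;   // one slot per potential def
 *    unsigned num = etna_live_defs(c->impl, defs, c->live_map);
 *    // defs[0..num) now hold [live_start, live_end) ranges that the
 *    // allocator turns into interference edges in the ra_graph
 */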

/* Swizzles and write masks can be used to layer virtual non-interfering
 * registers on top of the real VEC4 registers. For example, the virtual
 * VEC3_XYZ register and the virtual SCALAR_W register that use the same
 * physical VEC4 base register do not interfere.
 */
enum reg_class {
   REG_CLASS_VIRT_SCALAR,
   REG_CLASS_VIRT_VEC2,
   REG_CLASS_VIRT_VEC3,
   REG_CLASS_VEC4,
   /* special vec2 class for fast transcendentals, limited to XY or ZW */
   REG_CLASS_VIRT_VEC2T,
   /* special classes for LOAD - contiguous components */
   REG_CLASS_VIRT_VEC2C,
   REG_CLASS_VIRT_VEC3C,
   NUM_REG_CLASSES,
};
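
/*
 * Example of the layering described above: REG_CLASS_VIRT_VEC3 groups the
 * four REG_TYPE_VIRT_VEC3_* types. A VIRT_VEC3_XYZ node and a
 * VIRT_SCALAR_W node that share a VEC4 base register touch disjoint
 * channels, so the allocator may pack both into one physical register.
 */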

enum reg_type {
   REG_TYPE_VEC4,
   REG_TYPE_VIRT_VEC3_XYZ,
   REG_TYPE_VIRT_VEC3_XYW,
   REG_TYPE_VIRT_VEC3_XZW,
   REG_TYPE_VIRT_VEC3_YZW,
   REG_TYPE_VIRT_VEC2_XY,
   REG_TYPE_VIRT_VEC2_XZ,
   REG_TYPE_VIRT_VEC2_XW,
   REG_TYPE_VIRT_VEC2_YZ,
   REG_TYPE_VIRT_VEC2_YW,
   REG_TYPE_VIRT_VEC2_ZW,
   REG_TYPE_VIRT_SCALAR_X,
   REG_TYPE_VIRT_SCALAR_Y,
   REG_TYPE_VIRT_SCALAR_Z,
   REG_TYPE_VIRT_SCALAR_W,
   REG_TYPE_VIRT_VEC2T_XY,
   REG_TYPE_VIRT_VEC2T_ZW,
   REG_TYPE_VIRT_VEC2C_XY,
   REG_TYPE_VIRT_VEC2C_YZ,
   REG_TYPE_VIRT_VEC2C_ZW,
   REG_TYPE_VIRT_VEC3C_XYZ,
   REG_TYPE_VIRT_VEC3C_YZW,
   NUM_REG_TYPES,
};

/* writemask when used as dest */
static const uint8_t
reg_writemask[NUM_REG_TYPES] = {
   [REG_TYPE_VEC4] = 0xf,
   [REG_TYPE_VIRT_SCALAR_X] = 0x1,
   [REG_TYPE_VIRT_SCALAR_Y] = 0x2,
   [REG_TYPE_VIRT_VEC2_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2T_XY] = 0x3,
   [REG_TYPE_VIRT_VEC2C_XY] = 0x3,
   [REG_TYPE_VIRT_SCALAR_Z] = 0x4,
   [REG_TYPE_VIRT_VEC2_XZ] = 0x5,
   [REG_TYPE_VIRT_VEC2_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC2C_YZ] = 0x6,
   [REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
   [REG_TYPE_VIRT_VEC3C_XYZ] = 0x7,
   [REG_TYPE_VIRT_SCALAR_W] = 0x8,
   [REG_TYPE_VIRT_VEC2_XW] = 0x9,
   [REG_TYPE_VIRT_VEC2_YW] = 0xa,
   [REG_TYPE_VIRT_VEC3_XYW] = 0xb,
   [REG_TYPE_VIRT_VEC2_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2T_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC2C_ZW] = 0xc,
   [REG_TYPE_VIRT_VEC3_XZW] = 0xd,
   [REG_TYPE_VIRT_VEC3_YZW] = 0xe,
   [REG_TYPE_VIRT_VEC3C_YZW] = 0xe,
};
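
/* One bit per channel (x = 0x1, y = 0x2, z = 0x4, w = 0x8), so each mask
 * can be read straight off the type name, e.g. VEC2_XW -> 0x1 | 0x8 == 0x9.
 */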

static inline int reg_get_type(int virt_reg)
{
   return virt_reg % NUM_REG_TYPES;
}

static inline int reg_get_base(struct etna_compile *c, int virt_reg)
{
   /* offset by 1 to avoid reserved position register */
   if (c->nir->info.stage == MESA_SHADER_FRAGMENT)
      return (virt_reg / NUM_REG_TYPES + 1) % ETNA_MAX_TEMPS;
   return virt_reg / NUM_REG_TYPES;
}
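
/*
 * A virtual register number decomposes as
 * virt_reg == base * NUM_REG_TYPES + type. With NUM_REG_TYPES == 22,
 * virt_reg 45 is type 45 % 22 == 1 (REG_TYPE_VIRT_VEC3_XYZ) on base
 * 45 / 22 == 2, which reg_get_base() maps to physical register 3 in a
 * fragment shader because of the +1 skip of the reserved position
 * register.
 */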

struct ra_regs *
etna_ra_setup(void *mem_ctx);

void
etna_ra_assign(struct etna_compile *c, nir_shader *shader);

unsigned
etna_ra_finish(struct etna_compile *c);

static inline void
emit_inst(struct etna_compile *c, struct etna_inst *inst)
{
   c->code[c->inst_ptr++] = *inst;
}
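
/*
 * Usage sketch (hedged; real emission goes through the etna_emit_*
 * helpers declared below, and the field names come from etnaviv_asm.h):
 *
 *    struct etna_inst mov = {
 *       .opcode = INST_OPCODE_MOV,
 *       .dst = dst,
 *       .src[2] = src,   // the Vivante MOV reads its operand from src2
 *    };
 *    emit_inst(c, &mov);
 */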

void
etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,
              struct etna_inst_src src[3], bool saturate);

void
etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,
              struct etna_inst_dst dst, struct etna_inst_src coord,
              struct etna_inst_src src1, struct etna_inst_src src2);

void
etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition);

void
etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition);

#endif