xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a2xx/ir2_private.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Jonathan Marek <[email protected]>
3  * SPDX-License-Identifier: MIT
4  *
5  * Authors:
6  *    Jonathan Marek <[email protected]>
7  */
8 
9 #include <assert.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include "ir2/instr-a2xx.h"
16 #include "fd2_program.h"
17 #include "ir2.h"
18 
/* Selects how the num field of struct ir2_src is interpreted. */
enum ir2_src_type {
   IR2_SRC_SSA = 0,
   IR2_SRC_REG = 1,
   IR2_SRC_INPUT = 2,
   IR2_SRC_CONST = 3,
};
25 
/* One source operand of an ir2 instruction. */
struct ir2_src {
   /* num can mean different things
    *   ssa: index of instruction
    *   reg: index in ctx->reg array
    *   input: index in ctx->input array
    *   const: constant index (C0, C1, etc)
    */
   uint16_t num;
   /* presumably packed the way swiz_set()/swiz_get() encode it — confirm */
   uint8_t swizzle;
   /* selects which of the meanings listed above applies to num */
   enum ir2_src_type type : 2;
   /* NOTE(review): presumably "take absolute value of the operand" — confirm */
   uint8_t abs : 1;
   /* NOTE(review): presumably "negate the operand" — confirm */
   uint8_t negate : 1;
   /* unnamed bit-field: 4 bits of padding */
   uint8_t : 4;
};
40 
/* Per-component state of an ir2_reg (struct ir2_reg holds four of these). */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};
46 
/* A register value: either the SSA destination embedded in an instruction
 * (ir2_instr.ssa), an entry of ctx->reg, or an entry of ctx->input.
 */
struct ir2_reg {
   uint8_t idx; /* assigned hardware register */
   uint8_t ncomp; /* number of entries of comp[] in use (dst_ncomp loops up to it) */

   uint8_t loop_depth;
   bool initialized;
   /* block_idx to free on (-1 = free on ref_count==0) */
   int block_idx_free;
   struct ir2_reg_component comp[4];
};
57 
/* One IR instruction: common header, sources, destination, then a payload
 * whose valid member is selected by the type field.
 */
struct ir2_instr {
   /* NOTE(review): presumably this instruction's index in ctx->instr
    * (get_reg_src indexes ctx->instr by an SSA source's num) — confirm */
   unsigned idx;

   unsigned block_idx;

   /* IR2_NONE entries are skipped by ir2_foreach_instr */
   enum {
      IR2_NONE,
      IR2_FETCH,
      IR2_ALU,
      IR2_CF,
   } type : 2;

   /* instruction needs to be emitted (for scheduling) */
   bool need_emit : 1;

   /* predicate value - (usually) same for entire block */
   uint8_t pred : 2;

   /* src */
   /* number of entries of src[] in use (bound used by ir2_foreach_src) */
   uint8_t src_count;
   struct ir2_src src[4];

   /* dst */
   /* is_ssa selects the union member: ssa when true, reg otherwise
    * (this is how get_reg picks the destination) */
   bool is_ssa;
   union {
      struct ir2_reg ssa;
      struct ir2_reg *reg;
   };

   /* type-specific */
   union {
      /* payload read when type == IR2_FETCH (see src_ncomp) */
      struct {
         instr_fetch_opc_t opc : 5;
         union {
            struct {
               uint8_t const_idx;
               uint8_t const_idx_sel;
            } vtx;
            struct {
               bool is_cube : 1; /* src_ncomp reports 3 source components when set, else 2 */
               bool is_rect : 1;
               uint8_t samp_id;
            } tex;
         };
      } fetch;
      /* payload read when type == IR2_ALU (see is_export, dst_ncomp) */
      struct {
         /* store possible opcs, then we can choose vector/scalar instr */
         instr_scalar_opc_t scalar_opc : 6;
         instr_vector_opc_t vector_opc : 5;
         /* same as nir */
         uint8_t write_mask : 4;
         bool saturate : 1;

         /* export idx (-1 no export) */
         int8_t export;

         /* for scalarized 2 src instruction */
         uint8_t src1_swizzle;
      } alu;
      /* NOTE(review): presumably the payload for type == IR2_CF — confirm */
      struct {
         /* jmp dst block_idx */
         uint8_t block_idx;
      } cf;
   };
};
123 
/* One scheduled slot: a 256-bit register mask (8 x 32 bits, the same size as
 * ir2_context.reg_state) plus two instruction pointers.
 * NOTE(review): instr/instr_s are presumably the vector and scalar
 * instructions paired in this slot — confirm against the scheduler.
 */
struct ir2_sched_instr {
   uint32_t reg_state[8];
   struct ir2_instr *instr;
   struct ir2_instr *instr_s;
};
128 
/* Compilation state for one shader: the fixed-size instruction, register and
 * scheduling arrays plus the bookkeeping around them.
 */
struct ir2_context {
   struct fd2_shader_stateobj *so;

   unsigned block_idx, pred_idx;
   uint8_t pred;
   bool block_has_jump[64];

   unsigned loop_last_block[64];
   unsigned loop_depth;

   nir_shader *nir;

   /* ssa index of position output */
   struct ir2_src position;

   /* to translate SSA ids to instruction ids */
   int16_t ssa_map[1024];

   struct ir2_shader_info *info;
   struct ir2_frag_linkage *f;

   int prev_export;

   /* RA state */
   /* NULL entries are skipped by ir2_foreach_live_reg */
   struct ir2_reg *live_regs[64];
   uint32_t reg_state[256 / 32]; /* 64*4 bits */

   /* inputs */
   struct ir2_reg input[16 + 1]; /* 16 + param */

   /* non-ssa regs */
   struct ir2_reg reg[1024];
   unsigned reg_count;

   /* only the first instr_count entries are visited by ir2_foreach_instr */
   struct ir2_instr instr[0x300];
   unsigned instr_count;

   struct ir2_sched_instr instr_sched[0x180];
   unsigned instr_sched_count;
};
169 
/* Functions declared here and defined outside this header. */
void assemble(struct ir2_context *ctx, bool binning);

void ir2_nir_compile(struct ir2_context *ctx, bool binning);
bool ir2_nir_lower_scalar(nir_shader *shader);

/* ra_*: NOTE(review): presumably the register allocator (cf. the "RA state"
 * fields of struct ir2_context) — confirm against the definitions */
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
            bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);

/* cp_*: NOTE(review): presumably copy-propagation passes — confirm */
void cp_src(struct ir2_context *ctx);
void cp_export(struct ir2_context *ctx);
183 
/* utils */
/* Swizzle constants.  A swizzle packs four 2-bit fields, field i at bits
 * 2i..2i+1; each field holds (component - i) & 3, i.e. the selected component
 * relative to channel i (the encoding swiz_set() produces and swiz_get()
 * decodes), so 0 is the identity swizzle.  Names list the component selected
 * per channel with x/y/z/w = 0/1/2/3; in the shorter names the remaining
 * fields are 0.
 */
enum {
   IR2_SWIZZLE_Y = 1 << 0,
   IR2_SWIZZLE_Z = 2 << 0,
   IR2_SWIZZLE_W = 3 << 0,

   IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,

   IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,

   IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
   IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
   IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};
203 
/* Report a compile failure: print the printf-style message to stdout, then
 * trip assert(0).  The ctx argument is accepted but not used.  When NDEBUG is
 * defined the assert expands to nothing and execution continues after the
 * message has been printed.
 */
#define compile_error(ctx, ...)                                                \
   ({                                                                          \
      printf(__VA_ARGS__);                                                     \
      assert(0);                                                               \
   })
209 
210 static inline struct ir2_src
ir2_src(uint16_t num,uint8_t swizzle,enum ir2_src_type type)211 ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
212 {
213    return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
214 }
215 
/* ir2_assemble uses it .. */
/* Declared here, defined outside this header.
 * NOTE(review): judging by the name it presumably returns a source operand
 * that reads as zero — confirm against the definition. */
struct ir2_src ir2_zero(struct ir2_context *ctx);
218 
/* Iterate `it` over the first instr_count entries of (ctx)->instr, skipping
 * entries whose type is IR2_NONE.  The loop condition is a GNU statement
 * expression: its inner while first steps past IR2_NONE entries, then the
 * final expression tests for the end of the used range.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)
227 
/* Iterate `it` over every non-NULL entry of (ctx)->live_regs[0..63].
 *
 * __ptr walks the slot array; the statement-expression condition skips NULL
 * slots, loads the current slot into `it`, and yields NULL (false) once the
 * end of the array is reached.  The increment advances the slot cursor
 * (__ptr), not `it`: stepping `it` would leave the cursor on the same slot,
 * so any iteration that reaches the increment without clearing that slot
 * (for instance via `continue`) would visit the same register forever.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)
235 
/* Iterate `it` over avail[0 .. avail_count).  `avail` and `avail_count` are
 * not macro parameters: they must be visible in the scope where the macro is
 * expanded.
 *
 * The bounds test runs before the slot is loaded, so avail[avail_count]
 * (one past the last valid entry) is never read.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && ((it = *__instrp), 1); __instrp++)
239 
/* Iterate `it` over the first src_count entries of (instr)->src.  The instr
 * argument is parenthesised at every use so that any pointer expression
 * (e.g. &local_instr) can be passed.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)
243 
244 /* mask for register allocation
245  * 64 registers with 4 components each = 256 bits
246  */
247 /* typedef struct {
248         uint64_t data[4];
249 } regmask_t; */
250 
/* Test bit `num` of a bitmask stored as an array of 32-bit words.
 *
 * The word is shifted down as an unsigned value rather than building
 * `1 << n` in a signed int, which is undefined for n == 31.
 */
static inline bool
mask_isset(uint32_t *mask, unsigned num)
{
   return (mask[num / 32] >> (num % 32)) & UINT32_C(1);
}
256 
/* Set bit `num` of a bitmask stored as an array of 32-bit words.
 *
 * The single-bit value is built from an unsigned 32-bit one so that bit 31
 * does not shift into the sign bit of an int (undefined behaviour).
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   mask[num / 32] |= UINT32_C(1) << (num % 32);
}
262 
/* Clear bit `num` of a bitmask stored as an array of 32-bit words.
 *
 * The single-bit value is built from an unsigned 32-bit one so that bit 31
 * does not shift into the sign bit of an int (undefined behaviour).
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
268 
/* Return the 4-bit group for register `num`: bits num*4 .. num*4+3 of the
 * bitmask.  Each 32-bit word holds eight such groups.
 */
static inline unsigned
mask_reg(uint32_t *mask, unsigned num)
{
   unsigned shift = (num % 8) * 4;

   return (mask[num / 8] >> shift) & 0xf;
}
274 
275 static inline bool
is_export(struct ir2_instr * instr)276 is_export(struct ir2_instr *instr)
277 {
278    return instr->type == IR2_ALU && instr->alu.export >= 0;
279 }
280 
281 static inline instr_alloc_type_t
export_buf(unsigned num)282 export_buf(unsigned num)
283 {
284    return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
285 }
286 
/* component c for channel i
 *
 * Returns the 2-bit field for channel i, already shifted into position.  The
 * field stores c relative to the channel, (c - i) mod 4, so an all-zero
 * swizzle selects each channel's own component.
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   return ((c - i) & 3) << (i * 2);
}
293 
/* get swizzle in channel i
 *
 * Extracts the 2-bit field for channel i and adds i back (mod 4), giving the
 * absolute component index 0..3 selected for that channel.
 */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   return ((swiz >> (i * 2)) + i) & 3;
}
300 
/* Compose two swizzles.  For each channel i, swiz1 picks a component, and
 * the result's channel i selects whatever swiz0 selects for that component.
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      unsigned inner = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(swiz0, inner), chan);
   }

   return merged;
}
309 
/* In-place variant of swiz_merge(): replaces *swiz0 with the composition of
 * *swiz0 and swiz1.  Delegates to swiz_merge() so the composition rule lives
 * in one place; the result always fits in the 8 bits of *swiz0.
 */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   *swiz0 = swiz_merge(*swiz0, swiz1);
}
318 
319 static inline struct ir2_reg *
get_reg(struct ir2_instr * instr)320 get_reg(struct ir2_instr *instr)
321 {
322    return instr->is_ssa ? &instr->ssa : instr->reg;
323 }
324 
325 static inline struct ir2_reg *
get_reg_src(struct ir2_context * ctx,struct ir2_src * src)326 get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
327 {
328    switch (src->type) {
329    case IR2_SRC_INPUT:
330       return &ctx->input[src->num];
331    case IR2_SRC_SSA:
332       return &ctx->instr[src->num].ssa;
333    case IR2_SRC_REG:
334       return &ctx->reg[src->num];
335    default:
336       return NULL;
337    }
338 }
339 
340 /* gets a ncomp value for the dst */
341 static inline unsigned
dst_ncomp(struct ir2_instr * instr)342 dst_ncomp(struct ir2_instr *instr)
343 {
344    if (instr->is_ssa)
345       return instr->ssa.ncomp;
346 
347    if (instr->type == IR2_FETCH)
348       return instr->reg->ncomp;
349 
350    assert(instr->type == IR2_ALU);
351 
352    unsigned ncomp = 0;
353    for (int i = 0; i < instr->reg->ncomp; i++)
354       ncomp += !!(instr->alu.write_mask & 1 << i);
355    return ncomp;
356 }
357 
358 /* gets a ncomp value for the src registers */
359 static inline unsigned
src_ncomp(struct ir2_instr * instr)360 src_ncomp(struct ir2_instr *instr)
361 {
362    if (instr->type == IR2_FETCH) {
363       switch (instr->fetch.opc) {
364       case VTX_FETCH:
365          return 1;
366       case TEX_FETCH:
367          return instr->fetch.tex.is_cube ? 3 : 2;
368       case TEX_SET_TEX_LOD:
369          return 1;
370       default:
371          assert(0);
372       }
373    }
374 
375    switch (instr->alu.scalar_opc) {
376    case PRED_SETEs ... KILLONEs:
377       return 1;
378    default:
379       break;
380    }
381 
382    switch (instr->alu.vector_opc) {
383    case DOT2ADDv:
384       return 2;
385    case DOT3v:
386       return 3;
387    case DOT4v:
388    case CUBEv:
389    case PRED_SETE_PUSHv:
390       return 4;
391    default:
392       return dst_ncomp(instr);
393    }
394 }
395