1 /*
2 * Copyright © 2018 Jonathan Marek <[email protected]>
3 * SPDX-License-Identifier: MIT
4 *
5 * Authors:
6 * Jonathan Marek <[email protected]>
7 */
8
9 #include <assert.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14
15 #include "ir2/instr-a2xx.h"
16 #include "fd2_program.h"
17 #include "ir2.h"
18
/* Kind of operand an ir2_src refers to. The type selects how ir2_src.num
 * is interpreted (see struct ir2_src and get_reg_src()).
 */
enum ir2_src_type {
   IR2_SRC_SSA,   /* num is the index of the producing instruction */
   IR2_SRC_REG,   /* num is an index into the ctx->reg array */
   IR2_SRC_INPUT, /* num is an index into the ctx->input array */
   IR2_SRC_CONST, /* num is a constant index (C0, C1, etc) */
};
25
/* One source operand of an instruction. */
struct ir2_src {
   /* num can mean different things
    *   ssa:   index of instruction
    *   reg:   index in ctx->reg array
    *   input: index in ctx->input array
    *   const: constant index (C0, C1, etc)
    */
   uint16_t num;
   /* 8 bits of swizzle; the swiz_*() helpers in this file treat such a
    * value as four 2-bit per-channel fields
    */
   uint8_t swizzle;
   /* how to interpret num (4 enumerators, stored in 2 bits) */
   enum ir2_src_type type : 2;
   /* source modifier flags - presumably absolute value / negation of the
    * operand; their consumers are not visible in this file
    */
   uint8_t abs : 1;
   uint8_t negate : 1;
   /* unnamed bit-field: 4 unused bits */
   uint8_t : 4;
};
40
/* Register allocation state for a single component of an ir2_reg. */
struct ir2_reg_component {
   uint8_t c : 3;     /* assigned x/y/z/w (7=dont write, for fetch instr) */
   bool alloc : 1;    /* is it currently allocated */
   uint8_t ref_count; /* for ra */
};
46
/* A value with up to four components. Used for SSA destinations (embedded
 * in ir2_instr), for entries of ctx->reg and for entries of ctx->input.
 */
struct ir2_reg {
   uint8_t idx; /* assigned hardware register */
   uint8_t ncomp; /* number of components in use (comp[] holds at most 4) */

   uint8_t loop_depth;
   bool initialized;
   /* block_idx to free on (-1 = free on ref_count==0) */
   int block_idx_free;
   /* per-component allocation state */
   struct ir2_reg_component comp[4];
};
57
/* One IR instruction: a fetch, an ALU operation or a control-flow
 * instruction. Entries whose type is IR2_NONE are skipped by
 * ir2_foreach_instr().
 */
struct ir2_instr {
   /* presumably this instruction's index in ctx->instr (SSA sources refer
    * to instructions by that index) - TODO confirm against the code that
    * fills it in
    */
   unsigned idx;

   /* block this instruction belongs to */
   unsigned block_idx;

   /* which member of the type-specific union below is meaningful */
   enum {
      IR2_NONE,
      IR2_FETCH,
      IR2_ALU,
      IR2_CF,
   } type : 2;

   /* instruction needs to be emitted (for scheduling) */
   bool need_emit : 1;

   /* predicate value - (usually) same for entire block */
   uint8_t pred : 2;

   /* src: only the first src_count entries of src[] are valid
    * (see ir2_foreach_src)
    */
   uint8_t src_count;
   struct ir2_src src[4];

   /* dst: is_ssa selects the union member (see get_reg()): the embedded
    * ssa register when true, the pointed-to reg otherwise
    */
   bool is_ssa;
   union {
      struct ir2_reg ssa;
      struct ir2_reg *reg;
   };

   /* type-specific */
   union {
      /* type == IR2_FETCH */
      struct {
         instr_fetch_opc_t opc : 5;
         union {
            /* vertex fetch parameters */
            struct {
               uint8_t const_idx;
               uint8_t const_idx_sel;
            } vtx;
            /* texture fetch parameters; is_cube makes src_ncomp() report
             * 3 source components instead of 2
             */
            struct {
               bool is_cube : 1;
               bool is_rect : 1;
               uint8_t samp_id;
            } tex;
         };
      } fetch;
      /* type == IR2_ALU */
      struct {
         /* store possible opcs, then we can choose vector/scalar instr */
         instr_scalar_opc_t scalar_opc : 6;
         instr_vector_opc_t vector_opc : 5;
         /* same as nir */
         uint8_t write_mask : 4;
         bool saturate : 1;

         /* export idx (-1 no export) */
         int8_t export;

         /* for scalarized 2 src instruction */
         uint8_t src1_swizzle;
      } alu;
      /* type == IR2_CF */
      struct {
         /* jmp dst block_idx */
         uint8_t block_idx;
      } cf;
   };
};
123
/* One slot of the scheduled instruction stream (ctx->instr_sched). */
struct ir2_sched_instr {
   /* 8 x 32 bits, the same size as ir2_context.reg_state; presumably a
    * copy of the register allocation mask at this slot - TODO confirm
    */
   uint32_t reg_state[8];
   /* NOTE(review): looks like a vector instruction (instr) paired with an
    * optional scalar instruction (instr_s) - confirm against the scheduler
    */
   struct ir2_instr *instr, *instr_s;
};
128
/* Complete state of one shader compilation. All tables are fixed-size
 * arrays embedded in the struct, so the limits below are hard limits.
 */
struct ir2_context {
   struct fd2_shader_stateobj *so;

   /* current block / predicate bookkeeping (used while building the IR;
    * the code that maintains these is not in this file)
    */
   unsigned block_idx, pred_idx;
   uint8_t pred;
   /* indexed by block_idx, at most 64 blocks */
   bool block_has_jump[64];

   /* presumably indexed by loop depth - TODO confirm */
   unsigned loop_last_block[64];
   unsigned loop_depth;

   nir_shader *nir;

   /* ssa index of position output */
   struct ir2_src position;

   /* to translate SSA ids to instruction ids */
   int16_t ssa_map[1024];

   struct ir2_shader_info *info;
   struct ir2_frag_linkage *f;

   int prev_export;

   /* RA state: live_regs is walked by ir2_foreach_live_reg (NULL slots
    * are skipped); reg_state is a bitmask read and written by the mask_*()
    * helpers
    */
   struct ir2_reg *live_regs[64];
   uint32_t reg_state[256 / 32]; /* 64*4 bits */

   /* inputs */
   struct ir2_reg input[16 + 1]; /* 16 + param */

   /* non-ssa regs */
   struct ir2_reg reg[1024];
   unsigned reg_count;

   /* instruction storage; the first instr_count entries are in use */
   struct ir2_instr instr[0x300];
   unsigned instr_count;

   /* scheduled instructions; the first instr_sched_count entries are in use */
   struct ir2_sched_instr instr_sched[0x180];
   unsigned instr_sched_count;
};
169
/* Compiler stages. Only the declarations are visible here; the
 * descriptions below are inferred from the names and should be checked
 * against the definitions.
 */

/* presumably emits the final shader binary from the scheduled IR */
void assemble(struct ir2_context *ctx, bool binning);

/* NIR front end */
void ir2_nir_compile(struct ir2_context *ctx, bool binning);
bool ir2_nir_lower_scalar(nir_shader *shader);

/* register allocation ("ra") */
void ra_count_refs(struct ir2_context *ctx);
void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
            bool export, uint8_t export_writemask);
void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
void ra_block_free(struct ir2_context *ctx, unsigned block);

/* NOTE(review): "cp" presumably stands for copy propagation - confirm */
void cp_src(struct ir2_context *ctx);
void cp_export(struct ir2_context *ctx);
183
/* utils */

/* Predefined swizzle values.
 *
 * A swizzle holds one 2-bit field per destination channel (channel i in
 * bits 2*i..2*i+1). As decoded by swiz_get(), each field is an offset
 * relative to the channel itself: channel i reads component
 * (field + i) & 3, so a swizzle of 0 is the identity (xyzw).
 *
 * Example: IR2_SWIZZLE_XXXX has fields 0,3,2,1, which select
 * (0+0)&3, (3+1)&3, (2+2)&3, (1+3)&3 = x,x,x,x.
 *
 * Names with fewer than four letters only set the leading fields; the
 * remaining fields are 0, i.e. those channels keep their own component.
 */
enum {
   IR2_SWIZZLE_Y = 1 << 0,
   IR2_SWIZZLE_Z = 2 << 0,
   IR2_SWIZZLE_W = 3 << 0,

   IR2_SWIZZLE_ZW = 2 << 0 | 2 << 2,

   IR2_SWIZZLE_YXW = 1 << 0 | 3 << 2 | 1 << 4,

   IR2_SWIZZLE_XXXX = 0 << 0 | 3 << 2 | 2 << 4 | 1 << 6,
   IR2_SWIZZLE_YYYY = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZZZ = 2 << 0 | 1 << 2 | 0 << 4 | 3 << 6,
   IR2_SWIZZLE_WWWW = 3 << 0 | 2 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_WYWW = 3 << 0 | 0 << 2 | 1 << 4 | 0 << 6,
   IR2_SWIZZLE_XYXY = 0 << 0 | 0 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_ZZXY = 2 << 0 | 1 << 2 | 2 << 4 | 2 << 6,
   IR2_SWIZZLE_YXZZ = 1 << 0 | 3 << 2 | 0 << 4 | 3 << 6,
};
203
/* Report a fatal compile error: prints the printf-style message given in
 * args to stdout, then trips assert(0).
 *
 * The ctx argument is accepted but not used by the expansion. When
 * assertions are compiled out (NDEBUG) only the message is printed and
 * execution continues after the macro.
 *
 * Uses GNU extensions: a named variadic macro parameter (args...) and a
 * statement expression.
 */
#define compile_error(ctx, args...)                                            \
   ({                                                                          \
      printf(args);                                                            \
      assert(0);                                                               \
   })
209
210 static inline struct ir2_src
ir2_src(uint16_t num,uint8_t swizzle,enum ir2_src_type type)211 ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
212 {
213 return (struct ir2_src){.num = num, .swizzle = swizzle, .type = type};
214 }
215
/* Declared in this shared header because ir2_assemble uses it as well.
 * The definition is not in this file; judging by the name it presumably
 * returns a source that reads as zero - TODO confirm.
 */
struct ir2_src ir2_zero(struct ir2_context *ctx);
218
/* Iterate over the instructions of ctx in array order, skipping entries
 * whose type is IR2_NONE.
 *
 *   it:  name of the loop variable (declared by the macro as
 *        struct ir2_instr *)
 *   ctx: pointer to the ir2_context; evaluated several times per
 *        iteration, so it must not have side effects
 *
 * The bound &ctx->instr[ctx->instr_count] is re-read on every iteration.
 * The skipping is done in the loop condition with a GNU statement
 * expression, which first advances `it` past IR2_NONE entries and then
 * tests for the end of the array.
 */
#define ir2_foreach_instr(it, ctx)                                             \
   for (struct ir2_instr *it = (ctx)->instr; ({                                \
           while (it != &(ctx)->instr[(ctx)->instr_count] &&                   \
                  it->type == IR2_NONE)                                        \
              it++;                                                            \
           it != &(ctx)->instr[(ctx)->instr_count];                            \
        });                                                                    \
        it++)
227
/* Iterate over every non-NULL entry of ctx->live_regs (64 slots).
 *
 *   it:  name of the loop variable (declared by the macro as
 *        struct ir2_reg *); it is reloaded from the current slot before
 *        each iteration
 *   ctx: pointer to the ir2_context; evaluated several times per
 *        iteration, so it must not have side effects
 *
 * The loop condition (a GNU statement expression) skips NULL slots and
 * loads `it`; the step expression advances the slot cursor. The step must
 * move the cursor, not `it`: `it` is overwritten by the condition, so
 * incrementing it would leave the cursor parked on the same live slot and
 * the loop would never terminate unless the body cleared that slot.
 * The body may set the current slot to NULL; iteration simply continues
 * with the next slot.
 */
#define ir2_foreach_live_reg(it, ctx)                                          \
   for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({                     \
           while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL)            \
              __ptr++;                                                         \
           __ptr != &(ctx)->live_regs[64] ? (it = *__ptr) : NULL;              \
        });                                                                    \
        __ptr++)
235
/* Iterate over the first avail_count entries of the array `avail`.
 *
 * Both `avail` (array of struct ir2_instr *) and `avail_count` are taken
 * from the scope in which the macro is expanded.
 *
 *   it: name of the loop variable (declared by the macro as
 *       struct ir2_instr *)
 *
 * The bounds check comes before the load so that avail[avail_count], one
 * past the valid entries, is never read. The `, 1` keeps the loop running
 * for NULL entries, which are handed to the body unchanged.
 */
#define ir2_foreach_avail(it)                                                  \
   for (struct ir2_instr **__instrp = avail, *it;                              \
        __instrp != &avail[avail_count] && ((it = *__instrp), 1);              \
        __instrp++)
239
/* Iterate over the valid sources of an instruction, i.e. the first
 * instr->src_count entries of instr->src.
 *
 *   it:    name of the loop variable (declared by the macro as
 *          struct ir2_src *)
 *   instr: pointer to the instruction; parenthesized in the expansion so
 *          that any pointer expression (e.g. `p + 1`) works. It is
 *          evaluated several times per iteration and must not have side
 *          effects.
 */
#define ir2_foreach_src(it, instr)                                             \
   for (struct ir2_src *it = (instr)->src;                                     \
        it != &(instr)->src[(instr)->src_count]; it++)
243
244 /* mask for register allocation
245 * 64 registers with 4 components each = 256 bits
246 */
247 /* typedef struct {
248 uint64_t data[4];
249 } regmask_t; */
250
/* Test one bit of a bitmask stored as an array of 32-bit words.
 *
 *   mask: the word array; must have at least num / 32 + 1 elements
 *   num:  bit index (bit num % 32 of word num / 32)
 *
 * Returns true if the bit is set.
 */
static inline bool
mask_isset(uint32_t *mask, unsigned num)
{
   /* shift an unsigned one: `1 << 31` on a signed int is undefined */
   const uint32_t bit = (uint32_t)1 << (num % 32);

   return (mask[num / 32] & bit) != 0;
}
256
/* Set one bit of a bitmask stored as an array of 32-bit words.
 *
 *   mask: the word array; must have at least num / 32 + 1 elements
 *   num:  bit index (bit num % 32 of word num / 32)
 */
static inline void
mask_set(uint32_t *mask, unsigned num)
{
   /* shift an unsigned one: `1 << 31` on a signed int is undefined */
   mask[num / 32] |= (uint32_t)1 << (num % 32);
}
262
/* Clear one bit of a bitmask stored as an array of 32-bit words.
 *
 *   mask: the word array; must have at least num / 32 + 1 elements
 *   num:  bit index (bit num % 32 of word num / 32)
 */
static inline void
mask_unset(uint32_t *mask, unsigned num)
{
   /* shift an unsigned one: `1 << 31` on a signed int is undefined */
   mask[num / 32] &= ~((uint32_t)1 << (num % 32));
}
268
/* Extract the 4-bit group belonging to register `num` from a bitmask that
 * stores 4 bits per register (8 registers per 32-bit word).
 *
 * Returns the group in the low 4 bits of the result.
 */
static inline unsigned
mask_reg(uint32_t *mask, unsigned num)
{
   const unsigned word = num / 8;        /* eight 4-bit groups per word */
   const unsigned shift = (num % 8) * 4; /* bit offset inside that word */

   return (mask[word] >> shift) & 0xf;
}
274
275 static inline bool
is_export(struct ir2_instr * instr)276 is_export(struct ir2_instr *instr)
277 {
278 return instr->type == IR2_ALU && instr->alu.export >= 0;
279 }
280
281 static inline instr_alloc_type_t
export_buf(unsigned num)282 export_buf(unsigned num)
283 {
284 return num < 32 ? SQ_PARAMETER_PIXEL : num >= 62 ? SQ_POSITION : SQ_MEMORY;
285 }
286
/* Encode "channel i reads component c" as a swizzle field.
 *
 * Fields are relative to the channel: the stored 2-bit value is
 * (c - i) mod 4, placed at bits 2*i..2*i+1. The result can be OR-ed with
 * the fields of the other channels.
 */
static inline unsigned
swiz_set(unsigned c, unsigned i)
{
   const unsigned rel = (c - i) & 3;

   return rel << (2 * i);
}
293
/* Decode which component (0..3) channel i reads under swizzle swiz.
 *
 * Inverse of swiz_set(): the field of channel i is added back to i,
 * modulo 4. Higher channels' fields left over after the shift do not
 * affect the result because only the low two bits are kept.
 */
static inline unsigned
swiz_get(unsigned swiz, unsigned i)
{
   const unsigned field = swiz >> (2 * i);

   return (field + i) & 3;
}
300
/* Compose two swizzles: in the result, channel i reads the component that
 * swiz0 selects for the channel swiz1 selects for i, i.e.
 * result[i] = swiz0[swiz1[i]].
 */
static inline unsigned
swiz_merge(unsigned swiz0, unsigned swiz1)
{
   unsigned merged = 0;

   for (unsigned chan = 0; chan < 4; chan++) {
      const unsigned via = swiz_get(swiz1, chan);

      merged |= swiz_set(swiz_get(swiz0, via), chan);
   }

   return merged;
}
309
/* In-place variant of swiz_merge(): replaces *swiz0 with the composition
 * of *swiz0 and swiz1 (result[i] = swiz0[swiz1[i]]).
 */
static inline void
swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
   *swiz0 = swiz_merge(*swiz0, swiz1);
}
318
319 static inline struct ir2_reg *
get_reg(struct ir2_instr * instr)320 get_reg(struct ir2_instr *instr)
321 {
322 return instr->is_ssa ? &instr->ssa : instr->reg;
323 }
324
325 static inline struct ir2_reg *
get_reg_src(struct ir2_context * ctx,struct ir2_src * src)326 get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
327 {
328 switch (src->type) {
329 case IR2_SRC_INPUT:
330 return &ctx->input[src->num];
331 case IR2_SRC_SSA:
332 return &ctx->instr[src->num].ssa;
333 case IR2_SRC_REG:
334 return &ctx->reg[src->num];
335 default:
336 return NULL;
337 }
338 }
339
340 /* gets a ncomp value for the dst */
341 static inline unsigned
dst_ncomp(struct ir2_instr * instr)342 dst_ncomp(struct ir2_instr *instr)
343 {
344 if (instr->is_ssa)
345 return instr->ssa.ncomp;
346
347 if (instr->type == IR2_FETCH)
348 return instr->reg->ncomp;
349
350 assert(instr->type == IR2_ALU);
351
352 unsigned ncomp = 0;
353 for (int i = 0; i < instr->reg->ncomp; i++)
354 ncomp += !!(instr->alu.write_mask & 1 << i);
355 return ncomp;
356 }
357
358 /* gets a ncomp value for the src registers */
359 static inline unsigned
src_ncomp(struct ir2_instr * instr)360 src_ncomp(struct ir2_instr *instr)
361 {
362 if (instr->type == IR2_FETCH) {
363 switch (instr->fetch.opc) {
364 case VTX_FETCH:
365 return 1;
366 case TEX_FETCH:
367 return instr->fetch.tex.is_cube ? 3 : 2;
368 case TEX_SET_TEX_LOD:
369 return 1;
370 default:
371 assert(0);
372 }
373 }
374
375 switch (instr->alu.scalar_opc) {
376 case PRED_SETEs ... KILLONEs:
377 return 1;
378 default:
379 break;
380 }
381
382 switch (instr->alu.vector_opc) {
383 case DOT2ADDv:
384 return 2;
385 case DOT3v:
386 return 3;
387 case DOT4v:
388 case CUBEv:
389 case PRED_SETE_PUSHv:
390 return 4;
391 default:
392 return dst_ncomp(instr);
393 }
394 }
395