/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "util/compiler.h"
#include "agx_compiler.h"
#include "agx_debug.h"
#include "agx_opcodes.h"

/* Validation doesn't make sense in release builds */
#ifndef NDEBUG

#define agx_validate_assert(stmt) \
   do { \
      if (!(stmt)) { \
         return false; \
      } \
   } while (0)

/*
 * If a block contains phi nodes, they must come at the start of the block. If a
 * block contains control flow, it must come at the beginning/end as applicable.
 * Therefore the form of a valid block is:
 *
 *       Control flow instructions (else)
 *       Phi nodes
 *       General instructions
 *       Control flow instructions (except else)
 *
 * Validate that this form is satisfied.
 */
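/*
 * As an illustrative (not literal) sketch, a valid loop header block would
 * order its instructions like:
 *
 *       else_icmp ...       <- CF_ELSE state
 *       phi       x, y      <- PHI state
 *       fadd      ...       <- BODY state
 *       jmp       ...       <- CF state
 *
 * whereas a phi appearing after the fadd would be rejected by the state
 * machine below.
 */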
enum agx_block_state {
   AGX_BLOCK_STATE_CF_ELSE = 0,
   AGX_BLOCK_STATE_PHI = 1,
   AGX_BLOCK_STATE_BODY = 2,
   AGX_BLOCK_STATE_CF = 3
};

static bool
agx_validate_block_form(agx_block *block)
{
   enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE;

   agx_foreach_instr_in_block(block, I) {
      switch (I->op) {
      case AGX_OPCODE_PRELOAD:
      case AGX_OPCODE_ELSE_ICMP:
      case AGX_OPCODE_ELSE_FCMP:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE);
         break;

      case AGX_OPCODE_PHI:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE ||
                             state == AGX_BLOCK_STATE_PHI);

         state = AGX_BLOCK_STATE_PHI;
         break;

      case AGX_OPCODE_EXPORT:
         agx_validate_assert(agx_num_successors(block) == 0);
         state = AGX_BLOCK_STATE_CF;
         break;

      default:
         if (instr_after_logical_end(I)) {
            state = AGX_BLOCK_STATE_CF;
         } else {
            agx_validate_assert(state != AGX_BLOCK_STATE_CF);
            state = AGX_BLOCK_STATE_BODY;
         }
         break;
      }
   }

   return true;
}

/*
 * Only moves and phis may use the stack. Phis cannot use moves due to their
 * parallel nature, so we allow phis to take memory operands, which are
 * lowered to moves later.
 */
static bool
is_stack_valid(agx_instr *I)
{
   return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI);
}

static bool
agx_validate_sources(agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];

      if (src.type == AGX_INDEX_IMMEDIATE) {
         agx_validate_assert(!src.kill);
         agx_validate_assert(!src.cache);
         agx_validate_assert(!src.discard);

         bool ldst = agx_allows_16bit_immediate(I);

         /* Immediates are encoded as 8-bit (16-bit for memory load/store).
          * For integers, they extend to 16-bit. For floating point, they are
          * 8-bit minifloats. The 8-bit minifloats are a strict subset of
          * 16-bit standard floats, so we treat them as such in the IR, with
          * an implicit f16->f32 conversion for 32-bit floating point
          * operations.
          */
         agx_validate_assert(src.size == AGX_SIZE_16);
         agx_validate_assert(src.value < (1 << (ldst ? 16 : 8)));
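
         /* For instance, under these rules an ALU instruction may take the
          * integer immediate 255 directly, but 256 exceeds the 8-bit
          * encoding and must first be materialized into a register (e.g.
          * with a mov); only memory load/store opcodes get the wider 16-bit
          * range.
          */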
      } else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) {
         agx_validate_assert(src.size == I->src[0].size);
      } else if (I->op == AGX_OPCODE_PHI) {
         agx_validate_assert(src.size == I->dest[0].size);
         agx_validate_assert(!agx_is_null(src));
      }

      agx_validate_assert(!src.memory || is_stack_valid(I));
   }

   return true;
}

static bool
agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
{
   agx_foreach_ssa_src(I, s) {
      /* Skip phis, they're special in loop headers */
      if (I->op == AGX_OPCODE_PHI)
         break;

      /* Sources must be defined before their use */
      if (!BITSET_TEST(defs, I->src[s].value))
         return false;
   }

   agx_foreach_ssa_dest(I, d) {
      /* Static single assignment */
      if (BITSET_TEST(defs, I->dest[d].value))
         return false;

      BITSET_SET(defs, I->dest[d].value);

      if (I->dest[d].memory && !is_stack_valid(I))
         return false;
   }

   return true;
}

/** Returns number of registers written by an instruction */
static unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
   case AGX_OPCODE_PHI:
      /* Tautological */
      return agx_index_size_16(I->dest[d]);

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      /* Even when masked out, these clobber 4 registers */
      return 4 * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_LD_TILE:
      /* Can write 16-bit or 32-bit. Anything logically 64-bit is already
       * expanded to 32-bit in the mask.
       */
      return util_bitcount(I->mask) * MIN2(size, 2);

   case AGX_OPCODE_LDCF:
      return 6;
   case AGX_OPCODE_COLLECT:
      return I->nr_srcs * agx_size_align_16(I->src[0].size);
   default:
      return size;
   }
}
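
/* As a worked example of the above (counts are in 16-bit register units): a
 * texture_sample writing a masked 32-bit destination still clobbers 4
 * channels, so it counts as 4 * 2 = 8 registers, while a device_load of four
 * 16-bit values counts as 4 * 1 = 4.
 */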

struct dim_info {
   unsigned comps;
   bool array;
};

static struct dim_info
agx_dim_info(enum agx_dim dim)
{
   switch (dim) {
   case AGX_DIM_1D:
      return (struct dim_info){1, false};
   case AGX_DIM_1D_ARRAY:
      return (struct dim_info){1, true};
   case AGX_DIM_2D:
      return (struct dim_info){2, false};
   case AGX_DIM_2D_ARRAY:
      return (struct dim_info){2, true};
   case AGX_DIM_2D_MS:
      return (struct dim_info){3, false};
   case AGX_DIM_3D:
      return (struct dim_info){3, false};
   case AGX_DIM_CUBE:
      return (struct dim_info){3, false};
   case AGX_DIM_CUBE_ARRAY:
      return (struct dim_info){3, true};
   case AGX_DIM_2D_MS_ARRAY:
      return (struct dim_info){2, true};
   default:
      unreachable("invalid dim");
   }
}

/*
 * Return number of registers required for coordinates for a texture/image
 * instruction. We handle layer + sample index as 32-bit even when only the
 * lower 16-bits are present. LOD queries do not take a layer.
 */
static unsigned
agx_coordinate_registers(const agx_instr *I)
{
   struct dim_info dim = agx_dim_info(I->dim);
   bool has_array = !I->query_lod;

   return 2 * (dim.comps + (has_array && dim.array));
}
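
/* For example, following the table above: a 2D array texture has 2 coordinate
 * components plus a layer, so sampling it takes 2 * (2 + 1) = 6 registers in
 * 16-bit units, but an LOD query on the same texture drops the layer and
 * takes only 2 * 2 = 4.
 */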

static unsigned
agx_read_registers(const agx_instr *I, unsigned s)
{
   unsigned size = agx_size_align_16(I->src[s].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
   case AGX_OPCODE_EXPORT:
      /* Tautological */
      return agx_index_size_16(I->src[0]);

   case AGX_OPCODE_PHI:
      if (I->src[s].type == AGX_INDEX_IMMEDIATE)
         return size;
      else
         return agx_index_size_16(I->dest[0]);

   case AGX_OPCODE_SPLIT:
      return I->nr_dests * agx_size_align_16(agx_split_width(I));

   case AGX_OPCODE_UNIFORM_STORE:
      if (s == 0)
         return util_bitcount(I->mask) * size;
      else
         return size;

   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_LOCAL_STORE:
   case AGX_OPCODE_STACK_STORE:
   case AGX_OPCODE_ST_TILE:
      /* See agx_write_registers */
      if (s == 0)
         return util_bitcount(I->mask) * MIN2(size, 2);
      else if (s == 2 && I->explicit_coords)
         return 2;
      else
         return size;

   case AGX_OPCODE_ZS_EMIT:
      if (s == 1) {
         /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hardware,
          * but we pad it up to u32 for simplicity.
          */
         bool z = !!(I->zs & 1);
         bool stencil = !!(I->zs & 2);
         assert(z || stencil);

         return (z && stencil) ? 4 : z ? 2 : 1;
      } else {
         return 1;
      }

   case AGX_OPCODE_IMAGE_WRITE:
      if (s == 0)
         return 4 * size /* data */;
      else if (s == 1)
         return agx_coordinate_registers(I);
      else
         return size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      if (s == 0) {
         return agx_coordinate_registers(I);
      } else if (s == 1) {
         /* LOD */
         if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD ||
             I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN) {

            /* Technically only 16-bit but we model as 32-bit to keep the IR
             * simple, since the gradient is otherwise 32-bit.
             */
            unsigned min = I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN ? 2 : 0;

            switch (I->dim) {
            case AGX_DIM_1D:
            case AGX_DIM_1D_ARRAY:
               return (2 * 2 * 1) + min;
            case AGX_DIM_2D:
            case AGX_DIM_2D_ARRAY:
            case AGX_DIM_2D_MS_ARRAY:
            case AGX_DIM_2D_MS:
               return (2 * 2 * 2) + min;
            case AGX_DIM_CUBE:
            case AGX_DIM_CUBE_ARRAY:
            case AGX_DIM_3D:
               return (2 * 2 * 3) + min;
            }

            unreachable("Invalid texture dimension");
         } else if (I->lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) {
            return 2;
         } else {
            return 1;
         }
      } else if (s == 5) {
         /* Compare/offset */
         return 2 * ((!!I->shadow) + (!!I->offset));
      } else {
         return size;
      }

   case AGX_OPCODE_BLOCK_IMAGE_STORE:
      if (s == 3 && I->explicit_coords)
         return agx_coordinate_registers(I);
      else
         return size;

   case AGX_OPCODE_ATOMIC:
   case AGX_OPCODE_LOCAL_ATOMIC:
      if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
         return size * 2;
      else
         return size;

   default:
      return size;
   }
}
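
/* To make the gradient accounting above concrete: for a 2D texture, lod_grad
 * supplies dF/dx and dF/dy with 2 components each at 32-bit, giving
 * 2 directions * 2 half-words * 2 components = 8 registers in 16-bit units,
 * plus 2 more when a min-LOD rides along (LOD_GRAD_MIN).
 */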

/* Type check the dimensionality of sources and destinations. */
static bool
agx_validate_width(agx_context *ctx)
{
   bool succ = true;

   agx_foreach_instr_global(ctx, I) {
      agx_foreach_dest(I, d) {
         unsigned exp = agx_write_registers(I, d);
         unsigned act =
            agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "destination %u, expected width %u, got width %u\n",
                    d, exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }

      agx_foreach_src(I, s) {
         if (I->src[s].type == AGX_INDEX_NULL)
            continue;

         unsigned exp = agx_read_registers(I, s);
         unsigned act =
            agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
                    exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }
   }

   return succ;
}

static bool
agx_validate_predecessors(agx_block *block)
{
   /* Only loop headers may have predecessors that appear later in source
    * order.
    */
   bool has_later_preds = false;

   agx_foreach_predecessor(block, pred) {
      if ((*pred)->index >= block->index)
         has_later_preds = true;
   }

   if (has_later_preds && !block->loop_header)
      return false;

   /* Successors and predecessors are found together */
   agx_foreach_predecessor(block, pred) {
      bool found = false;

      agx_foreach_successor((*pred), succ) {
         if (succ == block)
            found = true;
      }

      if (!found)
         return false;
   }

   return true;
}
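
/* Concretely: a loop back-edge from the bottom of the loop to its header
 * produces a predecessor with an index >= the header's own, which the check
 * above permits only when loop_header is set on the block.
 */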

static bool
agx_validate_sr(const agx_instr *I)
{
   bool none = (I->op == AGX_OPCODE_GET_SR);
   bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE);
   bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER);

   /* Filter get_sr instructions */
   if (!(none || coverage || barrier))
      return true;

   switch (I->sr) {
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD:
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP:
   case AGX_SR_TOTAL_ACTIVE_THREADS_IN_QUAD:
   case AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP:
   case AGX_SR_COVERAGE_MASK:
   case AGX_SR_IS_ACTIVE_THREAD:
      return coverage;

   case AGX_SR_HELPER_OP:
   case AGX_SR_HELPER_ARG_L:
   case AGX_SR_HELPER_ARG_H:
      return barrier;

   default:
      return none;
   }
}

void
agx_validate(agx_context *ctx, const char *after)
{
   bool fail = false;

   if (agx_compiler_debug & AGX_DBG_NOVALIDATE)
      return;

   int last_index = -1;

   agx_foreach_block(ctx, block) {
      if ((int)block->index < last_index) {
         fprintf(stderr, "Out-of-order block index %d vs %d after %s\n",
                 block->index, last_index, after);
         agx_print_block(block, stderr);
         fail = true;
      }

      last_index = block->index;

      if (!agx_validate_block_form(block)) {
         fprintf(stderr, "Invalid block form after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }

      if (!agx_validate_predecessors(block)) {
         fprintf(stderr, "Invalid loop header flag after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }
   }

   {
      BITSET_WORD *defs =
         calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));

      agx_foreach_instr_global(ctx, I) {
         if (!agx_validate_defs(I, defs)) {
            fprintf(stderr, "Invalid defs after %s\n", after);
            agx_print_instr(I, stderr);
            fail = true;
         }
      }

      free(defs);
   }

   agx_foreach_instr_global(ctx, I) {
      if (!agx_validate_sources(I)) {
         fprintf(stderr, "Invalid sources form after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }

      if (!agx_validate_sr(I)) {
         fprintf(stderr, "Invalid SR after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }
   }

   if (!agx_validate_width(ctx)) {
      fprintf(stderr, "Invalid vectors after %s\n", after);
      fail = true;
   }

   if (fail) {
      agx_print_shader(ctx, stderr);
      exit(1);
   }
}

#endif /* NDEBUG */