xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_validate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2022 Alyssa Rosenzweig
3  * Copyright 2021 Collabora, Ltd.
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "util/compiler.h"
8 #include "agx_compiler.h"
9 #include "agx_debug.h"
10 #include "agx_opcodes.h"
11 
/* Validation doesn't make sense in release builds */
13 #ifndef NDEBUG
14 
/*
 * Bail out of the enclosing validation function when a condition fails.
 * Wrapped in do/while (0) so the macro expands to exactly one statement,
 * avoiding the dangling-else hazard of a bare `if` when the macro is used
 * inside an unbraced if/else.
 */
#define agx_validate_assert(stmt)                                              \
   do {                                                                        \
      if (!(stmt)) {                                                           \
         return false;                                                         \
      }                                                                        \
   } while (0)
19 
20 /*
21  * If a block contains phi nodes, they must come at the start of the block. If a
22  * block contains control flow, it must come at the beginning/end as applicable.
23  * Therefore the form of a valid block is:
24  *
25  *       Control flow instructions (else)
26  *       Phi nodes
27  *       General instructions
28  *       Control flow instructions (except else)
29  *
30  * Validate that this form is satisfied.
31  */
/* Section of a block the validator is currently in. Sections must appear in
 * nondecreasing numerical order (see the comment above for the valid form).
 */
enum agx_block_state {
   AGX_BLOCK_STATE_CF_ELSE = 0, /* leading control flow (else/preload) */
   AGX_BLOCK_STATE_PHI = 1,     /* phi nodes */
   AGX_BLOCK_STATE_BODY = 2,    /* general instructions */
   AGX_BLOCK_STATE_CF = 3       /* trailing control flow */
};
38 
39 static bool
agx_validate_block_form(agx_block * block)40 agx_validate_block_form(agx_block *block)
41 {
42    enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE;
43 
44    agx_foreach_instr_in_block(block, I) {
45       switch (I->op) {
46       case AGX_OPCODE_PRELOAD:
47       case AGX_OPCODE_ELSE_ICMP:
48       case AGX_OPCODE_ELSE_FCMP:
49          agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE);
50          break;
51 
52       case AGX_OPCODE_PHI:
53          agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE ||
54                              state == AGX_BLOCK_STATE_PHI);
55 
56          state = AGX_BLOCK_STATE_PHI;
57          break;
58 
59       case AGX_OPCODE_EXPORT:
60          agx_validate_assert(agx_num_successors(block) == 0);
61          state = AGX_BLOCK_STATE_CF;
62          break;
63 
64       default:
65          if (instr_after_logical_end(I)) {
66             state = AGX_BLOCK_STATE_CF;
67          } else {
68             agx_validate_assert(state != AGX_BLOCK_STATE_CF);
69             state = AGX_BLOCK_STATE_BODY;
70          }
71          break;
72       }
73    }
74 
75    return true;
76 }
77 
78 /*
79  * Only moves and phis use stack. Phis cannot use moves due to their
80  * parallel nature, so we allow phis to take memory, later lowered to moves.
81  */
82 static bool
is_stack_valid(agx_instr * I)83 is_stack_valid(agx_instr *I)
84 {
85    return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI);
86 }
87 
88 static bool
agx_validate_sources(agx_instr * I)89 agx_validate_sources(agx_instr *I)
90 {
91    agx_foreach_src(I, s) {
92       agx_index src = I->src[s];
93 
94       if (src.type == AGX_INDEX_IMMEDIATE) {
95          agx_validate_assert(!src.kill);
96          agx_validate_assert(!src.cache);
97          agx_validate_assert(!src.discard);
98 
99          bool ldst = agx_allows_16bit_immediate(I);
100 
101          /* Immediates are encoded as 8-bit (16-bit for memory load/store). For
102           * integers, they extend to 16-bit. For floating point, they are 8-bit
103           * minifloats. The 8-bit minifloats are a strict subset of 16-bit
104           * standard floats, so we treat them as such in the IR, with an
105           * implicit f16->f32 for 32-bit floating point operations.
106           */
107          agx_validate_assert(src.size == AGX_SIZE_16);
108          agx_validate_assert(src.value < (1 << (ldst ? 16 : 8)));
109       } else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) {
110          agx_validate_assert(src.size == I->src[0].size);
111       } else if (I->op == AGX_OPCODE_PHI) {
112          agx_validate_assert(src.size == I->dest[0].size);
113          agx_validate_assert(!agx_is_null(src));
114       }
115 
116       agx_validate_assert(!src.memory || is_stack_valid(I));
117    }
118 
119    return true;
120 }
121 
122 static bool
agx_validate_defs(agx_instr * I,BITSET_WORD * defs)123 agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
124 {
125    agx_foreach_ssa_src(I, s) {
126       /* Skip phis, they're special in loop headers */
127       if (I->op == AGX_OPCODE_PHI)
128          break;
129 
130       /* Sources must be defined before their use */
131       if (!BITSET_TEST(defs, I->src[s].value))
132          return false;
133    }
134 
135    agx_foreach_ssa_dest(I, d) {
136       /* Static single assignment */
137       if (BITSET_TEST(defs, I->dest[d].value))
138          return false;
139 
140       BITSET_SET(defs, I->dest[d].value);
141 
142       if (I->dest[d].memory && !is_stack_valid(I))
143          return false;
144    }
145 
146    return true;
147 }
148 
/** Returns number of registers written by an instruction */
static unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
   /* Size of one element of destination d, in 16-bit register units */
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
   case AGX_OPCODE_PHI:
      /* Tautological */
      return agx_index_size_16(I->dest[d]);

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
      /* Interpolation writes one element per channel */
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      /* Even when masked out, these clobber 4 registers */
      return 4 * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_LD_TILE:
      /* Can write 16-bit or 32-bit. Anything logically 64-bit is already
       * expanded to 32-bit in the mask.
       */
      return util_bitcount(I->mask) * MIN2(size, 2);

   case AGX_OPCODE_LDCF:
      /* NOTE(review): fixed 6-register footprint regardless of size —
       * presumably the coefficient-register format; confirm against ISA docs.
       */
      return 6;
   case AGX_OPCODE_COLLECT:
      /* Vector build: one element per source, all sized like src[0] */
      return I->nr_srcs * agx_size_align_16(I->src[0].size);
   default:
      return size;
   }
}
189 
/* Decomposed texture/image dimensionality for coordinate-size accounting */
struct dim_info {
   unsigned comps; /* coordinate components, excluding any array layer */
   bool array;     /* whether an array layer index is also present */
};
194 
195 static struct dim_info
agx_dim_info(enum agx_dim dim)196 agx_dim_info(enum agx_dim dim)
197 {
198    switch (dim) {
199    case AGX_DIM_1D:
200       return (struct dim_info){1, false};
201    case AGX_DIM_1D_ARRAY:
202       return (struct dim_info){1, true};
203    case AGX_DIM_2D:
204       return (struct dim_info){2, false};
205    case AGX_DIM_2D_ARRAY:
206       return (struct dim_info){2, true};
207    case AGX_DIM_2D_MS:
208       return (struct dim_info){3, false};
209    case AGX_DIM_3D:
210       return (struct dim_info){3, false};
211    case AGX_DIM_CUBE:
212       return (struct dim_info){3, false};
213    case AGX_DIM_CUBE_ARRAY:
214       return (struct dim_info){3, true};
215    case AGX_DIM_2D_MS_ARRAY:
216       return (struct dim_info){2, true};
217    default:
218       unreachable("invalid dim");
219    }
220 }
221 
222 /*
223  * Return number of registers required for coordinates for a texture/image
224  * instruction. We handle layer + sample index as 32-bit even when only the
225  * lower 16-bits are present. LOD queries do not take a layer.
226  */
227 static unsigned
agx_coordinate_registers(const agx_instr * I)228 agx_coordinate_registers(const agx_instr *I)
229 {
230    struct dim_info dim = agx_dim_info(I->dim);
231    bool has_array = !I->query_lod;
232 
233    return 2 * (dim.comps + (has_array && dim.array));
234 }
235 
236 static unsigned
agx_read_registers(const agx_instr * I,unsigned s)237 agx_read_registers(const agx_instr *I, unsigned s)
238 {
239    unsigned size = agx_size_align_16(I->src[s].size);
240 
241    switch (I->op) {
242    case AGX_OPCODE_MOV:
243    case AGX_OPCODE_EXPORT:
244       /* Tautological */
245       return agx_index_size_16(I->src[0]);
246 
247    case AGX_OPCODE_PHI:
248       if (I->src[s].type == AGX_INDEX_IMMEDIATE)
249          return size;
250       else
251          return agx_index_size_16(I->dest[0]);
252 
253    case AGX_OPCODE_SPLIT:
254       return I->nr_dests * agx_size_align_16(agx_split_width(I));
255 
256    case AGX_OPCODE_UNIFORM_STORE:
257       if (s == 0)
258          return util_bitcount(I->mask) * size;
259       else
260          return size;
261 
262    case AGX_OPCODE_DEVICE_STORE:
263    case AGX_OPCODE_LOCAL_STORE:
264    case AGX_OPCODE_STACK_STORE:
265    case AGX_OPCODE_ST_TILE:
266       /* See agx_write_registers */
267       if (s == 0)
268          return util_bitcount(I->mask) * MIN2(size, 2);
269       else if (s == 2 && I->explicit_coords)
270          return 2;
271       else
272          return size;
273 
274    case AGX_OPCODE_ZS_EMIT:
275       if (s == 1) {
276          /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hw but we pad
277           * up to u32 for simplicity
278           */
279          bool z = !!(I->zs & 1);
280          bool s = !!(I->zs & 2);
281          assert(z || s);
282 
283          return (z && s) ? 4 : z ? 2 : 1;
284       } else {
285          return 1;
286       }
287 
288    case AGX_OPCODE_IMAGE_WRITE:
289       if (s == 0)
290          return 4 * size /* data */;
291       else if (s == 1)
292          return agx_coordinate_registers(I);
293       else
294          return size;
295 
296    case AGX_OPCODE_IMAGE_LOAD:
297    case AGX_OPCODE_TEXTURE_LOAD:
298    case AGX_OPCODE_TEXTURE_SAMPLE:
299       if (s == 0) {
300          return agx_coordinate_registers(I);
301       } else if (s == 1) {
302          /* LOD */
303          if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD ||
304              I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN) {
305 
306             /* Technically only 16-bit but we model as 32-bit to keep the IR
307              * simple, since the gradient is otherwise 32-bit.
308              */
309             unsigned min = I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN ? 2 : 0;
310 
311             switch (I->dim) {
312             case AGX_DIM_1D:
313             case AGX_DIM_1D_ARRAY:
314                return (2 * 2 * 1) + min;
315             case AGX_DIM_2D:
316             case AGX_DIM_2D_ARRAY:
317             case AGX_DIM_2D_MS_ARRAY:
318             case AGX_DIM_2D_MS:
319                return (2 * 2 * 2) + min;
320             case AGX_DIM_CUBE:
321             case AGX_DIM_CUBE_ARRAY:
322             case AGX_DIM_3D:
323                return (2 * 2 * 3) + min;
324             }
325 
326             unreachable("Invalid texture dimension");
327          } else if (I->lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) {
328             return 2;
329          } else {
330             return 1;
331          }
332       } else if (s == 5) {
333          /* Compare/offset */
334          return 2 * ((!!I->shadow) + (!!I->offset));
335       } else {
336          return size;
337       }
338 
339    case AGX_OPCODE_BLOCK_IMAGE_STORE:
340       if (s == 3 && I->explicit_coords)
341          return agx_coordinate_registers(I);
342       else
343          return size;
344 
345    case AGX_OPCODE_ATOMIC:
346    case AGX_OPCODE_LOCAL_ATOMIC:
347       if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
348          return size * 2;
349       else
350          return size;
351 
352    default:
353       return size;
354    }
355 }
356 
357 /* Type check the dimensionality of sources and destinations. */
358 static bool
agx_validate_width(agx_context * ctx)359 agx_validate_width(agx_context *ctx)
360 {
361    bool succ = true;
362 
363    agx_foreach_instr_global(ctx, I) {
364       agx_foreach_dest(I, d) {
365          unsigned exp = agx_write_registers(I, d);
366          unsigned act =
367             agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size);
368 
369          if (exp != act) {
370             succ = false;
371             fprintf(stderr, "destination %u, expected width %u, got width %u\n",
372                     d, exp, act);
373             agx_print_instr(I, stderr);
374             fprintf(stderr, "\n");
375          }
376       }
377 
378       agx_foreach_src(I, s) {
379          if (I->src[s].type == AGX_INDEX_NULL)
380             continue;
381 
382          unsigned exp = agx_read_registers(I, s);
383          unsigned act =
384             agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size);
385 
386          if (exp != act) {
387             succ = false;
388             fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
389                     exp, act);
390             agx_print_instr(I, stderr);
391             fprintf(stderr, "\n");
392          }
393       }
394    }
395 
396    return succ;
397 }
398 
399 static bool
agx_validate_predecessors(agx_block * block)400 agx_validate_predecessors(agx_block *block)
401 {
402    /* Loop headers (only) have predecessors that are later in source form */
403    bool has_later_preds = false;
404 
405    agx_foreach_predecessor(block, pred) {
406       if ((*pred)->index >= block->index)
407          has_later_preds = true;
408    }
409 
410    if (has_later_preds && !block->loop_header)
411       return false;
412 
413    /* Successors and predecessors are found together */
414    agx_foreach_predecessor(block, pred) {
415       bool found = false;
416 
417       agx_foreach_successor((*pred), succ) {
418          if (succ == block)
419             found = true;
420       }
421 
422       if (!found)
423          return false;
424    }
425 
426    return true;
427 }
428 
429 static bool
agx_validate_sr(const agx_instr * I)430 agx_validate_sr(const agx_instr *I)
431 {
432    bool none = (I->op == AGX_OPCODE_GET_SR);
433    bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE);
434    bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER);
435 
436    /* Filter get_sr instructions */
437    if (!(none || coverage || barrier))
438       return true;
439 
440    switch (I->sr) {
441    case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD:
442    case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP:
443    case AGX_SR_TOTAL_ACTIVE_THREADS_IN_QUAD:
444    case AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP:
445    case AGX_SR_COVERAGE_MASK:
446    case AGX_SR_IS_ACTIVE_THREAD:
447       return coverage;
448 
449    case AGX_SR_HELPER_OP:
450    case AGX_SR_HELPER_ARG_L:
451    case AGX_SR_HELPER_ARG_H:
452       return barrier;
453 
454    default:
455       return none;
456    }
457 }
458 
459 void
agx_validate(agx_context * ctx,const char * after)460 agx_validate(agx_context *ctx, const char *after)
461 {
462    bool fail = false;
463 
464    if (agx_compiler_debug & AGX_DBG_NOVALIDATE)
465       return;
466 
467    int last_index = -1;
468 
469    agx_foreach_block(ctx, block) {
470       if ((int)block->index < last_index) {
471          fprintf(stderr, "Out-of-order block index %d vs %d after %s\n",
472                  block->index, last_index, after);
473          agx_print_block(block, stderr);
474          fail = true;
475       }
476 
477       last_index = block->index;
478 
479       if (!agx_validate_block_form(block)) {
480          fprintf(stderr, "Invalid block form after %s\n", after);
481          agx_print_block(block, stderr);
482          fail = true;
483       }
484 
485       if (!agx_validate_predecessors(block)) {
486          fprintf(stderr, "Invalid loop header flag after %s\n", after);
487          agx_print_block(block, stderr);
488          fail = true;
489       }
490    }
491 
492    {
493       BITSET_WORD *defs = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->alloc));
494 
495       agx_foreach_instr_global(ctx, I) {
496          if (!agx_validate_defs(I, defs)) {
497             fprintf(stderr, "Invalid defs after %s\n", after);
498             agx_print_instr(I, stderr);
499             fail = true;
500          }
501       }
502 
503       free(defs);
504    }
505 
506    agx_foreach_instr_global(ctx, I) {
507       if (!agx_validate_sources(I)) {
508          fprintf(stderr, "Invalid sources form after %s\n", after);
509          agx_print_instr(I, stderr);
510          fail = true;
511       }
512 
513       if (!agx_validate_sr(I)) {
514          fprintf(stderr, "Invalid SR after %s\n", after);
515          agx_print_instr(I, stdout);
516          fail = true;
517       }
518    }
519 
520    if (!agx_validate_width(ctx)) {
521       fprintf(stderr, "Invalid vectors after %s\n", after);
522       fail = true;
523    }
524 
525    if (fail) {
526       agx_print_shader(ctx, stderr);
527       exit(1);
528    }
529 }
530 
531 #endif /* NDEBUG */
532