/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "util/compiler.h"
#include "agx_compiler.h"
#include "agx_debug.h"
#include "agx_opcodes.h"

/* Validation doesn't make sense in release builds */
#ifndef NDEBUG

#define agx_validate_assert(stmt)                                             \
   if (!(stmt)) {                                                             \
      return false;                                                           \
   }

/*
 * If a block contains phi nodes, they must come at the start of the block. If
 * a block contains control flow, it must come at the beginning/end as
 * applicable. Therefore the form of a valid block is:
 *
 *    Control flow instructions (else)
 *    Phi nodes
 *    General instructions
 *    Control flow instructions (except else)
 *
 * Validate that this form is satisfied.
 */
enum agx_block_state {
   AGX_BLOCK_STATE_CF_ELSE = 0,
   AGX_BLOCK_STATE_PHI = 1,
   AGX_BLOCK_STATE_BODY = 2,
   AGX_BLOCK_STATE_CF = 3
};

static bool
agx_validate_block_form(agx_block *block)
{
   enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE;

   agx_foreach_instr_in_block(block, I) {
      switch (I->op) {
      case AGX_OPCODE_PRELOAD:
      case AGX_OPCODE_ELSE_ICMP:
      case AGX_OPCODE_ELSE_FCMP:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE);
         break;

      case AGX_OPCODE_PHI:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE ||
                             state == AGX_BLOCK_STATE_PHI);

         state = AGX_BLOCK_STATE_PHI;
         break;

      case AGX_OPCODE_EXPORT:
         agx_validate_assert(agx_num_successors(block) == 0);
         state = AGX_BLOCK_STATE_CF;
         break;

      default:
         if (instr_after_logical_end(I)) {
            state = AGX_BLOCK_STATE_CF;
         } else {
            agx_validate_assert(state != AGX_BLOCK_STATE_CF);
            state = AGX_BLOCK_STATE_BODY;
         }
         break;
      }
   }

   return true;
}
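
/*
 * Illustrative sketch (operands elided, not real compiler output): a block of
 * the form
 *
 *    phi  ...    <- AGX_BLOCK_STATE_PHI
 *    fadd ...    <- AGX_BLOCK_STATE_BODY
 *    ...
 *
 * is accepted, whereas a phi appearing after the fadd would fail, since the
 * state machine above never transitions back from BODY to PHI.
 */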

/*
 * Only moves and phis may use the stack. Phis cannot be expressed with plain
 * moves due to their parallel-copy semantics, so we allow phis to take memory
 * operands; these are lowered to moves later.
 */
static bool
is_stack_valid(agx_instr *I)
{
   return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI);
}

static bool
agx_validate_sources(agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];

      if (src.type == AGX_INDEX_IMMEDIATE) {
         agx_validate_assert(!src.kill);
         agx_validate_assert(!src.cache);
         agx_validate_assert(!src.discard);

         bool ldst = agx_allows_16bit_immediate(I);

         /* Immediates are encoded as 8-bit (16-bit for memory load/store).
          * For integers, they extend to 16-bit. For floating point, they are
          * 8-bit minifloats. The 8-bit minifloats are a strict subset of
          * 16-bit standard floats, so we treat them as such in the IR, with
          * an implicit f16->f32 for 32-bit floating point operations.
          */
         agx_validate_assert(src.size == AGX_SIZE_16);
         agx_validate_assert(src.value < (1 << (ldst ? 16 : 8)));
      } else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) {
         agx_validate_assert(src.size == I->src[0].size);
      } else if (I->op == AGX_OPCODE_PHI) {
         agx_validate_assert(src.size == I->dest[0].size);
         agx_validate_assert(!agx_is_null(src));
      }

      agx_validate_assert(!src.memory || is_stack_valid(I));
   }

   return true;
}

static bool
agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
{
   agx_foreach_ssa_src(I, s) {
      /* Skip phis, they're special in loop headers */
      if (I->op == AGX_OPCODE_PHI)
         break;

      /* Sources must be defined before their use */
      if (!BITSET_TEST(defs, I->src[s].value))
         return false;
   }

   agx_foreach_ssa_dest(I, d) {
      /* Static single assignment */
      if (BITSET_TEST(defs, I->dest[d].value))
         return false;

      BITSET_SET(defs, I->dest[d].value);

      if (I->dest[d].memory && !is_stack_valid(I))
         return false;
   }

   return true;
}

/** Returns number of registers written by an instruction */
static unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
   case AGX_OPCODE_PHI:
      /* Tautological */
      return agx_index_size_16(I->dest[d]);

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      /* Even when masked out, these clobber 4 registers */
      return 4 * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_LD_TILE:
      /* Can write 16-bit or 32-bit. Anything logically 64-bit is already
       * expanded to 32-bit in the mask.
       */
      return util_bitcount(I->mask) * MIN2(size, 2);

   case AGX_OPCODE_LDCF:
      return 6;

   case AGX_OPCODE_COLLECT:
      return I->nr_srcs * agx_size_align_16(I->src[0].size);

   default:
      return size;
   }
}
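
/*
 * Worked example with assumed operand sizes (for illustration only): a
 * device_load with a 32-bit destination (size aligned to 2) and a 4-bit mask
 * counts as util_bitcount(0xf) * MIN2(2, 2) = 8 sixteen-bit register units,
 * while a texture_sample with a 16-bit destination always counts as
 * 4 * 1 = 4, even if some channels are masked off.
 */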

struct dim_info {
   unsigned comps;
   bool array;
};

static struct dim_info
agx_dim_info(enum agx_dim dim)
{
   switch (dim) {
   case AGX_DIM_1D:
      return (struct dim_info){1, false};
   case AGX_DIM_1D_ARRAY:
      return (struct dim_info){1, true};
   case AGX_DIM_2D:
      return (struct dim_info){2, false};
   case AGX_DIM_2D_ARRAY:
      return (struct dim_info){2, true};
   case AGX_DIM_2D_MS:
      return (struct dim_info){3, false};
   case AGX_DIM_3D:
      return (struct dim_info){3, false};
   case AGX_DIM_CUBE:
      return (struct dim_info){3, false};
   case AGX_DIM_CUBE_ARRAY:
      return (struct dim_info){3, true};
   case AGX_DIM_2D_MS_ARRAY:
      return (struct dim_info){2, true};
   default:
      unreachable("invalid dim");
   }
}

/*
 * Return number of registers required for coordinates for a texture/image
 * instruction. We handle layer + sample index as 32-bit even when only the
 * lower 16-bits are present. LOD queries do not take a layer.
 */
static unsigned
agx_coordinate_registers(const agx_instr *I)
{
   struct dim_info dim = agx_dim_info(I->dim);
   bool has_array = !I->query_lod;

   return 2 * (dim.comps + (has_array && dim.array));
}

static unsigned
agx_read_registers(const agx_instr *I, unsigned s)
{
   unsigned size = agx_size_align_16(I->src[s].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
   case AGX_OPCODE_EXPORT:
      /* Tautological */
      return agx_index_size_16(I->src[0]);

   case AGX_OPCODE_PHI:
      if (I->src[s].type == AGX_INDEX_IMMEDIATE)
         return size;
      else
         return agx_index_size_16(I->dest[0]);

   case AGX_OPCODE_SPLIT:
      return I->nr_dests * agx_size_align_16(agx_split_width(I));

   case AGX_OPCODE_UNIFORM_STORE:
      if (s == 0)
         return util_bitcount(I->mask) * size;
      else
         return size;

   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_LOCAL_STORE:
   case AGX_OPCODE_STACK_STORE:
   case AGX_OPCODE_ST_TILE:
      /* See agx_write_registers */
      if (s == 0)
         return util_bitcount(I->mask) * MIN2(size, 2);
      else if (s == 2 && I->explicit_coords)
         return 2;
      else
         return size;

   case AGX_OPCODE_ZS_EMIT:
      if (s == 1) {
         /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hw but we
          * pad up to u32 for simplicity
          */
         bool z = !!(I->zs & 1);
         bool s = !!(I->zs & 2);
         assert(z || s);

         return (z && s) ? 4 : z ? 2 : 1;
      } else {
         return 1;
      }

   case AGX_OPCODE_IMAGE_WRITE:
      if (s == 0)
         return 4 * size /* data */;
      else if (s == 1)
         return agx_coordinate_registers(I);
      else
         return size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      if (s == 0) {
         return agx_coordinate_registers(I);
      } else if (s == 1) {
         /* LOD */
         if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD ||
             I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN) {
            /* Technically only 16-bit but we model as 32-bit to keep the IR
             * simple, since the gradient is otherwise 32-bit.
             */
            unsigned min = I->lod_mode == AGX_LOD_MODE_LOD_GRAD_MIN ? 2 : 0;

            switch (I->dim) {
            case AGX_DIM_1D:
            case AGX_DIM_1D_ARRAY:
               return (2 * 2 * 1) + min;
            case AGX_DIM_2D:
            case AGX_DIM_2D_ARRAY:
            case AGX_DIM_2D_MS_ARRAY:
            case AGX_DIM_2D_MS:
               return (2 * 2 * 2) + min;
            case AGX_DIM_CUBE:
            case AGX_DIM_CUBE_ARRAY:
            case AGX_DIM_3D:
               return (2 * 2 * 3) + min;
            }

            unreachable("Invalid texture dimension");
         } else if (I->lod_mode == AGX_LOD_MODE_AUTO_LOD_BIAS_MIN) {
            return 2;
         } else {
            return 1;
         }
      } else if (s == 5) {
         /* Compare/offset */
         return 2 * ((!!I->shadow) + (!!I->offset));
      } else {
         return size;
      }

   case AGX_OPCODE_BLOCK_IMAGE_STORE:
      if (s == 3 && I->explicit_coords)
         return agx_coordinate_registers(I);
      else
         return size;

   case AGX_OPCODE_ATOMIC:
   case AGX_OPCODE_LOCAL_ATOMIC:
      if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
         return size * 2;
      else
         return size;

   default:
      return size;
   }
}
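
/*
 * Worked example with assumed operands (for illustration only): a
 * texture_sample from a 2D array (dim_info {2, true}) that is not an LOD
 * query needs 2 * (2 + 1) = 6 sixteen-bit register units of coordinates:
 * x, y and the layer, each handled as 32-bit per the comment above
 * agx_coordinate_registers.
 */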

/* Type check the dimensionality of sources and destinations. */
static bool
agx_validate_width(agx_context *ctx)
{
   bool succ = true;

   agx_foreach_instr_global(ctx, I) {
      agx_foreach_dest(I, d) {
         unsigned exp = agx_write_registers(I, d);
         unsigned act =
            agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "destination %u, expected width %u, got width %u\n",
                    d, exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }

      agx_foreach_src(I, s) {
         if (I->src[s].type == AGX_INDEX_NULL)
            continue;

         unsigned exp = agx_read_registers(I, s);
         unsigned act =
            agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
                    exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }
   }

   return succ;
}

static bool
agx_validate_predecessors(agx_block *block)
{
   /* Only loop headers may have predecessors that come later in the linear
    * block order.
    */
   bool has_later_preds = false;

   agx_foreach_predecessor(block, pred) {
      if ((*pred)->index >= block->index)
         has_later_preds = true;
   }

   if (has_later_preds && !block->loop_header)
      return false;

   /* Predecessor/successor links must be consistent: each predecessor must
    * list this block among its successors.
    */
   agx_foreach_predecessor(block, pred) {
      bool found = false;

      agx_foreach_successor((*pred), succ) {
         if (succ == block)
            found = true;
      }

      if (!found)
         return false;
   }

   return true;
}

static bool
agx_validate_sr(const agx_instr *I)
{
   bool none = (I->op == AGX_OPCODE_GET_SR);
   bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE);
   bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER);

   /* Filter get_sr instructions */
   if (!(none || coverage || barrier))
      return true;

   switch (I->sr) {
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD:
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP:
   case AGX_SR_TOTAL_ACTIVE_THREADS_IN_QUAD:
   case AGX_SR_TOTAL_ACTIVE_THREADS_IN_SUBGROUP:
   case AGX_SR_COVERAGE_MASK:
   case AGX_SR_IS_ACTIVE_THREAD:
      return coverage;

   case AGX_SR_HELPER_OP:
   case AGX_SR_HELPER_ARG_L:
   case AGX_SR_HELPER_ARG_H:
      return barrier;

   default:
      return none;
   }
}

void
agx_validate(agx_context *ctx, const char *after)
{
   bool fail = false;

   if (agx_compiler_debug & AGX_DBG_NOVALIDATE)
      return;

   int last_index = -1;

   agx_foreach_block(ctx, block) {
      if ((int)block->index < last_index) {
         fprintf(stderr, "Out-of-order block index %d vs %d after %s\n",
                 block->index, last_index, after);
         agx_print_block(block, stderr);
         fail = true;
      }

      last_index = block->index;

      if (!agx_validate_block_form(block)) {
         fprintf(stderr, "Invalid block form after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }

      if (!agx_validate_predecessors(block)) {
         fprintf(stderr, "Invalid loop header flag after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }
   }

   {
      BITSET_WORD *defs =
         calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->alloc));

      agx_foreach_instr_global(ctx, I) {
         if (!agx_validate_defs(I, defs)) {
            fprintf(stderr, "Invalid defs after %s\n", after);
            agx_print_instr(I, stderr);
            fail = true;
         }
      }

      free(defs);
   }

   agx_foreach_instr_global(ctx, I) {
      if (!agx_validate_sources(I)) {
         fprintf(stderr, "Invalid sources after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }

      if (!agx_validate_sr(I)) {
         fprintf(stderr, "Invalid SR after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }
   }

   if (!agx_validate_width(ctx)) {
      fprintf(stderr, "Invalid vectors after %s\n", after);
      fail = true;
   }

   if (fail) {
      agx_print_shader(ctx, stderr);
      exit(1);
   }
}

#endif /* NDEBUG */
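
/*
 * Usage sketch (illustrative, not part of this file): passes call the
 * validator with a name describing the point in the pipeline so failures can
 * be attributed, e.g.
 *
 *    agx_validate(ctx, "some_pass");   // "some_pass" is a hypothetical name
 *
 * Validation is compiled out in release (NDEBUG) builds and skipped at
 * runtime when AGX_DBG_NOVALIDATE is set in agx_compiler_debug.
 */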