/*
 * Copyright © 2023 Valve Corporation.
 * SPDX-License-Identifier: MIT
 */

/* Try to fold a shared -> non-shared mov into the instruction producing the
 * shared src. We do this aggressively, even if there are other uses of the
 * source, on the assumption that the "default" state should be non-shared and
 * we should be able to fold the other sources eventually.
 */

#include "util/ralloc.h"

#include "ir3.h"

/* Returns true when the type conversion performed by "mov" may be moved onto
 * "producer": ir3_output_conv_type() must report that the conversion can be
 * folded with a U32 output type, and every recorded use of the producer must
 * be a mov carrying exactly the same src/dst type pair as "mov".
 */
static bool
conversion_can_move_to_producer(struct ir3_instruction *mov,
                                struct ir3_instruction *producer)
{
   /* Check if the conversion can be folded into the source by ir3_cf */
   bool can_fold;
   type_t out_type = ir3_output_conv_type(producer, &can_fold);

   if (!can_fold || out_type != TYPE_U32)
      return false;

   foreach_ssa_use (user, producer) {
      bool same_conversion = user->opc == OPC_MOV &&
                             user->cat1.src_type == mov->cat1.src_type &&
                             user->cat1.dst_type == mov->cat1.dst_type;
      if (!same_conversion)
         return false;
   }

   return true;
}

/* Makes every source of "phi" non-shared. For each source that has a def, a
 * shared -> non-shared mov of that def is created in the matching predecessor
 * block, the def's use set is updated to list the new mov instead of the phi,
 * and the phi source is rewired to the mov's destination.
 */
static void
unshare_phi_srcs(struct ir3_instruction *phi)
{
   struct ir3_block *phi_block = phi->block;

   for (unsigned p = 0; p < phi_block->predecessors_count; p++) {
      struct ir3_block *pred_block = phi_block->predecessors[p];
      struct ir3_register *phi_src = phi->srcs[p];

      if (phi_src->def) {
         struct ir3_instruction *copy =
            ir3_instr_create(pred_block, OPC_MOV, 1, 1);
         __ssa_dst(copy)->flags |= (phi_src->flags & IR3_REG_HALF);

         unsigned copy_src_flags =
            IR3_REG_SSA | IR3_REG_SHARED | (phi_src->flags & IR3_REG_HALF);
         ir3_src_create(copy, INVALID_REG, copy_src_flags)->def = phi_src->def;

         /* The copy never converts: both types follow the half flag. */
         type_t copy_type =
            (copy_src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         copy->cat1.dst_type = copy_type;
         copy->cat1.src_type = copy_type;

         /* The old def is now consumed by the copy rather than by the phi. */
         _mesa_set_remove_key(phi_src->def->instr->uses, phi);
         _mesa_set_add(phi_src->def->instr->uses, copy);
         phi_src->def = copy->dsts[0];
      }

      phi_src->flags &= ~IR3_REG_SHARED;
   }
}

/* Decides whether "producer" is an instruction whose destination may be
 * turned non-shared, applying whatever per-opcode adjustment that needs.
 * Returns false, leaving the producer untouched, when it is not.
 */
static bool
prepare_producer(struct ir3_instruction *producer)
{
   if (producer->opc == OPC_META_PHI) {
      unshare_phi_srcs(producer);
      return true;
   }

   if (opc_cat(producer->opc) == 2 && producer->srcs_count >= 2) {
      /* cat2 vector ALU instructions cannot have both shared sources */
      const unsigned uniform_mask = IR3_REG_SHARED | IR3_REG_CONST;
      bool both_uniform = (producer->srcs[0]->flags & uniform_mask) &&
                          (producer->srcs[1]->flags & uniform_mask);
      return !both_uniform;
   }

   if (opc_cat(producer->opc) == 3) {
      /* cat3 vector ALU instructions cannot have src1 shared */
      return !(producer->srcs[1]->flags & IR3_REG_SHARED);
   }

   if (producer->opc == OPC_LDC) {
      producer->flags &= ~IR3_INSTR_U;
      return true;
   }

   return false;
}

/* Builds a mov of "producer" with a shared destination, placed right after
 * the producer (or after the phis of its block when the producer is a phi),
 * and gives it an empty use set allocated from "mem_ctx".
 */
static struct ir3_instruction *
create_shared_copy(struct ir3_instruction *producer, type_t type,
                   void *mem_ctx)
{
   struct ir3_instruction *copy = ir3_MOV(producer->block, producer, type);
   copy->dsts[0]->flags |= IR3_REG_SHARED;

   if (producer->opc == OPC_META_PHI)
      ir3_instr_move_after_phis(copy, producer->block);
   else
      ir3_instr_move_after(copy, producer);

   copy->uses = _mesa_pointer_set_create(mem_ctx);
   return copy;
}

/* Attempts to fold the shared -> non-shared copy "mov" into the instruction
 * defining its source. Returns true if the IR was changed.
 */
static bool
try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
{
   if (mov->opc != OPC_MOV)
      return false;

   /* Only movs reading a shared register and writing a non-shared one. */
   bool dst_shared = mov->dsts[0]->flags & IR3_REG_SHARED;
   bool src_shared = mov->srcs[0]->flags & IR3_REG_SHARED;
   if (dst_shared || !src_shared)
      return false;

   struct ir3_instruction *producer = ssa(mov->srcs[0]);
   if (!producer)
      return false;

   if (mov->cat1.dst_type != mov->cat1.src_type &&
       !conversion_can_move_to_producer(mov, producer))
      return false;

   if (!prepare_producer(producer))
      return false;

   /* Remove IR3_REG_SHARED from the original destination, which should make the
    * mov trivial so that it can be cleaned up later by copy prop.
    */
   producer->dsts[0]->flags &= ~IR3_REG_SHARED;
   mov->srcs[0]->flags &= ~IR3_REG_SHARED;

   /* Insert a copy to shared for uses other than this move instruction. The
    * copy is only created once the first such use is found.
    */
   struct ir3_instruction *shared_copy = NULL;
   foreach_ssa_use (user, producer) {
      if (user == mov)
         continue;

      if (!shared_copy) {
         shared_copy =
            create_shared_copy(producer, mov->cat1.src_type, mem_ctx);
      }

      for (unsigned s = 0; s < user->srcs_count; s++) {
         struct ir3_register *user_src = user->srcs[s];
         if (user_src->def == producer->dsts[0])
            user_src->def = shared_copy->dsts[0];
      }

      _mesa_set_add(shared_copy->uses, user);
   }

   return true;
}

/* Runs shared folding over every instruction of "ir". Use sets are computed
 * into a temporary ralloc context that is freed before returning. Returns
 * true if any instruction was folded.
 */
bool
ir3_shared_fold(struct ir3 *ir)
{
   bool progress = false;
   void *mem_ctx = ralloc_context(NULL);

   ir3_find_ssa_uses(ir, mem_ctx, false);

   /* Folding a phi can push the mov up to its sources, so iterate blocks in
    * reverse to try and convert an entire phi-web in one go.
    */
   foreach_block_rev (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (try_shared_folding(instr, mem_ctx))
            progress = true;
      }
   }

   ralloc_free(mem_ctx);
   return progress;
}