xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_shared_folding.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2023 Valve Corporation.
3  * SPDX-License-Identifier: MIT
4  */
5 
/* Try to fold a shared -> non-shared mov into the instruction producing the
 * shared src. We do this aggressively, even if there are other uses of the
 * source, on the assumption that the "default" state should be non-shared and
 * we should be able to fold the other sources eventually.
 */
11 
12 #include "util/ralloc.h"
13 
14 #include "ir3.h"
15 
16 static bool
try_shared_folding(struct ir3_instruction * mov,void * mem_ctx)17 try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
18 {
19    if (mov->opc != OPC_MOV)
20       return false;
21 
22    if ((mov->dsts[0]->flags & IR3_REG_SHARED) ||
23        !(mov->srcs[0]->flags & IR3_REG_SHARED))
24       return false;
25 
26    struct ir3_instruction *src = ssa(mov->srcs[0]);
27    if (!src)
28       return false;
29 
30    if (mov->cat1.dst_type != mov->cat1.src_type) {
31       /* Check if the conversion can be folded into the source by ir3_cf */
32       bool can_fold;
33       type_t output_type = ir3_output_conv_type(src, &can_fold);
34       if (!can_fold || output_type != TYPE_U32)
35          return false;
36       foreach_ssa_use (use, src) {
37          if (use->opc != OPC_MOV ||
38              use->cat1.src_type != mov->cat1.src_type ||
39              use->cat1.dst_type != mov->cat1.dst_type)
40             return false;
41       }
42    }
43 
44    if (src->opc == OPC_META_PHI) {
45       struct ir3_block *block = src->block;
46       for (unsigned i = 0; i < block->predecessors_count; i++) {
47          struct ir3_block *pred = block->predecessors[i];
48          if (src->srcs[i]->def) {
49             struct ir3_instruction *pred_mov = ir3_instr_create(pred, OPC_MOV, 1, 1);
50             __ssa_dst(pred_mov)->flags |= (src->srcs[i]->flags & IR3_REG_HALF);
51             unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
52                (src->srcs[i]->flags & IR3_REG_HALF);
53             ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
54                src->srcs[i]->def;
55             pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
56                (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
57 
58             _mesa_set_remove_key(src->srcs[i]->def->instr->uses, src);
59             _mesa_set_add(src->srcs[i]->def->instr->uses, pred_mov);
60             src->srcs[i]->def = pred_mov->dsts[0];
61          }
62          src->srcs[i]->flags &= ~IR3_REG_SHARED;
63       }
64    } else if (opc_cat(src->opc) == 2 && src->srcs_count >= 2) {
65       /* cat2 vector ALU instructions cannot have both shared sources */
66       if ((src->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_CONST)) &&
67           (src->srcs[1]->flags & (IR3_REG_SHARED | IR3_REG_CONST)))
68          return false;
69    } else if (opc_cat(src->opc) == 3) {
70       /* cat3 vector ALU instructions cannot have src1 shared */
71       if (src->srcs[1]->flags & IR3_REG_SHARED)
72          return false;
73    } else if (src->opc == OPC_LDC) {
74       src->flags &= ~IR3_INSTR_U;
75    } else {
76       return false;
77    }
78 
79    /* Remove IR3_REG_SHARED from the original destination, which should make the
80     * mov trivial so that it can be cleaned up later by copy prop.
81     */
82    src->dsts[0]->flags &= ~IR3_REG_SHARED;
83    mov->srcs[0]->flags &= ~IR3_REG_SHARED;
84 
85    /* Insert a copy to shared for uses other than this move instruction. */
86    struct ir3_instruction *shared_mov = NULL;
87    foreach_ssa_use (use, src) {
88       if (use == mov)
89          continue;
90 
91       if (!shared_mov) {
92          shared_mov = ir3_MOV(src->block, src, mov->cat1.src_type);
93          shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
94          if (src->opc == OPC_META_PHI)
95             ir3_instr_move_after_phis(shared_mov, src->block);
96          else
97             ir3_instr_move_after(shared_mov, src);
98          shared_mov->uses = _mesa_pointer_set_create(mem_ctx);
99       }
100 
101       for (unsigned i = 0; i < use->srcs_count; i++) {
102          if (use->srcs[i]->def == src->dsts[0])
103             use->srcs[i]->def = shared_mov->dsts[0];
104       }
105       _mesa_set_add(shared_mov->uses, use);
106    }
107 
108    return true;
109 }
110 
111 bool
ir3_shared_fold(struct ir3 * ir)112 ir3_shared_fold(struct ir3 *ir)
113 {
114    void *mem_ctx = ralloc_context(NULL);
115    bool progress = false;
116 
117    ir3_find_ssa_uses(ir, mem_ctx, false);
118 
119    /* Folding a phi can push the mov up to its sources, so iterate blocks in
120     * reverse to try and convert an entire phi-web in one go.
121     */
122    foreach_block_rev (block, &ir->block_list) {
123       foreach_instr (instr, &block->instr_list) {
124          progress |= try_shared_folding(instr, mem_ctx);
125       }
126    }
127 
128    ralloc_free(mem_ctx);
129 
130    return progress;
131 }
132 
133