1 /*
2 * Copyright © 2023 Valve Corporation.
3 * SPDX-License-Identifier: MIT
4 */
5
/* Try to fold a shared -> non-shared mov into the instruction producing the
 * shared src. We do this aggressively, even if there are other uses of the
 * source, on the assumption that the "default" state should be non-shared and
 * we should be able to fold the other sources eventually.
 */
11
12 #include "util/ralloc.h"
13
14 #include "ir3.h"
15
16 static bool
try_shared_folding(struct ir3_instruction * mov,void * mem_ctx)17 try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
18 {
19 if (mov->opc != OPC_MOV)
20 return false;
21
22 if ((mov->dsts[0]->flags & IR3_REG_SHARED) ||
23 !(mov->srcs[0]->flags & IR3_REG_SHARED))
24 return false;
25
26 struct ir3_instruction *src = ssa(mov->srcs[0]);
27 if (!src)
28 return false;
29
30 if (mov->cat1.dst_type != mov->cat1.src_type) {
31 /* Check if the conversion can be folded into the source by ir3_cf */
32 bool can_fold;
33 type_t output_type = ir3_output_conv_type(src, &can_fold);
34 if (!can_fold || output_type != TYPE_U32)
35 return false;
36 foreach_ssa_use (use, src) {
37 if (use->opc != OPC_MOV ||
38 use->cat1.src_type != mov->cat1.src_type ||
39 use->cat1.dst_type != mov->cat1.dst_type)
40 return false;
41 }
42 }
43
44 if (src->opc == OPC_META_PHI) {
45 struct ir3_block *block = src->block;
46 for (unsigned i = 0; i < block->predecessors_count; i++) {
47 struct ir3_block *pred = block->predecessors[i];
48 if (src->srcs[i]->def) {
49 struct ir3_instruction *pred_mov = ir3_instr_create(pred, OPC_MOV, 1, 1);
50 __ssa_dst(pred_mov)->flags |= (src->srcs[i]->flags & IR3_REG_HALF);
51 unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
52 (src->srcs[i]->flags & IR3_REG_HALF);
53 ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
54 src->srcs[i]->def;
55 pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
56 (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
57
58 _mesa_set_remove_key(src->srcs[i]->def->instr->uses, src);
59 _mesa_set_add(src->srcs[i]->def->instr->uses, pred_mov);
60 src->srcs[i]->def = pred_mov->dsts[0];
61 }
62 src->srcs[i]->flags &= ~IR3_REG_SHARED;
63 }
64 } else if (opc_cat(src->opc) == 2 && src->srcs_count >= 2) {
65 /* cat2 vector ALU instructions cannot have both shared sources */
66 if ((src->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_CONST)) &&
67 (src->srcs[1]->flags & (IR3_REG_SHARED | IR3_REG_CONST)))
68 return false;
69 } else if (opc_cat(src->opc) == 3) {
70 /* cat3 vector ALU instructions cannot have src1 shared */
71 if (src->srcs[1]->flags & IR3_REG_SHARED)
72 return false;
73 } else if (src->opc == OPC_LDC) {
74 src->flags &= ~IR3_INSTR_U;
75 } else {
76 return false;
77 }
78
79 /* Remove IR3_REG_SHARED from the original destination, which should make the
80 * mov trivial so that it can be cleaned up later by copy prop.
81 */
82 src->dsts[0]->flags &= ~IR3_REG_SHARED;
83 mov->srcs[0]->flags &= ~IR3_REG_SHARED;
84
85 /* Insert a copy to shared for uses other than this move instruction. */
86 struct ir3_instruction *shared_mov = NULL;
87 foreach_ssa_use (use, src) {
88 if (use == mov)
89 continue;
90
91 if (!shared_mov) {
92 shared_mov = ir3_MOV(src->block, src, mov->cat1.src_type);
93 shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
94 if (src->opc == OPC_META_PHI)
95 ir3_instr_move_after_phis(shared_mov, src->block);
96 else
97 ir3_instr_move_after(shared_mov, src);
98 shared_mov->uses = _mesa_pointer_set_create(mem_ctx);
99 }
100
101 for (unsigned i = 0; i < use->srcs_count; i++) {
102 if (use->srcs[i]->def == src->dsts[0])
103 use->srcs[i]->def = shared_mov->dsts[0];
104 }
105 _mesa_set_add(shared_mov->uses, use);
106 }
107
108 return true;
109 }
110
111 bool
ir3_shared_fold(struct ir3 * ir)112 ir3_shared_fold(struct ir3 *ir)
113 {
114 void *mem_ctx = ralloc_context(NULL);
115 bool progress = false;
116
117 ir3_find_ssa_uses(ir, mem_ctx, false);
118
119 /* Folding a phi can push the mov up to its sources, so iterate blocks in
120 * reverse to try and convert an entire phi-web in one go.
121 */
122 foreach_block_rev (block, &ir->block_list) {
123 foreach_instr (instr, &block->instr_list) {
124 progress |= try_shared_folding(instr, mem_ctx);
125 }
126 }
127
128 ralloc_free(mem_ctx);
129
130 return progress;
131 }
132
133