/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "ir3_ra.h"

/* The spilling pass leaves out a few details required to successfully operate
 * ldp/stp:
 *
 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
 *    that and just spills/restores entire values, including arrays and values
 *    created for texture setup which can be more than 4 components.
 * 2. The immediate offset only has 13 bits and is signed, so if we spill a
 *    lot or have very large arrays before spilling then we could run out.
 * 3. The spiller doesn't add barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first one, in particular, is much easier to handle after RA because
 * arrays and normal values can be treated the same way. Therefore this pass
 * runs after RA and handles all three issues. This keeps the complexity out
 * of the spiller.
 */

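/* Size in bytes of a single component of the given register. */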
static unsigned
component_bytes(struct ir3_register *src)
{
   return (src->flags & IR3_REG_HALF) ? 2 : 4;
}

/* Set the base register of a spill/reload to an immediate value by inserting
 * a mov right before it. Note: this won't work if the base register is
 * anything other than 0! Dynamic bases, which we'll need for "real" function
 * call support, will probably be a lot harder to handle and may require
 * reserving another register.
 */
static void
set_base_reg(struct ir3_instruction *mem, unsigned val)
{
   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
   ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;

   ir3_instr_move_before(mov, mem);
}

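/* Put the base register back to 0 after the spill/reload, so that later
 * spills/reloads with in-range immediate offsets see the base they expect.
 */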
static void
reset_base_reg(struct ir3_instruction *mem)
{
   /* If the base register is killed, then we don't need to clobber it.
    * Besides, it may be reused as a destination, so we can't always clobber
    * it after the instruction anyway.
    */
   struct ir3_register *base = mem->srcs[0];
   if (base->flags & IR3_REG_KILL)
      return;

   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
   ir3_dst_create(mov, base->num, base->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;

   ir3_instr_move_after(mov, mem);
}

/* There are 13 bits, but 1 << 12 will be sign-extended into a negative offset
 * so it can't be used directly. Therefore only offsets under 1 << 12 can be
 * used without any adjustments.
 */
#define MAX_CAT6_SIZE (1u << 12)

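/* If the spill's last byte wouldn't fit in the signed 13-bit immediate
 * offset, materialize the offset in the base register around the instruction
 * and zero out the immediate instead.
 */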
static void
handle_oob_offset_spill(struct ir3_instruction *spill)
{
   unsigned components = spill->srcs[2]->uim_val;

   if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) <
       MAX_CAT6_SIZE)
      return;

   set_base_reg(spill, spill->cat6.dst_offset);
   reset_base_reg(spill);
   spill->cat6.dst_offset = 0;
}

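/* Same as handle_oob_offset_spill(), except that for a reload the offset is
 * an immediate source (srcs[1]) rather than cat6.dst_offset.
 */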
static void
handle_oob_offset_reload(struct ir3_instruction *reload)
{
   unsigned components = reload->srcs[2]->uim_val;
   unsigned offset = reload->srcs[1]->uim_val;
   if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE)
      return;

   set_base_reg(reload, offset);
   reset_base_reg(reload);
   reload->srcs[1]->uim_val = 0;
}

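/* Split a spill of more than 4 components into multiple stp's of at most 4
 * components each, and rewrite spilled arrays as plain register ranges now
 * that RA has assigned them a base register.
 */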
static void
split_spill(struct ir3_instruction *spill)
{
   unsigned orig_components = spill->srcs[2]->uim_val;

   /* We don't handle splitting dependencies. */
   assert(spill->deps_count == 0);

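   /* The whole value fits in one stp. An array source still has to be
    * lowered to its post-RA register range, though.
    */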
   if (orig_components <= 4) {
      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
         spill->srcs[1]->wrmask = MASK(orig_components);
         spill->srcs[1]->num = spill->srcs[1]->array.base;
         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

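   /* Emit one stp per chunk of up to 4 components, stepping the source
    * register and the destination offset along with the chunk.
    */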
   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(spill);
      ir3_instr_move_before(clone, spill);

      clone->srcs[1]->wrmask = MASK(components);
      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->srcs[1]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]);
   }

   list_delinit(&spill->node);
}

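/* Split a reload of more than 4 components into multiple ldp's of at most 4
 * components each, mirroring split_spill() but operating on the destination.
 */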
static void
split_reload(struct ir3_instruction *reload)
{
   unsigned orig_components = reload->srcs[2]->uim_val;

   assert(reload->deps_count == 0);

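   /* The whole value fits in one ldp; just lower an array destination to its
    * post-RA register range.
    */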
   if (orig_components <= 4) {
      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
         reload->dsts[0]->wrmask = MASK(orig_components);
         reload->dsts[0]->num = reload->dsts[0]->array.base;
         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

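   /* Emit one ldp per chunk of up to 4 components, stepping the destination
    * register and the source offset along with the chunk.
    */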
   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(reload);
      ir3_instr_move_before(clone, reload);

      clone->dsts[0]->wrmask = MASK(components);
      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->dsts[0]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]);
   }

   list_delinit(&reload->node);
}

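/* The post-RA scheduler is free to reorder instructions that have no
 * register-level dependency, so spills and reloads that touch the same
 * private memory need explicit barrier dependencies. We conservatively order
 * every spill against every other spill/reload; only reloads may be
 * reordered with each other.
 */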
static void
add_spill_reload_deps(struct ir3_block *block)
{
   struct ir3_instruction *last_spill = NULL;

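   /* Forward pass: no spill or reload may move above the closest spill that
    * precedes it.
    */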
   foreach_instr (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(instr, last_spill);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }

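   /* Backward pass: no spill may move above any spill or reload that
    * precedes it.
    */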
   last_spill = NULL;

   foreach_instr_rev (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(last_spill, instr);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }
}

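/* Lower the spiller's SPILL_MACRO/RELOAD_MACRO pseudo-instructions to real
 * stp/ldp: legalize out-of-range offsets, split accesses wider than 4
 * components, and add the barrier dependencies the post-RA scheduler needs.
 */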
bool
ir3_lower_spill(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO) {
            handle_oob_offset_spill(instr);
            split_spill(instr);
         } else if (instr->opc == OPC_RELOAD_MACRO) {
            handle_oob_offset_reload(instr);
            split_reload(instr);
         }
      }

      add_spill_reload_deps(block);

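      /* Everything is now legal for ldp/stp, so rewrite the macros into the
       * real opcodes.
       */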
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            instr->opc = OPC_STP;
         else if (instr->opc == OPC_RELOAD_MACRO)
            instr->opc = OPC_LDP;
      }
   }

   return true;
}
226