/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "ir3_ra.h"

/* The spilling pass leaves out a few details required for ldp/stp to work
 * correctly:
 *
 * 1. ldp/stp can only load/store 4 components at a time, but spilling ignores
 *    that and just spills/restores entire values, including arrays and values
 *    created for texture setup which can be more than 4 components (see the
 *    sketch below).
 * 2. The immediate offset only has 13 bits and is signed, so if we spill a lot
 *    or have very large arrays before spilling then we could run out.
 * 3. The spiller doesn't add the barrier dependencies needed for post-RA
 *    scheduling.
 *
 * The first one, in particular, is much easier to handle after RA because
 * arrays and normal values can be treated the same way. Therefore this pass
 * runs after RA, and handles all three issues. This keeps the complexity out
 * of the spiller.
 */

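/* As an illustrative sketch of issue 1 (the register numbers and assembly
 * spelling here are made up), split_spill() below turns an 8-component spill
 * such as
 *
 *    spill.macro p[16], r4.x, 8
 *
 * into two at-most-4-component stores, stepping both the source register and
 * the byte offset (4 bytes per full component):
 *
 *    stp p[16], r4.x, 4
 *    stp p[32], r5.x, 4
 */
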
static unsigned
component_bytes(struct ir3_register *src)
{
   return (src->flags & IR3_REG_HALF) ? 2 : 4;
}

/* Note: this won't work if the base register is anything other than 0!
 * Dynamic bases, which we'll need for "real" function call support, will
 * probably be a lot harder to handle and may require reserving another
 * register.
 */
static void
set_base_reg(struct ir3_instruction *mem, unsigned val)
{
   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
   ir3_dst_create(mov, mem->srcs[0]->num, mem->srcs[0]->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = val;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;

   ir3_instr_move_before(mov, mem);
}

static void
reset_base_reg(struct ir3_instruction *mem)
{
   /* If the base register is killed, then we don't need to reset it.
    * Moreover, it may be reused as a destination, so we can't always safely
    * clobber it after the instruction anyway.
    */
   struct ir3_register *base = mem->srcs[0];
   if (base->flags & IR3_REG_KILL)
      return;

   struct ir3_instruction *mov = ir3_instr_create(mem->block, OPC_MOV, 1, 1);
   ir3_dst_create(mov, base->num, base->flags);
   ir3_src_create(mov, INVALID_REG, IR3_REG_IMMED)->uim_val = 0;
   mov->cat1.dst_type = mov->cat1.src_type = TYPE_U32;

   ir3_instr_move_after(mov, mem);
}

/* There are 13 bits, but 1 << 12 will be sign-extended into a negative offset
 * so it can't be used directly. Therefore only offsets under 1 << 12 can be
 * used without any adjustments.
 */
#define MAX_CAT6_SIZE (1u << 12)

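/* When the offset doesn't fit, the fallback (implemented by
 * handle_oob_offset_spill()/handle_oob_offset_reload() below, using
 * set_base_reg()/reset_base_reg() above) is to fold the offset into the base
 * register. Sketched in made-up assembly for a spill at byte offset 5000:
 *
 *    mov base, 5000      ; set_base_reg()
 *    stp p[0], r4.x, 4   ; immediate offset rewritten to 0
 *    mov base, 0         ; reset_base_reg(), skipped if base is killed
 */
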
static void
handle_oob_offset_spill(struct ir3_instruction *spill)
{
   unsigned components = spill->srcs[2]->uim_val;

   if (spill->cat6.dst_offset + components * component_bytes(spill->srcs[1]) <
       MAX_CAT6_SIZE)
      return;

   set_base_reg(spill, spill->cat6.dst_offset);
   reset_base_reg(spill);
   spill->cat6.dst_offset = 0;
}

static void
handle_oob_offset_reload(struct ir3_instruction *reload)
{
   unsigned components = reload->srcs[2]->uim_val;
   unsigned offset = reload->srcs[1]->uim_val;
   if (offset + components * component_bytes(reload->dsts[0]) < MAX_CAT6_SIZE)
      return;

   set_base_reg(reload, offset);
   reset_base_reg(reload);
   reload->srcs[1]->uim_val = 0;
}

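/* Split a spill of more than 4 components into stp's of at most 4 components
 * each, stepping the source register and destination offset for each clone.
 * An array source is also rewritten to plain registers, which is possible
 * now that RA has assigned the array a fixed base.
 */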
static void
split_spill(struct ir3_instruction *spill)
{
   unsigned orig_components = spill->srcs[2]->uim_val;

   /* We don't handle splitting dependencies. */
   assert(spill->deps_count == 0);

   if (orig_components <= 4) {
      if (spill->srcs[1]->flags & IR3_REG_ARRAY) {
         spill->srcs[1]->wrmask = MASK(orig_components);
         spill->srcs[1]->num = spill->srcs[1]->array.base;
         spill->srcs[1]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(spill);
      ir3_instr_move_before(clone, spill);

      clone->srcs[1]->wrmask = MASK(components);
      if (clone->srcs[1]->flags & IR3_REG_ARRAY) {
         clone->srcs[1]->num = clone->srcs[1]->array.base + comp;
         clone->srcs[1]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->srcs[1]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->cat6.dst_offset += comp * component_bytes(spill->srcs[1]);
   }

   list_delinit(&spill->node);
}

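/* Mirror image of split_spill(): the value being reloaded lives in dsts[0]
 * rather than srcs[1], and the byte offset is the srcs[1] immediate rather
 * than cat6.dst_offset.
 */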
static void
split_reload(struct ir3_instruction *reload)
{
   unsigned orig_components = reload->srcs[2]->uim_val;

   assert(reload->deps_count == 0);

   if (orig_components <= 4) {
      if (reload->dsts[0]->flags & IR3_REG_ARRAY) {
         reload->dsts[0]->wrmask = MASK(orig_components);
         reload->dsts[0]->num = reload->dsts[0]->array.base;
         reload->dsts[0]->flags &= ~IR3_REG_ARRAY;
      }
      return;
   }

   for (unsigned comp = 0; comp < orig_components; comp += 4) {
      unsigned components = MIN2(orig_components - comp, 4);
      struct ir3_instruction *clone = ir3_instr_clone(reload);
      ir3_instr_move_before(clone, reload);

      clone->dsts[0]->wrmask = MASK(components);
      if (clone->dsts[0]->flags & IR3_REG_ARRAY) {
         clone->dsts[0]->num = clone->dsts[0]->array.base + comp;
         clone->dsts[0]->flags &= ~IR3_REG_ARRAY;
      } else {
         clone->dsts[0]->num += comp;
      }

      clone->srcs[2]->uim_val = components;
      clone->srcs[1]->uim_val += comp * component_bytes(reload->dsts[0]);
   }

   list_delinit(&reload->node);
}

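/* The post-RA scheduler has no notion of memory dependencies between the
 * ldp/stp we emit, so add explicit barrier deps: the forward walk pins every
 * spill/reload after the closest preceding spill, and the reverse walk pins
 * every spill after each spill/reload preceding it. The net effect is that
 * spills stay ordered with respect to everything, while reloads between two
 * spills remain free to reorder with each other.
 */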
static void
add_spill_reload_deps(struct ir3_block *block)
{
   struct ir3_instruction *last_spill = NULL;

   foreach_instr (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(instr, last_spill);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }

   last_spill = NULL;

   foreach_instr_rev (instr, &block->instr_list) {
      if ((instr->opc == OPC_SPILL_MACRO || instr->opc == OPC_RELOAD_MACRO) &&
          last_spill) {
         ir3_instr_add_dep(last_spill, instr);
      }

      if (instr->opc == OPC_SPILL_MACRO)
         last_spill = instr;
   }
}

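/* Entry point: legalize the offset and size of every spill/reload macro, add
 * the scheduling dependencies, and only then rewrite the macros to the real
 * stp/ldp opcodes.
 */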
bool
ir3_lower_spill(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO) {
            handle_oob_offset_spill(instr);
            split_spill(instr);
         } else if (instr->opc == OPC_RELOAD_MACRO) {
            handle_oob_offset_reload(instr);
            split_reload(instr);
         }
      }

      add_spill_reload_deps(block);

      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_SPILL_MACRO)
            instr->opc = OPC_STP;
         else if (instr->opc == OPC_RELOAD_MACRO)
            instr->opc = OPC_LDP;
      }
   }

   return true;
}