xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_ra.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #ifndef _IR3_RA_H
7 #define _IR3_RA_H
8 
9 #include "util/rb_tree.h"
10 #include "ir3.h"
11 #include "ir3_compiler.h"
12 
/* RA debug logging: gated at runtime on IR3_DBG_RAMSGS, but compiled out
 * entirely in release builds.
 */
#if MESA_DEBUG
#define RA_DEBUG (ir3_shader_debug & IR3_DBG_RAMSGS)
#else
#define RA_DEBUG 0
#endif

/* Log a printf-style debug message, prefixed with "RA: ". */
#define d(fmt, ...)                                                            \
   do {                                                                        \
      if (RA_DEBUG) {                                                          \
         mesa_logi("RA: " fmt, ##__VA_ARGS__);                                 \
      }                                                                        \
   } while (0)

/* Log a debug message followed by the printed form of "instr". */
#define di(instr, fmt, ...)                                                    \
   do {                                                                        \
      if (RA_DEBUG) {                                                          \
         struct log_stream *stream = mesa_log_streami();                       \
         mesa_log_stream_printf(stream, "RA: " fmt ": ", ##__VA_ARGS__);       \
         ir3_print_instr_stream(stream, instr);                                \
         mesa_log_stream_destroy(stream);                                      \
      }                                                                        \
   } while (0)
34 
35 typedef uint16_t physreg_t;
36 
37 static inline unsigned
ra_physreg_to_num(physreg_t physreg,unsigned flags)38 ra_physreg_to_num(physreg_t physreg, unsigned flags)
39 {
40    if (!(flags & IR3_REG_HALF))
41       physreg /= 2;
42    if (flags & IR3_REG_SHARED)
43       physreg += 48 * 4;
44    else if (flags & IR3_REG_PREDICATE)
45       physreg += REG_P0 * 4;
46    return physreg;
47 }
48 
49 static inline physreg_t
ra_num_to_physreg(unsigned num,unsigned flags)50 ra_num_to_physreg(unsigned num, unsigned flags)
51 {
52    if (flags & IR3_REG_SHARED)
53       num -= 48 * 4;
54    else if (flags & IR3_REG_PREDICATE)
55       num -= REG_P0 * 4;
56    if (!(flags & IR3_REG_HALF))
57       num *= 2;
58    return num;
59 }
60 
61 static inline unsigned
ra_reg_get_num(const struct ir3_register * reg)62 ra_reg_get_num(const struct ir3_register *reg)
63 {
64    return (reg->flags & IR3_REG_ARRAY) ? reg->array.base : reg->num;
65 }
66 
67 static inline physreg_t
ra_reg_get_physreg(const struct ir3_register * reg)68 ra_reg_get_physreg(const struct ir3_register *reg)
69 {
70    return ra_num_to_physreg(ra_reg_get_num(reg), reg->flags);
71 }
72 
73 static inline bool
def_is_gpr(const struct ir3_register * reg)74 def_is_gpr(const struct ir3_register *reg)
75 {
76    return reg_num(reg) != REG_A0 && !(reg->flags & IR3_REG_PREDICATE);
77 }
78 
79 /* Note: don't count undef as a source.
80  */
81 static inline bool
ra_reg_is_src(const struct ir3_register * reg)82 ra_reg_is_src(const struct ir3_register *reg)
83 {
84    return (reg->flags & IR3_REG_SSA) && reg->def && def_is_gpr(reg->def);
85 }
86 
87 static inline bool
ra_reg_is_dst(const struct ir3_register * reg)88 ra_reg_is_dst(const struct ir3_register *reg)
89 {
90    return (reg->flags & IR3_REG_SSA) && def_is_gpr(reg) &&
91           ((reg->flags & IR3_REG_ARRAY) || reg->wrmask);
92 }
93 
94 static inline bool
ra_reg_is_predicate(const struct ir3_register * reg)95 ra_reg_is_predicate(const struct ir3_register *reg)
96 {
97    return (reg->flags & IR3_REG_SSA) && (reg->flags & IR3_REG_PREDICATE);
98 }
99 
/* Iterators for sources and destinations which:
 * - Don't include fake sources (irrelevant for RA)
 * - Don't include non-SSA sources (immediates and constants, also irrelevant)
 */

#define ra_foreach_src_n(__srcreg, __n, __instr)                               \
   foreach_src_n(__srcreg, __n, __instr)                                       \
      if (ra_reg_is_src(__srcreg))

#define ra_foreach_src(__srcreg, __instr)                                      \
   ra_foreach_src_n(__srcreg, __i, __instr)

/* Iterate RA-relevant sources in reverse order. The outer loop runs exactly
 * once (it starts from an arbitrary non-NULL pointer and NULLs it after one
 * iteration) and exists only to scope the declaration of __srcreg.
 */
#define ra_foreach_src_rev(__srcreg, __instr)                                  \
   for (struct ir3_register *__srcreg = (void *)~0; __srcreg; __srcreg = NULL) \
      for (int __cnt = (__instr)->srcs_count, __i = __cnt - 1; __i >= 0;       \
           __i--)                                                              \
         if (ra_reg_is_src((__srcreg = (__instr)->srcs[__i])))

#define ra_foreach_dst_n(__dstreg, __n, __instr)                               \
   foreach_dst_n(__dstreg, __n, __instr)                                         \
      if (ra_reg_is_dst(__dstreg))

#define ra_foreach_dst(__dstreg, __instr)                                      \
   ra_foreach_dst_n(__dstreg, __i, __instr)
124 
/* Register file sizes, in half-reg physreg units (see physreg_t): the full
 * file is twice the half file since each full reg takes two half slots.
 * NOTE(review): 48 appears to be the GPR count and 8 the shared-reg count,
 * each with 4 components — confirm against the ISA docs.
 */
#define RA_HALF_SIZE     (4 * 48)
#define RA_FULL_SIZE     (4 * 48 * 2)
#define RA_SHARED_SIZE   (2 * 4 * 8)
#define RA_SHARED_HALF_SIZE (4 * 8)
#define RA_MAX_FILE_SIZE RA_FULL_SIZE
130 
/* Liveness information for a shader, produced by ir3_calc_liveness_for(). */
struct ir3_liveness {
   unsigned block_count;
   /* NOTE(review): presumably one past the highest interval offset assigned
    * to a definition — confirm against the liveness implementation.
    */
   unsigned interval_offset;
   DECLARE_ARRAY(struct ir3_register *, definitions);
   /* Per-block bitsets; presumably indexed by definition — TODO confirm. */
   DECLARE_ARRAY(BITSET_WORD *, live_out);
   DECLARE_ARRAY(BITSET_WORD *, live_in);
};
138 
/* Predicate used to choose which registers participate in liveness. */
typedef bool (*reg_filter_cb)(const struct ir3_register *);

/* Compute liveness, considering only the srcs/dsts accepted by the given
 * filter callbacks.
 */
struct ir3_liveness *ir3_calc_liveness_for(void *mem_ctx, struct ir3 *ir,
                                           reg_filter_cb filter_src,
                                           reg_filter_cb filter_dst);
144 
145 static inline struct ir3_liveness *
ir3_calc_liveness(void * mem_ctx,struct ir3 * ir)146 ir3_calc_liveness(void *mem_ctx, struct ir3 *ir)
147 {
148    return ir3_calc_liveness_for(mem_ctx, ir, ra_reg_is_src, ra_reg_is_dst);
149 }
150 
/* Whether "def" is still live after "instr". */
bool ir3_def_live_after(struct ir3_liveness *live, struct ir3_register *def,
                        struct ir3_instruction *instr);

void ir3_create_parallel_copies(struct ir3 *ir);

/* Coalesce registers into merge sets and compute their offsets (see the
 * interval overview comment below).
 */
void ir3_merge_regs(struct ir3_liveness *live, struct ir3 *ir);

/* Force "a" and "b" into the same merge set, with "b" at b_offset. */
void ir3_force_merge(struct ir3_register *a, struct ir3_register *b,
                     int b_offset);

void ir3_index_instrs_for_merge_sets(struct ir3 *ir);

/* Register pressure, tracked separately per register file. */
struct ir3_pressure {
   unsigned full, half, shared, shared_half;
};

/* Compute the maximum register pressure reached anywhere in the shader. */
void ir3_calc_pressure(struct ir3_shader_variant *v, struct ir3_liveness *live,
                       struct ir3_pressure *max_pressure);

/* Spill to bring pressure under limit_pressure. NOTE(review): the double
 * pointer suggests liveness is recomputed and replaced — confirm.
 */
bool ir3_spill(struct ir3 *ir, struct ir3_shader_variant *v,
               struct ir3_liveness **live,
               const struct ir3_pressure *limit_pressure);

bool ir3_lower_spill(struct ir3 *ir);

/* Allocate the shared register file. NOTE(review): presumably run as a
 * separate pass from normal RA — confirm against ir3_ra.c.
 */
void ir3_ra_shared(struct ir3_shader_variant *v, struct ir3_liveness **live);

/* Validate the result of register allocation (debug aid). */
void ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                     unsigned half_size, unsigned block_count, bool shared_ra);

void ir3_lower_copies(struct ir3_shader_variant *v);
182 
183 /* Register interval datastructure
184  *
185  * ir3_reg_ctx is used to track which registers are live. The tricky part is
186  * that some registers may overlap each other, when registers with overlapping
187  * live ranges get coalesced. For example, splits will overlap with their
188  * parent vector and sometimes collect sources will also overlap with the
189  * collect'ed vector. ir3_merge_regs guarantees for us that none of the
190  * registers in a merge set that are live at any given point partially
191  * overlap, which means that we can organize them into a forest. While each
192  * register has a per-merge-set offset, ir3_merge_regs also computes a
193  * "global" offset which allows us to throw away the original merge sets and
194  * think of registers as just intervals in a forest of live intervals. When a
195  * register becomes live, we insert it into the forest, and when it dies we
196  * remove it from the forest (and then its children get moved up a level). We
197  * use red-black trees to keep track of each level of the forest, so insertion
198  * and deletion should be fast operations. ir3_reg_ctx handles all the
199  * internal bookkeeping for this, so that it can be shared between RA,
200  * spilling, and register pressure tracking.
201  */
202 
/* A node in the forest of live intervals described above. */
struct ir3_reg_interval {
   /* Links this interval into its level's red-black tree. */
   struct rb_node node;

   /* Intervals nested inside this one (never partially overlapping). */
   struct rb_tree children;

   /* NULL for top-level intervals. */
   struct ir3_reg_interval *parent;

   /* The register whose live range this interval represents. */
   struct ir3_register *reg;

   /* Whether this interval is currently in the forest. */
   bool inserted;
};
214 
/* Bookkeeping state for the interval forest. Users embed this and fill in
 * the callbacks below to be notified of top-level changes.
 */
struct ir3_reg_ctx {
   /* The tree of top-level intervals in the forest. */
   struct rb_tree intervals;

   /* Users of ir3_reg_ctx need to keep around additional state that is
    * modified when top-level intervals are added or removed. For register
    * pressure tracking, this is just the register pressure, but for RA we
    * need to keep track of the physreg of each top-level interval. These
    * callbacks provide a place to let users deriving from ir3_reg_ctx update
    * their state when top-level intervals are inserted/removed.
    */

   /* Called when an interval is added and it turns out to be at the top
    * level.
    */
   void (*interval_add)(struct ir3_reg_ctx *ctx,
                        struct ir3_reg_interval *interval);

   /* Called when an interval is deleted from the top level. */
   void (*interval_delete)(struct ir3_reg_ctx *ctx,
                           struct ir3_reg_interval *interval);

   /* Called when an interval is deleted and its child becomes top-level.
    */
   void (*interval_readd)(struct ir3_reg_ctx *ctx,
                          struct ir3_reg_interval *parent,
                          struct ir3_reg_interval *child);
};
243 
/* Recover the containing ir3_reg_interval from its embedded rb_node. */
static inline struct ir3_reg_interval *
ir3_rb_node_to_interval(struct rb_node *node)
{
   return rb_node_data(struct ir3_reg_interval, node, node);
}
249 
/* Const-qualified variant of ir3_rb_node_to_interval(). */
static inline const struct ir3_reg_interval *
ir3_rb_node_to_interval_const(const struct rb_node *node)
{
   return rb_node_data(struct ir3_reg_interval, node, node);
}
255 
256 static inline struct ir3_reg_interval *
ir3_reg_interval_next(struct ir3_reg_interval * interval)257 ir3_reg_interval_next(struct ir3_reg_interval *interval)
258 {
259    struct rb_node *next = rb_node_next(&interval->node);
260    return next ? ir3_rb_node_to_interval(next) : NULL;
261 }
262 
/* NULL-tolerant wrapper around ir3_reg_interval_next(). */
static inline struct ir3_reg_interval *
ir3_reg_interval_next_or_null(struct ir3_reg_interval *interval)
{
   if (!interval)
      return NULL;
   return ir3_reg_interval_next(interval);
}
268 
269 static inline void
ir3_reg_interval_init(struct ir3_reg_interval * interval,struct ir3_register * reg)270 ir3_reg_interval_init(struct ir3_reg_interval *interval,
271                       struct ir3_register *reg)
272 {
273    rb_tree_init(&interval->children);
274    interval->reg = reg;
275    interval->parent = NULL;
276    interval->inserted = false;
277 }
278 
/* Dump the interval (and its children) to "stream" (debug aid). */
void ir3_reg_interval_dump(struct log_stream *stream,
                           struct ir3_reg_interval *interval);

/* Insert "interval" into the forest when its register becomes live. */
void ir3_reg_interval_insert(struct ir3_reg_ctx *ctx,
                             struct ir3_reg_interval *interval);

/* Remove "interval" from the forest; its children move up a level (see the
 * overview comment above).
 */
void ir3_reg_interval_remove(struct ir3_reg_ctx *ctx,
                             struct ir3_reg_interval *interval);

/* Remove "interval" together with everything nested inside it. */
void ir3_reg_interval_remove_all(struct ir3_reg_ctx *ctx,
                                 struct ir3_reg_interval *interval);

void ra_update_affinity(unsigned file_size, struct ir3_register *reg,
                        physreg_t physreg);
293 
294 #endif
295