/* xref: /aosp_15_r20/external/mesa3d/src/freedreno/ir3/ir3_rpt.c (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
/*
 * Copyright 2024 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "ir3_nir.h"
7 
8 bool
ir3_supports_vectorized_nir_op(nir_op op)9 ir3_supports_vectorized_nir_op(nir_op op)
10 {
11    switch (op) {
12       /* TODO: emitted as absneg which can often be folded away (e.g., into
13        * (neg)). This seems to often fail when repeated.
14        */
15    case nir_op_b2b1:
16 
17       /* dsx/dsy don't seem to support repeat. */
18    case nir_op_fddx:
19    case nir_op_fddx_coarse:
20    case nir_op_fddx_fine:
21    case nir_op_fddy:
22    case nir_op_fddy_coarse:
23    case nir_op_fddy_fine:
24 
25       /* dp2acc/dp4acc don't seem to support repeat. */
26    case nir_op_udot_4x8_uadd:
27    case nir_op_udot_4x8_uadd_sat:
28    case nir_op_sudot_4x8_iadd:
29    case nir_op_sudot_4x8_iadd_sat:
30 
31       /* Among SFU instructions, only rcp doesn't seem to support repeat. */
32    case nir_op_frcp:
33       return false;
34 
35    default:
36       return true;
37    }
38 }
39 
40 uint8_t
ir3_nir_vectorize_filter(const nir_instr * instr,const void * data)41 ir3_nir_vectorize_filter(const nir_instr *instr, const void *data)
42 {
43    if (instr->type == nir_instr_type_phi)
44       return 4;
45    if (instr->type != nir_instr_type_alu)
46       return 0;
47 
48    struct nir_alu_instr *alu = nir_instr_as_alu(instr);
49 
50    if (!ir3_supports_vectorized_nir_op(alu->op))
51       return 0;
52 
53    return 4;
54 }
55 
56 static void
rpt_list_split(struct list_head * list,struct list_head * at)57 rpt_list_split(struct list_head *list, struct list_head *at)
58 {
59    struct list_head *new_last = at->prev;
60    new_last->next = list;
61    at->prev = list->prev;
62    list->prev->next = at;
63    list->prev = new_last;
64 }
65 
66 static enum ir3_register_flags
rpt_compatible_src_flags(struct ir3_register * src)67 rpt_compatible_src_flags(struct ir3_register *src)
68 {
69    return src->flags &
70           (IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_FABS | IR3_REG_FNEG |
71            IR3_REG_BNOT | IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SSA |
72            IR3_REG_HALF | IR3_REG_SHARED);
73 }
74 
75 static enum ir3_register_flags
rpt_compatible_dst_flags(struct ir3_instruction * instr)76 rpt_compatible_dst_flags(struct ir3_instruction *instr)
77 {
78    return instr->dsts[0]->flags & (IR3_REG_SSA | IR3_REG_HALF | IR3_REG_SHARED);
79 }
80 
81 static enum ir3_register_flags
rpt_illegal_src_flags(struct ir3_register * src)82 rpt_illegal_src_flags(struct ir3_register *src)
83 {
84    return src->flags & (IR3_REG_ARRAY | IR3_REG_RELATIV);
85 }
86 
87 static enum ir3_instruction_flags
rpt_compatible_instr_flags(struct ir3_instruction * instr)88 rpt_compatible_instr_flags(struct ir3_instruction *instr)
89 {
90    return instr->flags & IR3_INSTR_SAT;
91 }
92 
93 static bool
supports_imm_r(unsigned opc)94 supports_imm_r(unsigned opc)
95 {
96    return opc == OPC_BARY_F || opc == OPC_FLAT_B;
97 }
98 
99 static bool
srcs_can_rpt(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src,unsigned rpt_n)100 srcs_can_rpt(struct ir3_instruction *instr, struct ir3_register *src,
101              struct ir3_register *rpt_src, unsigned rpt_n)
102 {
103    if (rpt_illegal_src_flags(src) != 0 || rpt_illegal_src_flags(rpt_src) != 0)
104       return false;
105    if (rpt_compatible_src_flags(src) != rpt_compatible_src_flags(rpt_src))
106       return false;
107    if (src->flags & IR3_REG_IMMED) {
108       uint32_t val = src->uim_val;
109       uint32_t rpt_val = rpt_src->uim_val;
110 
111       if (rpt_val == val)
112          return true;
113       if (supports_imm_r(instr->opc))
114          return rpt_val == val + rpt_n;
115       return false;
116    }
117 
118    return true;
119 }
120 
121 static bool
can_rpt(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)122 can_rpt(struct ir3_instruction *instr, struct ir3_instruction *rpt,
123         unsigned rpt_n)
124 {
125    if (rpt_n >= 4)
126       return false;
127    if (rpt->ip != instr->ip + rpt_n)
128       return false;
129    if (rpt->opc != instr->opc)
130       return false;
131    if (!ir3_supports_rpt(instr->block->shader->compiler, instr->opc))
132       return false;
133    if (rpt_compatible_instr_flags(rpt) != rpt_compatible_instr_flags(instr))
134       return false;
135    if (rpt_compatible_dst_flags(rpt) != rpt_compatible_dst_flags(instr))
136       return false;
137    if (instr->srcs_count != rpt->srcs_count)
138       return false;
139 
140    foreach_src_n (src, src_n, instr) {
141       if (!srcs_can_rpt(instr, src, rpt->srcs[src_n], rpt_n))
142          return false;
143    }
144 
145    return true;
146 }
147 
148 static bool
cleanup_rpt_instr(struct ir3_instruction * instr)149 cleanup_rpt_instr(struct ir3_instruction *instr)
150 {
151    if (!ir3_instr_is_first_rpt(instr))
152       return false;
153 
154    unsigned rpt_n = 1;
155    foreach_instr_rpt_excl (rpt, instr) {
156       if (!can_rpt(instr, rpt, rpt_n++)) {
157          rpt_list_split(&instr->rpt_node, &rpt->rpt_node);
158 
159          /* We have to do this recursively since later repetitions might come
160           * before the first in the instruction list.
161           */
162          cleanup_rpt_instr(rpt);
163          return true;
164       }
165    }
166 
167    return false;
168 }
169 
170 /* Pre-RA pass to clean up repetition groups that can never be merged into a rpt
171  * instruction. This ensures we don't needlessly allocate merge sets for them.
172  */
173 bool
ir3_cleanup_rpt(struct ir3 * ir,struct ir3_shader_variant * v)174 ir3_cleanup_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
175 {
176    ir3_count_instructions(ir);
177    bool progress = false;
178 
179    foreach_block (block, &ir->block_list) {
180       foreach_instr (instr, &block->instr_list)
181          progress |= cleanup_rpt_instr(instr);
182    }
183 
184    return progress;
185 }
186 
/* Result of comparing a source with the matching source of a repetition. */
enum rpt_src_type {
   /* The sources cannot be part of the same rpt instruction. */
   RPT_INCOMPATIBLE,
   /* The sources are compatible and (r) has to be set on the source. */
   RPT_SET,
   /* The sources are compatible and (r) must not be set on the source. */
   RPT_DONT_SET,
};
192 
193 static enum rpt_src_type
srcs_rpt_compatible(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src)194 srcs_rpt_compatible(struct ir3_instruction *instr, struct ir3_register *src,
195                     struct ir3_register *rpt_src)
196 {
197    /* Shared RA may have demoted some sources from shared to non-shared. When
198     * this happened for some but not all instructions in a repeat group, the
199     * assert below would trigger. Detect this here.
200     */
201    if ((src->flags & IR3_REG_SHARED) != (rpt_src->flags & IR3_REG_SHARED))
202       return RPT_INCOMPATIBLE;
203 
204    assert(srcs_can_rpt(instr, src, rpt_src, instr->repeat + 1));
205 
206    if (src->flags & IR3_REG_IMMED) {
207       if (supports_imm_r(instr->opc) &&
208           rpt_src->uim_val == src->uim_val + instr->repeat + 1) {
209          return RPT_SET;
210       }
211 
212       assert(rpt_src->uim_val == src->uim_val);
213       return RPT_DONT_SET;
214    }
215 
216    if (rpt_src->num == src->num + instr->repeat + 1) {
217       if ((src->flags & IR3_REG_R) || instr->repeat == 0)
218          return RPT_SET;
219       return RPT_INCOMPATIBLE;
220    }
221 
222    if (rpt_src->num == src->num && !(src->flags & IR3_REG_R))
223       return RPT_DONT_SET;
224    return RPT_INCOMPATIBLE;
225 }
226 
/* Grows a write mask by one component: shifts it left by one bit and sets the
 * lowest bit.
 */
static unsigned
inc_wrmask(unsigned wrmask)
{
   unsigned grown = wrmask << 1;

   grown |= 0x1;
   return grown;
}
232 
233 static bool
try_merge(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)234 try_merge(struct ir3_instruction *instr, struct ir3_instruction *rpt,
235           unsigned rpt_n)
236 {
237    assert(rpt_n > 0 && rpt_n < 4);
238    assert(instr->opc == rpt->opc);
239    assert(instr->dsts_count == 1 && rpt->dsts_count == 1);
240    assert(instr->srcs_count == rpt->srcs_count);
241    assert(rpt_compatible_instr_flags(instr) == rpt_compatible_instr_flags(rpt));
242 
243    struct ir3_register *dst = instr->dsts[0];
244    struct ir3_register *rpt_dst = rpt->dsts[0];
245 
246    if (rpt->ip != instr->ip + rpt_n)
247       return false;
248    if (rpt_dst->num != dst->num + rpt_n)
249       return false;
250 
251    enum rpt_src_type srcs_rpt[instr->srcs_count];
252 
253    foreach_src_n (src, src_n, instr) {
254       srcs_rpt[src_n] = srcs_rpt_compatible(instr, src, rpt->srcs[src_n]);
255 
256       if (srcs_rpt[src_n] == RPT_INCOMPATIBLE)
257          return false;
258    }
259 
260    foreach_src_n (src, src_n, instr) {
261       assert((src->flags & ~(IR3_REG_R | IR3_REG_KILL | IR3_REG_FIRST_KILL)) ==
262              (rpt->srcs[src_n]->flags & ~(IR3_REG_KILL | IR3_REG_FIRST_KILL)));
263 
264       if (srcs_rpt[src_n] == RPT_SET) {
265          src->flags |= IR3_REG_R;
266          src->wrmask = inc_wrmask(src->wrmask);
267       }
268    }
269 
270    dst->wrmask = inc_wrmask(dst->wrmask);
271    return true;
272 }
273 
274 static bool
merge_instr(struct ir3_instruction * instr)275 merge_instr(struct ir3_instruction *instr)
276 {
277    if (!ir3_instr_is_first_rpt(instr))
278       return false;
279 
280    bool progress = false;
281 
282    unsigned rpt_n = 1;
283 
284    foreach_instr_rpt_excl_safe (rpt, instr) {
285       /* When rpt cannot be merged, stop immediately. We will try to merge rpt
286        * with the following instructions (if any) once we encounter it in
287        * ir3_combine_rpt.
288        */
289       if (!try_merge(instr, rpt, rpt_n))
290          break;
291 
292       instr->repeat++;
293 
294       /* We cannot remove the rpt immediately since when it is the instruction
295        * after instr, foreach_instr_safe will fail. So mark it instead and
296        * remove it in ir3_combine_rpt when we encounter it.
297        */
298       rpt->flags |= IR3_INSTR_MARK;
299       list_delinit(&rpt->rpt_node);
300       ++rpt_n;
301       progress = true;
302    }
303 
304    list_delinit(&instr->rpt_node);
305    return progress;
306 }
307 
308 /* Merge compatible instructions in a repetition group into one or more rpt
309  * instructions.
310  */
311 bool
ir3_merge_rpt(struct ir3 * ir,struct ir3_shader_variant * v)312 ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
313 {
314    ir3_clear_mark(ir);
315    ir3_count_instructions(ir);
316    bool progress = false;
317 
318    foreach_block (block, &ir->block_list) {
319       foreach_instr_safe (instr, &block->instr_list) {
320          if (instr->flags & IR3_INSTR_MARK) {
321             list_delinit(&instr->node);
322             continue;
323          }
324 
325          progress |= merge_instr(instr);
326       }
327    }
328 
329    return progress;
330 }
331