1 /*
2 * Copyright 2024 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "ir3_nir.h"
7
8 bool
ir3_supports_vectorized_nir_op(nir_op op)9 ir3_supports_vectorized_nir_op(nir_op op)
10 {
11 switch (op) {
12 /* TODO: emitted as absneg which can often be folded away (e.g., into
13 * (neg)). This seems to often fail when repeated.
14 */
15 case nir_op_b2b1:
16
17 /* dsx/dsy don't seem to support repeat. */
18 case nir_op_fddx:
19 case nir_op_fddx_coarse:
20 case nir_op_fddx_fine:
21 case nir_op_fddy:
22 case nir_op_fddy_coarse:
23 case nir_op_fddy_fine:
24
25 /* dp2acc/dp4acc don't seem to support repeat. */
26 case nir_op_udot_4x8_uadd:
27 case nir_op_udot_4x8_uadd_sat:
28 case nir_op_sudot_4x8_iadd:
29 case nir_op_sudot_4x8_iadd_sat:
30
31 /* Among SFU instructions, only rcp doesn't seem to support repeat. */
32 case nir_op_frcp:
33 return false;
34
35 default:
36 return true;
37 }
38 }
39
40 uint8_t
ir3_nir_vectorize_filter(const nir_instr * instr,const void * data)41 ir3_nir_vectorize_filter(const nir_instr *instr, const void *data)
42 {
43 if (instr->type == nir_instr_type_phi)
44 return 4;
45 if (instr->type != nir_instr_type_alu)
46 return 0;
47
48 struct nir_alu_instr *alu = nir_instr_as_alu(instr);
49
50 if (!ir3_supports_vectorized_nir_op(alu->op))
51 return 0;
52
53 return 4;
54 }
55
56 static void
rpt_list_split(struct list_head * list,struct list_head * at)57 rpt_list_split(struct list_head *list, struct list_head *at)
58 {
59 struct list_head *new_last = at->prev;
60 new_last->next = list;
61 at->prev = list->prev;
62 list->prev->next = at;
63 list->prev = new_last;
64 }
65
66 static enum ir3_register_flags
rpt_compatible_src_flags(struct ir3_register * src)67 rpt_compatible_src_flags(struct ir3_register *src)
68 {
69 return src->flags &
70 (IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_FABS | IR3_REG_FNEG |
71 IR3_REG_BNOT | IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SSA |
72 IR3_REG_HALF | IR3_REG_SHARED);
73 }
74
75 static enum ir3_register_flags
rpt_compatible_dst_flags(struct ir3_instruction * instr)76 rpt_compatible_dst_flags(struct ir3_instruction *instr)
77 {
78 return instr->dsts[0]->flags & (IR3_REG_SSA | IR3_REG_HALF | IR3_REG_SHARED);
79 }
80
81 static enum ir3_register_flags
rpt_illegal_src_flags(struct ir3_register * src)82 rpt_illegal_src_flags(struct ir3_register *src)
83 {
84 return src->flags & (IR3_REG_ARRAY | IR3_REG_RELATIV);
85 }
86
87 static enum ir3_instruction_flags
rpt_compatible_instr_flags(struct ir3_instruction * instr)88 rpt_compatible_instr_flags(struct ir3_instruction *instr)
89 {
90 return instr->flags & IR3_INSTR_SAT;
91 }
92
93 static bool
supports_imm_r(unsigned opc)94 supports_imm_r(unsigned opc)
95 {
96 return opc == OPC_BARY_F || opc == OPC_FLAT_B;
97 }
98
99 static bool
srcs_can_rpt(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src,unsigned rpt_n)100 srcs_can_rpt(struct ir3_instruction *instr, struct ir3_register *src,
101 struct ir3_register *rpt_src, unsigned rpt_n)
102 {
103 if (rpt_illegal_src_flags(src) != 0 || rpt_illegal_src_flags(rpt_src) != 0)
104 return false;
105 if (rpt_compatible_src_flags(src) != rpt_compatible_src_flags(rpt_src))
106 return false;
107 if (src->flags & IR3_REG_IMMED) {
108 uint32_t val = src->uim_val;
109 uint32_t rpt_val = rpt_src->uim_val;
110
111 if (rpt_val == val)
112 return true;
113 if (supports_imm_r(instr->opc))
114 return rpt_val == val + rpt_n;
115 return false;
116 }
117
118 return true;
119 }
120
121 static bool
can_rpt(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)122 can_rpt(struct ir3_instruction *instr, struct ir3_instruction *rpt,
123 unsigned rpt_n)
124 {
125 if (rpt_n >= 4)
126 return false;
127 if (rpt->ip != instr->ip + rpt_n)
128 return false;
129 if (rpt->opc != instr->opc)
130 return false;
131 if (!ir3_supports_rpt(instr->block->shader->compiler, instr->opc))
132 return false;
133 if (rpt_compatible_instr_flags(rpt) != rpt_compatible_instr_flags(instr))
134 return false;
135 if (rpt_compatible_dst_flags(rpt) != rpt_compatible_dst_flags(instr))
136 return false;
137 if (instr->srcs_count != rpt->srcs_count)
138 return false;
139
140 foreach_src_n (src, src_n, instr) {
141 if (!srcs_can_rpt(instr, src, rpt->srcs[src_n], rpt_n))
142 return false;
143 }
144
145 return true;
146 }
147
/* Walk instr's repetition group and break it apart at the first member that
 * can never be merged with the instructions before it. Returns true if the
 * group was split.
 */
static bool
cleanup_rpt_instr(struct ir3_instruction *instr)
{
   /* Only process each group once, starting from its first instruction. */
   if (!ir3_instr_is_first_rpt(instr))
      return false;

   unsigned rpt_n = 1;
   foreach_instr_rpt_excl (rpt, instr) {
      if (!can_rpt(instr, rpt, rpt_n++)) {
         /* Keep [instr, rpt) together; rpt and everything after it become a
          * separate group.
          */
         rpt_list_split(&instr->rpt_node, &rpt->rpt_node);

         /* We have to do this recursively since later repetitions might come
          * before the first in the instruction list.
          */
         cleanup_rpt_instr(rpt);
         return true;
      }
   }

   return false;
}
169
170 /* Pre-RA pass to clean up repetition groups that can never be merged into a rpt
171 * instruction. This ensures we don't needlessly allocate merge sets for them.
172 */
173 bool
ir3_cleanup_rpt(struct ir3 * ir,struct ir3_shader_variant * v)174 ir3_cleanup_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
175 {
176 ir3_count_instructions(ir);
177 bool progress = false;
178
179 foreach_block (block, &ir->block_list) {
180 foreach_instr (instr, &block->instr_list)
181 progress |= cleanup_rpt_instr(instr);
182 }
183
184 return progress;
185 }
186
/* Classification of how a repetition's source relates to the corresponding
 * source of the first instruction in the group, as decided by
 * srcs_rpt_compatible.
 */
enum rpt_src_type {
   RPT_INCOMPATIBLE, /* Incompatible sources. */
   RPT_SET, /* Compatible sources that need (r) set. */
   RPT_DONT_SET, /* Compatible sources that don't need (r) set. */
};
192
/* Decide whether rpt_src can be folded into instr's source src when instr
 * already repeats (instr->repeat) times, and if so whether the source needs
 * the (r) flag (per-repetition increment) or not (same value for every
 * repetition).
 */
static enum rpt_src_type
srcs_rpt_compatible(struct ir3_instruction *instr, struct ir3_register *src,
                    struct ir3_register *rpt_src)
{
   /* Shared RA may have demoted some sources from shared to non-shared. When
    * this happened for some but not all instructions in a repeat group, the
    * assert below would trigger. Detect this here.
    */
   if ((src->flags & IR3_REG_SHARED) != (rpt_src->flags & IR3_REG_SHARED))
      return RPT_INCOMPATIBLE;

   assert(srcs_can_rpt(instr, src, rpt_src, instr->repeat + 1));

   if (src->flags & IR3_REG_IMMED) {
      /* Immediates that step by one per repetition need (r) (only legal for
       * opcodes that support it); identical immediates don't.
       */
      if (supports_imm_r(instr->opc) &&
          rpt_src->uim_val == src->uim_val + instr->repeat + 1) {
         return RPT_SET;
      }

      assert(rpt_src->uim_val == src->uim_val);
      return RPT_DONT_SET;
   }

   /* Consecutive registers: needs (r). Only allowed if (r) is already set
    * (from a previous merge) or this is the first repetition being merged —
    * otherwise earlier repetitions used the same register without (r).
    */
   if (rpt_src->num == src->num + instr->repeat + 1) {
      if ((src->flags & IR3_REG_R) || instr->repeat == 0)
         return RPT_SET;
      return RPT_INCOMPATIBLE;
   }

   /* Same register for every repetition: must not already have (r). */
   if (rpt_src->num == src->num && !(src->flags & IR3_REG_R))
      return RPT_DONT_SET;
   return RPT_INCOMPATIBLE;
}
226
static unsigned
inc_wrmask(unsigned wrmask)
{
   /* Grow a contiguous write mask by one component: 0b1 -> 0b11 -> 0b111. */
   return (wrmask << 1) | 1u;
}
232
/* Try to fold rpt, the rpt_n'th repetition, into instr. On success, instr's
 * source (r) flags and source/destination wrmasks are updated in place and
 * true is returned; the caller is responsible for bumping instr->repeat and
 * removing rpt. On failure, instr is left untouched.
 */
static bool
try_merge(struct ir3_instruction *instr, struct ir3_instruction *rpt,
          unsigned rpt_n)
{
   assert(rpt_n > 0 && rpt_n < 4);
   assert(instr->opc == rpt->opc);
   assert(instr->dsts_count == 1 && rpt->dsts_count == 1);
   assert(instr->srcs_count == rpt->srcs_count);
   assert(rpt_compatible_instr_flags(instr) == rpt_compatible_instr_flags(rpt));

   struct ir3_register *dst = instr->dsts[0];
   struct ir3_register *rpt_dst = rpt->dsts[0];

   /* Repetitions must be adjacent instructions writing consecutive
    * registers.
    */
   if (rpt->ip != instr->ip + rpt_n)
      return false;
   if (rpt_dst->num != dst->num + rpt_n)
      return false;

   /* Classify all sources first so that instr is only mutated once we know
    * the whole merge will succeed.
    */
   enum rpt_src_type srcs_rpt[instr->srcs_count];

   foreach_src_n (src, src_n, instr) {
      srcs_rpt[src_n] = srcs_rpt_compatible(instr, src, rpt->srcs[src_n]);

      if (srcs_rpt[src_n] == RPT_INCOMPATIBLE)
         return false;
   }

   foreach_src_n (src, src_n, instr) {
      /* instr may already carry (r) from previously merged repetitions,
       * hence it is masked out on the left-hand side only.
       */
      assert((src->flags & ~(IR3_REG_R | IR3_REG_KILL | IR3_REG_FIRST_KILL)) ==
             (rpt->srcs[src_n]->flags & ~(IR3_REG_KILL | IR3_REG_FIRST_KILL)));

      if (srcs_rpt[src_n] == RPT_SET) {
         src->flags |= IR3_REG_R;
         src->wrmask = inc_wrmask(src->wrmask);
      }
   }

   dst->wrmask = inc_wrmask(dst->wrmask);
   return true;
}
273
/* Merge as many members of instr's repetition group as possible into instr,
 * turning it into a (rptN) instruction. Returns true if anything was merged.
 */
static bool
merge_instr(struct ir3_instruction *instr)
{
   /* Only start merging from the first instruction of a group. */
   if (!ir3_instr_is_first_rpt(instr))
      return false;

   bool progress = false;

   unsigned rpt_n = 1;

   foreach_instr_rpt_excl_safe (rpt, instr) {
      /* When rpt cannot be merged, stop immediately. We will try to merge rpt
       * with the following instructions (if any) once we encounter it in
       * ir3_combine_rpt.
       */
      if (!try_merge(instr, rpt, rpt_n))
         break;

      instr->repeat++;

      /* We cannot remove the rpt immediately since when it is the instruction
       * after instr, foreach_instr_safe will fail. So mark it instead and
       * remove it in ir3_combine_rpt when we encounter it.
       */
      rpt->flags |= IR3_INSTR_MARK;
      list_delinit(&rpt->rpt_node);
      ++rpt_n;
      progress = true;
   }

   /* instr is now either a finished rpt instruction or a standalone one;
    * either way it no longer belongs to a repetition group.
    */
   list_delinit(&instr->rpt_node);
   return progress;
}
307
308 /* Merge compatible instructions in a repetition group into one or more rpt
309 * instructions.
310 */
311 bool
ir3_merge_rpt(struct ir3 * ir,struct ir3_shader_variant * v)312 ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
313 {
314 ir3_clear_mark(ir);
315 ir3_count_instructions(ir);
316 bool progress = false;
317
318 foreach_block (block, &ir->block_list) {
319 foreach_instr_safe (instr, &block->instr_list) {
320 if (instr->flags & IR3_INSTR_MARK) {
321 list_delinit(&instr->node);
322 continue;
323 }
324
325 progress |= merge_instr(instr);
326 }
327 }
328
329 return progress;
330 }
331