1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7
8 use std::collections::{HashMap, HashSet};
9
/// Builder type used by the legalization pass.
///
/// This is an alias for `SSAInstrBuilder`; the legalization helpers in
/// `LegalizeBuildHelpers` are implemented for it further down in this file.
pub type LegalizeBuilder<'a> = SSAInstrBuilder<'a>;
11
src_is_upred_reg(src: &Src) -> bool12 pub fn src_is_upred_reg(src: &Src) -> bool {
13 match &src.src_ref {
14 SrcRef::True | SrcRef::False => false,
15 SrcRef::SSA(ssa) => {
16 assert!(ssa.comps() == 1);
17 match ssa[0].file() {
18 RegFile::Pred => false,
19 RegFile::UPred => true,
20 _ => panic!("Not a predicate source"),
21 }
22 }
23 SrcRef::Reg(_) => panic!("Not in SSA form"),
24 _ => panic!("Not a predicate source"),
25 }
26 }
27
src_is_reg(src: &Src, reg_file: RegFile) -> bool28 pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool {
29 match src.src_ref {
30 SrcRef::Zero | SrcRef::True | SrcRef::False => true,
31 SrcRef::SSA(ssa) => ssa.file() == Some(reg_file),
32 SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
33 SrcRef::Reg(_) => panic!("Not in SSA form"),
34 }
35 }
36
swap_srcs_if_not_reg( x: &mut Src, y: &mut Src, reg_file: RegFile, ) -> bool37 pub fn swap_srcs_if_not_reg(
38 x: &mut Src,
39 y: &mut Src,
40 reg_file: RegFile,
41 ) -> bool {
42 if !src_is_reg(x, reg_file) && src_is_reg(y, reg_file) {
43 std::mem::swap(x, y);
44 true
45 } else {
46 false
47 }
48 }
49
src_is_imm(src: &Src) -> bool50 fn src_is_imm(src: &Src) -> bool {
51 matches!(src.src_ref, SrcRef::Imm32(_))
52 }
53
54 pub trait LegalizeBuildHelpers: SSABuilder {
copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile)55 fn copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile) {
56 let tmp = self.alloc_ssa(reg_file, 1)[0];
57 self.copy_to(tmp.into(), (*ssa).into());
58 *ssa = tmp;
59 }
60
copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile)61 fn copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile) {
62 for ssa in &mut vec[..] {
63 self.copy_ssa(ssa, reg_file);
64 }
65 }
66
copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue)67 fn copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue) {
68 match ssa.file() {
69 RegFile::Pred => (),
70 RegFile::UPred => self.copy_ssa(ssa, RegFile::Pred),
71 _ => panic!("Not a predicate value"),
72 }
73 }
74
copy_pred_if_upred(&mut self, pred: &mut Pred)75 fn copy_pred_if_upred(&mut self, pred: &mut Pred) {
76 match &mut pred.pred_ref {
77 PredRef::None => (),
78 PredRef::SSA(ssa) => {
79 self.copy_pred_ssa_if_uniform(ssa);
80 }
81 PredRef::Reg(_) => panic!("Not in SSA form"),
82 }
83 }
84
copy_src_if_upred(&mut self, src: &mut Src)85 fn copy_src_if_upred(&mut self, src: &mut Src) {
86 match &mut src.src_ref {
87 SrcRef::True | SrcRef::False => (),
88 SrcRef::SSA(ssa) => {
89 assert!(ssa.comps() == 1);
90 self.copy_pred_ssa_if_uniform(&mut ssa[0]);
91 }
92 SrcRef::Reg(_) => panic!("Not in SSA form"),
93 _ => panic!("Not a predicate source"),
94 }
95 }
96
copy_src_if_not_same_file(&mut self, src: &mut Src)97 fn copy_src_if_not_same_file(&mut self, src: &mut Src) {
98 let SrcRef::SSA(vec) = &mut src.src_ref else {
99 return;
100 };
101
102 if vec.comps() == 1 {
103 return;
104 }
105
106 let mut all_same = true;
107 let file = vec[0].file();
108 for i in 1..vec.comps() {
109 let c_file = vec[usize::from(i)].file();
110 if c_file != file {
111 debug_assert!(c_file.to_warp() == file.to_warp());
112 all_same = false;
113 }
114 }
115
116 if !all_same {
117 self.copy_ssa_ref(vec, file.to_warp());
118 }
119 }
120
copy_alu_src( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )121 fn copy_alu_src(
122 &mut self,
123 src: &mut Src,
124 reg_file: RegFile,
125 src_type: SrcType,
126 ) {
127 let val = match src_type {
128 SrcType::GPR
129 | SrcType::ALU
130 | SrcType::F32
131 | SrcType::F16
132 | SrcType::F16v2
133 | SrcType::I32
134 | SrcType::B32 => self.alloc_ssa(reg_file, 1),
135 SrcType::F64 => self.alloc_ssa(reg_file, 2),
136 SrcType::Pred => self.alloc_ssa(reg_file, 1),
137 _ => panic!("Unknown source type"),
138 };
139
140 if DEBUG.annotate() {
141 self.push_instr(Instr::new_boxed(OpAnnotate {
142 annotation: "copy generated by legalizer".into(),
143 }));
144 }
145
146 if val.comps() == 1 {
147 self.copy_to(val.into(), src.src_ref.into());
148 } else {
149 match src.src_ref {
150 SrcRef::Imm32(u) => {
151 // Immediates go in the top bits
152 self.copy_to(val[0].into(), 0.into());
153 self.copy_to(val[1].into(), u.into());
154 }
155 SrcRef::CBuf(cb) => {
156 // CBufs load 8B
157 self.copy_to(val[0].into(), cb.into());
158 self.copy_to(val[1].into(), cb.offset(4).into());
159 }
160 SrcRef::SSA(vec) => {
161 assert!(vec.comps() == 2);
162 self.copy_to(val[0].into(), vec[0].into());
163 self.copy_to(val[1].into(), vec[1].into());
164 }
165 _ => panic!("Invalid 64-bit SrcRef"),
166 }
167 }
168
169 src.src_ref = val.into();
170 }
171
copy_alu_src_if_not_reg( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )172 fn copy_alu_src_if_not_reg(
173 &mut self,
174 src: &mut Src,
175 reg_file: RegFile,
176 src_type: SrcType,
177 ) {
178 if !src_is_reg(src, reg_file) {
179 self.copy_alu_src(src, reg_file, src_type);
180 }
181 }
182
copy_alu_src_if_not_reg_or_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )183 fn copy_alu_src_if_not_reg_or_imm(
184 &mut self,
185 src: &mut Src,
186 reg_file: RegFile,
187 src_type: SrcType,
188 ) {
189 if !src_is_reg(src, reg_file)
190 && !matches!(&src.src_ref, SrcRef::Imm32(_))
191 {
192 self.copy_alu_src(src, reg_file, src_type);
193 }
194 }
195
copy_alu_src_if_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )196 fn copy_alu_src_if_imm(
197 &mut self,
198 src: &mut Src,
199 reg_file: RegFile,
200 src_type: SrcType,
201 ) {
202 if src_is_imm(src) {
203 self.copy_alu_src(src, reg_file, src_type);
204 }
205 }
206
copy_alu_src_if_ineg_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )207 fn copy_alu_src_if_ineg_imm(
208 &mut self,
209 src: &mut Src,
210 reg_file: RegFile,
211 src_type: SrcType,
212 ) {
213 assert!(src_type == SrcType::I32);
214 if src_is_imm(src) && src.src_mod.is_ineg() {
215 self.copy_alu_src(src, reg_file, src_type);
216 }
217 }
218
copy_alu_src_if_both_not_reg( &mut self, src1: &Src, src2: &mut Src, reg_file: RegFile, src_type: SrcType, )219 fn copy_alu_src_if_both_not_reg(
220 &mut self,
221 src1: &Src,
222 src2: &mut Src,
223 reg_file: RegFile,
224 src_type: SrcType,
225 ) {
226 if !src_is_reg(src1, reg_file) && !src_is_reg(src2, reg_file) {
227 self.copy_alu_src(src2, reg_file, src_type);
228 }
229 }
230
copy_alu_src_and_lower_fmod( &mut self, src: &mut Src, src_type: SrcType, )231 fn copy_alu_src_and_lower_fmod(
232 &mut self,
233 src: &mut Src,
234 src_type: SrcType,
235 ) {
236 match src_type {
237 SrcType::F16 | SrcType::F16v2 => {
238 let val = self.alloc_ssa(RegFile::GPR, 1);
239 self.push_op(OpHAdd2 {
240 dst: val.into(),
241 srcs: [Src::new_zero().fneg(), *src],
242 saturate: false,
243 ftz: false,
244 f32: false,
245 });
246 *src = val.into();
247 }
248 SrcType::F32 => {
249 let val = self.alloc_ssa(RegFile::GPR, 1);
250 self.push_op(OpFAdd {
251 dst: val.into(),
252 srcs: [Src::new_zero().fneg(), *src],
253 saturate: false,
254 rnd_mode: FRndMode::NearestEven,
255 ftz: false,
256 });
257 *src = val.into();
258 }
259 SrcType::F64 => {
260 let val = self.alloc_ssa(RegFile::GPR, 2);
261 self.push_op(OpDAdd {
262 dst: val.into(),
263 srcs: [Src::new_zero().fneg(), *src],
264 rnd_mode: FRndMode::NearestEven,
265 });
266 *src = val.into();
267 }
268 _ => panic!("Invalid ffabs srouce type"),
269 }
270 }
271
copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef)272 fn copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef) {
273 for ssa in &mut ssa_ref[..] {
274 if ssa.is_uniform() {
275 let warp = self.alloc_ssa(ssa.file().to_warp(), 1)[0];
276 self.copy_to(warp.into(), (*ssa).into());
277 *ssa = warp;
278 }
279 }
280 }
281 }
282
283 impl LegalizeBuildHelpers for LegalizeBuilder<'_> {}
284
legalize_instr( sm: &dyn ShaderModel, b: &mut LegalizeBuilder, bl: &impl BlockLiveness, block_uniform: bool, pinned: &HashSet<SSARef>, ip: usize, instr: &mut Instr, )285 fn legalize_instr(
286 sm: &dyn ShaderModel,
287 b: &mut LegalizeBuilder,
288 bl: &impl BlockLiveness,
289 block_uniform: bool,
290 pinned: &HashSet<SSARef>,
291 ip: usize,
292 instr: &mut Instr,
293 ) {
294 // Handle a few no-op cases up-front
295 match &instr.op {
296 Op::Undef(_)
297 | Op::PhiSrcs(_)
298 | Op::PhiDsts(_)
299 | Op::Pin(_)
300 | Op::Unpin(_)
301 | Op::RegOut(_) => {
302 // These are implemented by RA and can take pretty much anything
303 // you can throw at them.
304 debug_assert!(instr.pred.is_true());
305 return;
306 }
307 Op::Copy(_) => {
308 // OpCopy is implemented in a lowering pass and can handle anything
309 return;
310 }
311 Op::SrcBar(_) => {
312 // This is turned into a nop by calc_instr_deps
313 return;
314 }
315 Op::Swap(_) | Op::ParCopy(_) => {
316 // These are generated by RA and should not exist yet
317 panic!("Unsupported instruction");
318 }
319 _ => (),
320 }
321
322 if !instr.is_uniform() {
323 b.copy_pred_if_upred(&mut instr.pred);
324 }
325
326 let src_types = instr.src_types();
327 for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
328 *src = src.fold_imm(src_types[i]);
329 b.copy_src_if_not_same_file(src);
330
331 if !block_uniform {
332 // In non-uniform control-flow, we can't collect uniform vectors so
333 // we need to insert copies to warp regs which we can collect.
334 match &mut src.src_ref {
335 SrcRef::SSA(vec) => {
336 if vec.is_uniform()
337 && vec.comps() > 1
338 && !pinned.contains(vec)
339 {
340 b.copy_ssa_ref(vec, vec.file().unwrap().to_warp());
341 }
342 }
343 SrcRef::CBuf(CBufRef {
344 buf: CBuf::BindlessSSA(handle),
345 ..
346 }) => assert!(pinned.contains(handle)),
347 _ => (),
348 }
349 }
350 }
351
352 // OpBreak and OpBSsy impose additional RA constraints
353 match &mut instr.op {
354 Op::Break(OpBreak {
355 bar_in, bar_out, ..
356 })
357 | Op::BSSy(OpBSSy {
358 bar_in, bar_out, ..
359 }) => {
360 let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
361 if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
362 let gpr = b.bmov_to_gpr(*bar_in);
363 let tmp = b.bmov_to_bar(gpr.into());
364 *bar_in = tmp.into();
365 }
366 }
367 _ => (),
368 }
369
370 sm.legalize_op(b, &mut instr.op);
371
372 let mut vec_src_map: HashMap<SSARef, SSARef> = HashMap::new();
373 let mut vec_comps = HashSet::new();
374 for src in instr.srcs_mut() {
375 if let SrcRef::SSA(vec) = &src.src_ref {
376 if vec.comps() == 1 {
377 continue;
378 }
379
380 // If the same vector shows up twice in one instruction, that's
381 // okay. Just make it look the same as the previous source we
382 // fixed up.
383 if let Some(new_vec) = vec_src_map.get(vec) {
384 src.src_ref = (*new_vec).into();
385 continue;
386 }
387
388 let mut new_vec = *vec;
389 for c in 0..vec.comps() {
390 let ssa = vec[usize::from(c)];
391 // If the same SSA value shows up in multiple non-identical
392 // vector sources or as multiple components in the same
393 // source, we need to make a copy so it can get assigned to
394 // multiple different registers.
395 if vec_comps.get(&ssa).is_some() {
396 let copy = b.alloc_ssa(ssa.file(), 1)[0];
397 b.copy_to(copy.into(), ssa.into());
398 new_vec[usize::from(c)] = copy;
399 } else {
400 vec_comps.insert(ssa);
401 }
402 }
403
404 vec_src_map.insert(*vec, new_vec);
405 src.src_ref = new_vec.into();
406 }
407 }
408 }
409
410 impl Shader<'_> {
legalize(&mut self)411 pub fn legalize(&mut self) {
412 let sm = self.sm;
413 for f in &mut self.functions {
414 let live = SimpleLiveness::for_function(f);
415 let mut pinned = HashSet::new();
416
417 for (bi, b) in f.blocks.iter_mut().enumerate() {
418 let bl = live.block_live(bi);
419 let bu = b.uniform;
420
421 let mut instrs = Vec::new();
422 for (ip, mut instr) in b.instrs.drain(..).enumerate() {
423 if let Op::Pin(pin) = &instr.op {
424 if let Dst::SSA(ssa) = &pin.dst {
425 pinned.insert(*ssa);
426 }
427 }
428
429 let mut b = SSAInstrBuilder::new(sm, &mut f.ssa_alloc);
430 legalize_instr(sm, &mut b, bl, bu, &pinned, ip, &mut instr);
431 b.push_instr(instr);
432 instrs.append(&mut b.as_vec());
433 }
434 b.instrs = instrs;
435 }
436 }
437 }
438 }
439