xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/legalize.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7 
8 use std::collections::{HashMap, HashSet};
9 
/// Builder type handed to the legalization code in this file; an alias for
/// [`SSAInstrBuilder`].
pub type LegalizeBuilder<'a> = SSAInstrBuilder<'a>;
11 
src_is_upred_reg(src: &Src) -> bool12 pub fn src_is_upred_reg(src: &Src) -> bool {
13     match &src.src_ref {
14         SrcRef::True | SrcRef::False => false,
15         SrcRef::SSA(ssa) => {
16             assert!(ssa.comps() == 1);
17             match ssa[0].file() {
18                 RegFile::Pred => false,
19                 RegFile::UPred => true,
20                 _ => panic!("Not a predicate source"),
21             }
22         }
23         SrcRef::Reg(_) => panic!("Not in SSA form"),
24         _ => panic!("Not a predicate source"),
25     }
26 }
27 
src_is_reg(src: &Src, reg_file: RegFile) -> bool28 pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool {
29     match src.src_ref {
30         SrcRef::Zero | SrcRef::True | SrcRef::False => true,
31         SrcRef::SSA(ssa) => ssa.file() == Some(reg_file),
32         SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
33         SrcRef::Reg(_) => panic!("Not in SSA form"),
34     }
35 }
36 
swap_srcs_if_not_reg( x: &mut Src, y: &mut Src, reg_file: RegFile, ) -> bool37 pub fn swap_srcs_if_not_reg(
38     x: &mut Src,
39     y: &mut Src,
40     reg_file: RegFile,
41 ) -> bool {
42     if !src_is_reg(x, reg_file) && src_is_reg(y, reg_file) {
43         std::mem::swap(x, y);
44         true
45     } else {
46         false
47     }
48 }
49 
src_is_imm(src: &Src) -> bool50 fn src_is_imm(src: &Src) -> bool {
51     matches!(src.src_ref, SrcRef::Imm32(_))
52 }
53 
54 pub trait LegalizeBuildHelpers: SSABuilder {
copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile)55     fn copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile) {
56         let tmp = self.alloc_ssa(reg_file, 1)[0];
57         self.copy_to(tmp.into(), (*ssa).into());
58         *ssa = tmp;
59     }
60 
copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile)61     fn copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile) {
62         for ssa in &mut vec[..] {
63             self.copy_ssa(ssa, reg_file);
64         }
65     }
66 
copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue)67     fn copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue) {
68         match ssa.file() {
69             RegFile::Pred => (),
70             RegFile::UPred => self.copy_ssa(ssa, RegFile::Pred),
71             _ => panic!("Not a predicate value"),
72         }
73     }
74 
copy_pred_if_upred(&mut self, pred: &mut Pred)75     fn copy_pred_if_upred(&mut self, pred: &mut Pred) {
76         match &mut pred.pred_ref {
77             PredRef::None => (),
78             PredRef::SSA(ssa) => {
79                 self.copy_pred_ssa_if_uniform(ssa);
80             }
81             PredRef::Reg(_) => panic!("Not in SSA form"),
82         }
83     }
84 
copy_src_if_upred(&mut self, src: &mut Src)85     fn copy_src_if_upred(&mut self, src: &mut Src) {
86         match &mut src.src_ref {
87             SrcRef::True | SrcRef::False => (),
88             SrcRef::SSA(ssa) => {
89                 assert!(ssa.comps() == 1);
90                 self.copy_pred_ssa_if_uniform(&mut ssa[0]);
91             }
92             SrcRef::Reg(_) => panic!("Not in SSA form"),
93             _ => panic!("Not a predicate source"),
94         }
95     }
96 
copy_src_if_not_same_file(&mut self, src: &mut Src)97     fn copy_src_if_not_same_file(&mut self, src: &mut Src) {
98         let SrcRef::SSA(vec) = &mut src.src_ref else {
99             return;
100         };
101 
102         if vec.comps() == 1 {
103             return;
104         }
105 
106         let mut all_same = true;
107         let file = vec[0].file();
108         for i in 1..vec.comps() {
109             let c_file = vec[usize::from(i)].file();
110             if c_file != file {
111                 debug_assert!(c_file.to_warp() == file.to_warp());
112                 all_same = false;
113             }
114         }
115 
116         if !all_same {
117             self.copy_ssa_ref(vec, file.to_warp());
118         }
119     }
120 
copy_alu_src( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )121     fn copy_alu_src(
122         &mut self,
123         src: &mut Src,
124         reg_file: RegFile,
125         src_type: SrcType,
126     ) {
127         let val = match src_type {
128             SrcType::GPR
129             | SrcType::ALU
130             | SrcType::F32
131             | SrcType::F16
132             | SrcType::F16v2
133             | SrcType::I32
134             | SrcType::B32 => self.alloc_ssa(reg_file, 1),
135             SrcType::F64 => self.alloc_ssa(reg_file, 2),
136             SrcType::Pred => self.alloc_ssa(reg_file, 1),
137             _ => panic!("Unknown source type"),
138         };
139 
140         if DEBUG.annotate() {
141             self.push_instr(Instr::new_boxed(OpAnnotate {
142                 annotation: "copy generated by legalizer".into(),
143             }));
144         }
145 
146         if val.comps() == 1 {
147             self.copy_to(val.into(), src.src_ref.into());
148         } else {
149             match src.src_ref {
150                 SrcRef::Imm32(u) => {
151                     // Immediates go in the top bits
152                     self.copy_to(val[0].into(), 0.into());
153                     self.copy_to(val[1].into(), u.into());
154                 }
155                 SrcRef::CBuf(cb) => {
156                     // CBufs load 8B
157                     self.copy_to(val[0].into(), cb.into());
158                     self.copy_to(val[1].into(), cb.offset(4).into());
159                 }
160                 SrcRef::SSA(vec) => {
161                     assert!(vec.comps() == 2);
162                     self.copy_to(val[0].into(), vec[0].into());
163                     self.copy_to(val[1].into(), vec[1].into());
164                 }
165                 _ => panic!("Invalid 64-bit SrcRef"),
166             }
167         }
168 
169         src.src_ref = val.into();
170     }
171 
copy_alu_src_if_not_reg( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )172     fn copy_alu_src_if_not_reg(
173         &mut self,
174         src: &mut Src,
175         reg_file: RegFile,
176         src_type: SrcType,
177     ) {
178         if !src_is_reg(src, reg_file) {
179             self.copy_alu_src(src, reg_file, src_type);
180         }
181     }
182 
copy_alu_src_if_not_reg_or_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )183     fn copy_alu_src_if_not_reg_or_imm(
184         &mut self,
185         src: &mut Src,
186         reg_file: RegFile,
187         src_type: SrcType,
188     ) {
189         if !src_is_reg(src, reg_file)
190             && !matches!(&src.src_ref, SrcRef::Imm32(_))
191         {
192             self.copy_alu_src(src, reg_file, src_type);
193         }
194     }
195 
copy_alu_src_if_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )196     fn copy_alu_src_if_imm(
197         &mut self,
198         src: &mut Src,
199         reg_file: RegFile,
200         src_type: SrcType,
201     ) {
202         if src_is_imm(src) {
203             self.copy_alu_src(src, reg_file, src_type);
204         }
205     }
206 
copy_alu_src_if_ineg_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )207     fn copy_alu_src_if_ineg_imm(
208         &mut self,
209         src: &mut Src,
210         reg_file: RegFile,
211         src_type: SrcType,
212     ) {
213         assert!(src_type == SrcType::I32);
214         if src_is_imm(src) && src.src_mod.is_ineg() {
215             self.copy_alu_src(src, reg_file, src_type);
216         }
217     }
218 
copy_alu_src_if_both_not_reg( &mut self, src1: &Src, src2: &mut Src, reg_file: RegFile, src_type: SrcType, )219     fn copy_alu_src_if_both_not_reg(
220         &mut self,
221         src1: &Src,
222         src2: &mut Src,
223         reg_file: RegFile,
224         src_type: SrcType,
225     ) {
226         if !src_is_reg(src1, reg_file) && !src_is_reg(src2, reg_file) {
227             self.copy_alu_src(src2, reg_file, src_type);
228         }
229     }
230 
copy_alu_src_and_lower_fmod( &mut self, src: &mut Src, src_type: SrcType, )231     fn copy_alu_src_and_lower_fmod(
232         &mut self,
233         src: &mut Src,
234         src_type: SrcType,
235     ) {
236         match src_type {
237             SrcType::F16 | SrcType::F16v2 => {
238                 let val = self.alloc_ssa(RegFile::GPR, 1);
239                 self.push_op(OpHAdd2 {
240                     dst: val.into(),
241                     srcs: [Src::new_zero().fneg(), *src],
242                     saturate: false,
243                     ftz: false,
244                     f32: false,
245                 });
246                 *src = val.into();
247             }
248             SrcType::F32 => {
249                 let val = self.alloc_ssa(RegFile::GPR, 1);
250                 self.push_op(OpFAdd {
251                     dst: val.into(),
252                     srcs: [Src::new_zero().fneg(), *src],
253                     saturate: false,
254                     rnd_mode: FRndMode::NearestEven,
255                     ftz: false,
256                 });
257                 *src = val.into();
258             }
259             SrcType::F64 => {
260                 let val = self.alloc_ssa(RegFile::GPR, 2);
261                 self.push_op(OpDAdd {
262                     dst: val.into(),
263                     srcs: [Src::new_zero().fneg(), *src],
264                     rnd_mode: FRndMode::NearestEven,
265                 });
266                 *src = val.into();
267             }
268             _ => panic!("Invalid ffabs srouce type"),
269         }
270     }
271 
copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef)272     fn copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef) {
273         for ssa in &mut ssa_ref[..] {
274             if ssa.is_uniform() {
275                 let warp = self.alloc_ssa(ssa.file().to_warp(), 1)[0];
276                 self.copy_to(warp.into(), (*ssa).into());
277                 *ssa = warp;
278             }
279         }
280     }
281 }
282 
// Every helper is a default method on the trait, so the impl body is empty.
impl LegalizeBuildHelpers for LegalizeBuilder<'_> {}
284 
legalize_instr( sm: &dyn ShaderModel, b: &mut LegalizeBuilder, bl: &impl BlockLiveness, block_uniform: bool, pinned: &HashSet<SSARef>, ip: usize, instr: &mut Instr, )285 fn legalize_instr(
286     sm: &dyn ShaderModel,
287     b: &mut LegalizeBuilder,
288     bl: &impl BlockLiveness,
289     block_uniform: bool,
290     pinned: &HashSet<SSARef>,
291     ip: usize,
292     instr: &mut Instr,
293 ) {
294     // Handle a few no-op cases up-front
295     match &instr.op {
296         Op::Undef(_)
297         | Op::PhiSrcs(_)
298         | Op::PhiDsts(_)
299         | Op::Pin(_)
300         | Op::Unpin(_)
301         | Op::RegOut(_) => {
302             // These are implemented by RA and can take pretty much anything
303             // you can throw at them.
304             debug_assert!(instr.pred.is_true());
305             return;
306         }
307         Op::Copy(_) => {
308             // OpCopy is implemented in a lowering pass and can handle anything
309             return;
310         }
311         Op::SrcBar(_) => {
312             // This is turned into a nop by calc_instr_deps
313             return;
314         }
315         Op::Swap(_) | Op::ParCopy(_) => {
316             // These are generated by RA and should not exist yet
317             panic!("Unsupported instruction");
318         }
319         _ => (),
320     }
321 
322     if !instr.is_uniform() {
323         b.copy_pred_if_upred(&mut instr.pred);
324     }
325 
326     let src_types = instr.src_types();
327     for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
328         *src = src.fold_imm(src_types[i]);
329         b.copy_src_if_not_same_file(src);
330 
331         if !block_uniform {
332             // In non-uniform control-flow, we can't collect uniform vectors so
333             // we need to insert copies to warp regs which we can collect.
334             match &mut src.src_ref {
335                 SrcRef::SSA(vec) => {
336                     if vec.is_uniform()
337                         && vec.comps() > 1
338                         && !pinned.contains(vec)
339                     {
340                         b.copy_ssa_ref(vec, vec.file().unwrap().to_warp());
341                     }
342                 }
343                 SrcRef::CBuf(CBufRef {
344                     buf: CBuf::BindlessSSA(handle),
345                     ..
346                 }) => assert!(pinned.contains(handle)),
347                 _ => (),
348             }
349         }
350     }
351 
352     // OpBreak and OpBSsy impose additional RA constraints
353     match &mut instr.op {
354         Op::Break(OpBreak {
355             bar_in, bar_out, ..
356         })
357         | Op::BSSy(OpBSSy {
358             bar_in, bar_out, ..
359         }) => {
360             let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
361             if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
362                 let gpr = b.bmov_to_gpr(*bar_in);
363                 let tmp = b.bmov_to_bar(gpr.into());
364                 *bar_in = tmp.into();
365             }
366         }
367         _ => (),
368     }
369 
370     sm.legalize_op(b, &mut instr.op);
371 
372     let mut vec_src_map: HashMap<SSARef, SSARef> = HashMap::new();
373     let mut vec_comps = HashSet::new();
374     for src in instr.srcs_mut() {
375         if let SrcRef::SSA(vec) = &src.src_ref {
376             if vec.comps() == 1 {
377                 continue;
378             }
379 
380             // If the same vector shows up twice in one instruction, that's
381             // okay. Just make it look the same as the previous source we
382             // fixed up.
383             if let Some(new_vec) = vec_src_map.get(vec) {
384                 src.src_ref = (*new_vec).into();
385                 continue;
386             }
387 
388             let mut new_vec = *vec;
389             for c in 0..vec.comps() {
390                 let ssa = vec[usize::from(c)];
391                 // If the same SSA value shows up in multiple non-identical
392                 // vector sources or as multiple components in the same
393                 // source, we need to make a copy so it can get assigned to
394                 // multiple different registers.
395                 if vec_comps.get(&ssa).is_some() {
396                     let copy = b.alloc_ssa(ssa.file(), 1)[0];
397                     b.copy_to(copy.into(), ssa.into());
398                     new_vec[usize::from(c)] = copy;
399                 } else {
400                     vec_comps.insert(ssa);
401                 }
402             }
403 
404             vec_src_map.insert(*vec, new_vec);
405             src.src_ref = new_vec.into();
406         }
407     }
408 }
409 
410 impl Shader<'_> {
legalize(&mut self)411     pub fn legalize(&mut self) {
412         let sm = self.sm;
413         for f in &mut self.functions {
414             let live = SimpleLiveness::for_function(f);
415             let mut pinned = HashSet::new();
416 
417             for (bi, b) in f.blocks.iter_mut().enumerate() {
418                 let bl = live.block_live(bi);
419                 let bu = b.uniform;
420 
421                 let mut instrs = Vec::new();
422                 for (ip, mut instr) in b.instrs.drain(..).enumerate() {
423                     if let Op::Pin(pin) = &instr.op {
424                         if let Dst::SSA(ssa) = &pin.dst {
425                             pinned.insert(*ssa);
426                         }
427                     }
428 
429                     let mut b = SSAInstrBuilder::new(sm, &mut f.ssa_alloc);
430                     legalize_instr(sm, &mut b, bl, bu, &pinned, ip, &mut instr);
431                     b.push_instr(instr);
432                     instrs.append(&mut b.as_vec());
433                 }
434                 b.instrs = instrs;
435             }
436         }
437     }
438 }
439