xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/lower_copy_swap.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::{
5     api::{GetDebugFlags, DEBUG},
6     ir::*,
7 };
8 
9 use std::cmp::max;
10 
/// State carried by the pass that lowers `OpCopy`, `OpSwap` and `OpR2UR`.
struct LowerCopySwap {
    /// Offset at which the slots backing `RegFile::Mem` registers begin.
    /// It is set once from the size handed to `new()` and only read
    /// afterwards.
    slm_start: u32,
    /// Running high-water mark of the local memory in use.  It starts at the
    /// size handed to `new()` and is raised whenever a `Mem` register slot
    /// beyond the current size is accessed (presumably counted in bytes,
    /// since each 32-bit slot advances it by 4 -- TODO confirm).
    slm_size: u32,
}
15 
16 impl LowerCopySwap {
new(slm_size: u32) -> Self17     fn new(slm_size: u32) -> Self {
18         Self {
19             slm_start: slm_size,
20             slm_size: slm_size,
21         }
22     }
23 
lower_copy(&mut self, b: &mut impl Builder, copy: OpCopy)24     fn lower_copy(&mut self, b: &mut impl Builder, copy: OpCopy) {
25         let dst_reg = copy.dst.as_reg().unwrap();
26         assert!(dst_reg.comps() == 1);
27         assert!(copy.src.src_mod.is_none());
28         assert!(copy.src.is_uniform() || !dst_reg.is_uniform());
29 
30         match dst_reg.file() {
31             RegFile::GPR | RegFile::UGPR => match copy.src.src_ref {
32                 SrcRef::Zero | SrcRef::Imm32(_) => {
33                     b.push_op(OpMov {
34                         dst: copy.dst,
35                         src: copy.src,
36                         quad_lanes: 0xf,
37                     });
38                 }
39                 SrcRef::CBuf(_) => match dst_reg.file() {
40                     RegFile::GPR => {
41                         b.push_op(OpMov {
42                             dst: copy.dst,
43                             src: copy.src,
44                             quad_lanes: 0xf,
45                         });
46                     }
47                     RegFile::UGPR => {
48                         b.push_op(OpLdc {
49                             dst: copy.dst,
50                             cb: copy.src,
51                             offset: 0.into(),
52                             mode: LdcMode::Indexed,
53                             mem_type: MemType::B32,
54                         });
55                     }
56                     _ => panic!("Invalid cbuf destination"),
57                 },
58                 SrcRef::True | SrcRef::False => {
59                     panic!("Cannot copy to GPR");
60                 }
61                 SrcRef::Reg(src_reg) => match src_reg.file() {
62                     RegFile::GPR | RegFile::UGPR => {
63                         b.push_op(OpMov {
64                             dst: copy.dst,
65                             src: copy.src,
66                             quad_lanes: 0xf,
67                         });
68                     }
69                     RegFile::Bar => {
70                         b.push_op(OpBMov {
71                             dst: copy.dst,
72                             src: copy.src,
73                             clear: false,
74                         });
75                     }
76                     RegFile::Mem => {
77                         let access = MemAccess {
78                             mem_type: MemType::B32,
79                             space: MemSpace::Local,
80                             order: MemOrder::Strong(MemScope::CTA),
81                             eviction_priority: MemEvictionPriority::Normal,
82                         };
83                         let addr = self.slm_start + src_reg.base_idx() * 4;
84                         self.slm_size = max(self.slm_size, addr + 4);
85                         b.push_op(OpLd {
86                             dst: copy.dst,
87                             addr: Src::new_zero(),
88                             offset: addr.try_into().unwrap(),
89                             access: access,
90                         });
91                     }
92                     _ => panic!("Cannot copy to GPR"),
93                 },
94                 SrcRef::SSA(_) => panic!("Should be run after RA"),
95             },
96             RegFile::Pred | RegFile::UPred => match copy.src.src_ref {
97                 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => {
98                     panic!("Cannot copy to Pred");
99                 }
100                 SrcRef::True => {
101                     b.lop2_to(
102                         copy.dst,
103                         LogicOp2::PassB,
104                         Src::new_imm_bool(true),
105                         Src::new_imm_bool(true),
106                     );
107                 }
108                 SrcRef::False => {
109                     b.lop2_to(
110                         copy.dst,
111                         LogicOp2::PassB,
112                         Src::new_imm_bool(true),
113                         Src::new_imm_bool(false),
114                     );
115                 }
116                 SrcRef::Reg(src_reg) => match src_reg.file() {
117                     RegFile::Pred => {
118                         b.lop2_to(
119                             copy.dst,
120                             LogicOp2::PassB,
121                             Src::new_imm_bool(true),
122                             copy.src,
123                         );
124                     }
125                     RegFile::UPred => {
126                         // PLOP3 supports a UPred in src[2]
127                         b.push_op(OpPLop3 {
128                             dsts: [copy.dst, Dst::None],
129                             srcs: [true.into(), true.into(), copy.src],
130                             ops: [
131                                 LogicOp3::new_lut(&|_, _, z| z),
132                                 LogicOp3::new_const(false),
133                             ],
134                         });
135                     }
136                     _ => panic!("Cannot copy to Pred"),
137                 },
138                 SrcRef::SSA(_) => panic!("Should be run after RA"),
139             },
140             RegFile::Bar => match copy.src.src_ref {
141                 SrcRef::Reg(src_reg) => match src_reg.file() {
142                     RegFile::GPR | RegFile::UGPR => {
143                         b.push_op(OpBMov {
144                             dst: copy.dst,
145                             src: copy.src,
146                             clear: false,
147                         });
148                     }
149                     _ => panic!("Cannot copy to Bar"),
150                 },
151                 _ => panic!("Cannot copy to Bar"),
152             },
153             RegFile::Mem => match copy.src.src_ref {
154                 SrcRef::Reg(src_reg) => match src_reg.file() {
155                     RegFile::GPR => {
156                         let access = MemAccess {
157                             mem_type: MemType::B32,
158                             space: MemSpace::Local,
159                             order: MemOrder::Strong(MemScope::CTA),
160                             eviction_priority: MemEvictionPriority::Normal,
161                         };
162                         let addr = self.slm_start + dst_reg.base_idx() * 4;
163                         self.slm_size = max(self.slm_size, addr + 4);
164                         b.push_op(OpSt {
165                             addr: Src::new_zero(),
166                             data: copy.src,
167                             offset: addr.try_into().unwrap(),
168                             access: access,
169                         });
170                     }
171                     _ => panic!("Cannot copy to Mem"),
172                 },
173                 _ => panic!("Cannot copy to Mem"),
174             },
175             _ => panic!("Unhandled register file"),
176         }
177     }
178 
lower_r2ur(&mut self, b: &mut impl Builder, r2ur: OpR2UR)179     fn lower_r2ur(&mut self, b: &mut impl Builder, r2ur: OpR2UR) {
180         assert!(r2ur.src.src_mod.is_none());
181         if r2ur.src.is_uniform() {
182             let copy = OpCopy {
183                 dst: r2ur.dst,
184                 src: r2ur.src,
185             };
186             self.lower_copy(b, copy);
187         } else {
188             let src_file = r2ur.src.src_ref.as_reg().unwrap().file();
189             let dst_file = r2ur.dst.as_reg().unwrap().file();
190             match src_file {
191                 RegFile::GPR => {
192                     assert!(dst_file == RegFile::UGPR);
193                     b.push_op(r2ur);
194                 }
195                 RegFile::Pred => {
196                     assert!(dst_file == RegFile::UPred);
197                     // It doesn't matter what channel we take
198                     b.push_op(OpVote {
199                         op: VoteOp::Any,
200                         ballot: Dst::None,
201                         vote: r2ur.dst,
202                         pred: r2ur.src,
203                     });
204                 }
205                 _ => panic!("No matching uniform register file"),
206             }
207         }
208     }
209 
lower_swap(&mut self, b: &mut impl Builder, swap: OpSwap)210     fn lower_swap(&mut self, b: &mut impl Builder, swap: OpSwap) {
211         let x = *swap.dsts[0].as_reg().unwrap();
212         let y = *swap.dsts[1].as_reg().unwrap();
213 
214         assert!(x.file() == y.file());
215         assert!(x.file() != RegFile::Mem);
216         assert!(x.comps() == 1 && y.comps() == 1);
217         assert!(swap.srcs[0].src_mod.is_none());
218         assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y);
219         assert!(swap.srcs[1].src_mod.is_none());
220         assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x);
221 
222         if x == y {
223             // Nothing to do
224         } else if x.is_predicate() && b.sm() >= 70 {
225             b.push_op(OpPLop3 {
226                 dsts: [x.into(), y.into()],
227                 srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
228                 ops: [
229                     LogicOp3::new_lut(&|_, y, _| y),
230                     LogicOp3::new_lut(&|x, _, _| x),
231                 ],
232             });
233         } else {
234             b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
235             b.lop2_to(y.into(), LogicOp2::Xor, x.into(), y.into());
236             b.lop2_to(x.into(), LogicOp2::Xor, x.into(), y.into());
237         }
238     }
239 
run(&mut self, s: &mut Shader)240     fn run(&mut self, s: &mut Shader) {
241         let sm = s.sm;
242         s.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
243             match instr.op {
244                 Op::R2UR(r2ur) => {
245                     debug_assert!(instr.pred.is_true());
246                     let mut b = InstrBuilder::new(sm);
247                     if DEBUG.annotate() {
248                         b.push_instr(Instr::new_boxed(OpAnnotate {
249                             annotation: "r2ur lowered by lower_copy_swap"
250                                 .into(),
251                         }));
252                     }
253                     self.lower_r2ur(&mut b, r2ur);
254                     b.as_mapped_instrs()
255                 }
256                 Op::Copy(copy) => {
257                     debug_assert!(instr.pred.is_true());
258                     let mut b = InstrBuilder::new(sm);
259                     if DEBUG.annotate() {
260                         b.push_instr(Instr::new_boxed(OpAnnotate {
261                             annotation: "copy lowered by lower_copy_swap"
262                                 .into(),
263                         }));
264                     }
265                     self.lower_copy(&mut b, copy);
266                     b.as_mapped_instrs()
267                 }
268                 Op::Swap(swap) => {
269                     debug_assert!(instr.pred.is_true());
270                     let mut b = InstrBuilder::new(sm);
271                     if DEBUG.annotate() {
272                         b.push_instr(Instr::new_boxed(OpAnnotate {
273                             annotation: "swap lowered by lower_copy_swap"
274                                 .into(),
275                         }));
276                     }
277                     self.lower_swap(&mut b, swap);
278                     b.as_mapped_instrs()
279                 }
280                 _ => MappedInstrs::One(instr),
281             }
282         });
283     }
284 }
285 
286 impl Shader<'_> {
lower_copy_swap(&mut self)287     pub fn lower_copy_swap(&mut self) {
288         let mut pass = LowerCopySwap::new(self.info.slm_size);
289         pass.run(self);
290         self.info.slm_size = pass.slm_size;
291     }
292 }
293