xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/opt_lop.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::ir::*;
5 
6 use std::collections::HashMap;
7 use std::slice;
8 
9 struct LopEntry {
10     op: LogicOp3,
11     srcs_used: u8,
12     srcs: [Src; 3],
13 }
14 
15 struct LopPass {
16     use_counts: HashMap<SSAValue, u32>,
17     ssa_lop: HashMap<SSAValue, LopEntry>,
18 }
19 
src_as_bool(src: &Src) -> Option<bool>20 fn src_as_bool(src: &Src) -> Option<bool> {
21     assert!(src.src_mod.is_none());
22     match src.src_ref {
23         SrcRef::Zero | SrcRef::False | SrcRef::Imm32(0) => Some(false),
24         SrcRef::True | SrcRef::Imm32(u32::MAX) => Some(true),
25         _ => None,
26     }
27 }
28 
29 impl LopPass {
new(f: &Function) -> LopPass30     fn new(f: &Function) -> LopPass {
31         let mut use_counts = HashMap::new();
32         for b in &f.blocks {
33             for instr in &b.instrs {
34                 if let PredRef::SSA(ssa) = instr.pred.pred_ref {
35                     use_counts.entry(ssa).and_modify(|e| *e += 1).or_insert(1);
36                 }
37 
38                 for src in instr.srcs() {
39                     if let SrcRef::SSA(vec) = src.src_ref {
40                         for ssa in vec.iter() {
41                             use_counts
42                                 .entry(*ssa)
43                                 .and_modify(|e| *e += 1)
44                                 .or_insert(1);
45                         }
46                     }
47                 }
48             }
49         }
50         LopPass {
51             use_counts: use_counts,
52             ssa_lop: HashMap::new(),
53         }
54     }
55 
add_lop(&mut self, ssa: SSAValue, op: LogicOp3, srcs: [Src; 3])56     fn add_lop(&mut self, ssa: SSAValue, op: LogicOp3, srcs: [Src; 3]) {
57         let mut srcs_used = 0;
58         for i in 0..3 {
59             if op.src_used(i) {
60                 srcs_used |= 1 << i;
61                 assert!(src_as_bool(&srcs[i]).is_none());
62             }
63         }
64         let entry = LopEntry {
65             op: op,
66             srcs_used: srcs_used,
67             srcs: srcs,
68         };
69         self.ssa_lop.insert(ssa, entry);
70     }
71 
dedup_srcs(&self, op: &mut LogicOp3, srcs: &[Src; 3])72     fn dedup_srcs(&self, op: &mut LogicOp3, srcs: &[Src; 3]) {
73         for i in 0..2 {
74             for j in (i + 1)..3 {
75                 if srcs[i].src_ref == srcs[j].src_ref {
76                     *op = LogicOp3::new_lut(&|x, y, z| {
77                         let dup = [x, y, z][i];
78                         let si = match srcs[i].src_mod {
79                             SrcMod::None => dup,
80                             SrcMod::BNot => !dup,
81                             _ => panic!("Not a bitwise modifer"),
82                         };
83                         let sj = match srcs[j].src_mod {
84                             SrcMod::None => dup,
85                             SrcMod::BNot => !dup,
86                             _ => panic!("Not a bitwise modifer"),
87                         };
88 
89                         let mut s = [x, y, z];
90                         s[i] = si;
91                         s[j] = sj;
92 
93                         op.eval(s[0], s[1], s[2])
94                     });
95                 }
96             }
97         }
98     }
99 
try_prop_to_src( &self, ops: &mut [LogicOp3], srcs: &mut [Src; 3], src_idx: usize, )100     fn try_prop_to_src(
101         &self,
102         ops: &mut [LogicOp3],
103         srcs: &mut [Src; 3],
104         src_idx: usize,
105     ) {
106         loop {
107             assert!(srcs[src_idx].src_mod.is_none());
108             let ssa = match srcs[src_idx].src_ref {
109                 SrcRef::SSA(vec) => {
110                     assert!(vec.comps() == 1);
111                     vec[0]
112                 }
113                 _ => return,
114             };
115 
116             let Some(entry) = self.ssa_lop.get(&ssa) else {
117                 return;
118             };
119 
120             let entry_use_count = *self.use_counts.get(&ssa).unwrap();
121             if entry.srcs_used.count_ones() > 1 && entry_use_count > 1 {
122                 return;
123             }
124 
125             let mut entry_srcs = [usize::MAX; 3];
126             let mut next_src = 0_usize;
127             for i in 0..3 {
128                 if entry.srcs_used & (1 << i) == 0 {
129                     continue;
130                 }
131 
132                 let mut found = false;
133                 for j in 0..3 {
134                     if entry.srcs[i].src_ref == srcs[j].src_ref {
135                         entry_srcs[i] = j;
136                         found = true;
137                         break;
138                     }
139                 }
140                 if found {
141                     continue;
142                 }
143 
144                 loop {
145                     if next_src >= srcs.len() {
146                         return;
147                     }
148 
149                     // All callers of this function need to ensure that
150                     // constant sources are already folded so we know we
151                     // can always re-use them.
152                     if next_src == src_idx
153                         || src_as_bool(&srcs[next_src]).is_some()
154                     {
155                         entry_srcs[i] = next_src;
156                         next_src += 1;
157                         break;
158                     }
159                     next_src += 1;
160                 }
161             }
162 
163             // Clear out the propagated source. What we put here doesn't matter
164             // since it's no longer used.  It may be overwritten by one of the
165             // entry sources but there is no guarantee of this.
166             srcs[src_idx] = match ssa.file() {
167                 RegFile::GPR | RegFile::UGPR => SrcRef::Zero.into(),
168                 RegFile::Pred | RegFile::UPred => SrcRef::True.into(),
169                 RegFile::Carry | RegFile::Bar | RegFile::Mem => {
170                     panic!("Not a normal register");
171                 }
172             };
173 
174             for i in 0..3 {
175                 if entry_srcs[i] != usize::MAX {
176                     srcs[entry_srcs[i]] = entry.srcs[i];
177                 }
178             }
179             for op in ops.iter_mut() {
180                 *op = LogicOp3::new_lut(&|x, y, z| {
181                     let mut s = [x, y, z];
182                     let mut es = [0; 3];
183                     for i in 0..3 {
184                         if entry_srcs[i] != usize::MAX {
185                             es[i] = s[entry_srcs[i]];
186                         }
187                     }
188                     let e = entry.op.eval(es[0], es[1], es[2]);
189                     s[src_idx] = e;
190                     op.eval(s[0], s[1], s[2])
191                 });
192             }
193         }
194     }
195 
opt_lop3(&mut self, op: &mut OpLop3)196     fn opt_lop3(&mut self, op: &mut OpLop3) {
197         self.dedup_srcs(&mut op.op, &op.srcs);
198 
199         for (i, src) in op.srcs.iter_mut().enumerate() {
200             assert!(src.src_mod.is_none());
201 
202             if let Some(b) = src_as_bool(src) {
203                 op.op.fix_src(i, b);
204             }
205 
206             if !op.op.src_used(i) {
207                 // Replace unused sources with RZ
208                 *src = SrcRef::Zero.into();
209             }
210         }
211 
212         for i in 0..3 {
213             self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i);
214         }
215 
216         if let Dst::SSA(ssa) = op.dst {
217             assert!(ssa.comps() == 1);
218             self.add_lop(ssa[0], op.op, op.srcs);
219         }
220     }
221 
opt_plop3(&mut self, op: &mut OpPLop3)222     fn opt_plop3(&mut self, op: &mut OpPLop3) {
223         self.dedup_srcs(&mut op.ops[0], &op.srcs);
224         self.dedup_srcs(&mut op.ops[1], &op.srcs);
225 
226         // Replace unused sources with PT
227         for (i, src) in op.srcs.iter_mut().enumerate() {
228             if src.src_mod.is_bnot() {
229                 op.ops[0].invert_src(i);
230                 op.ops[1].invert_src(i);
231                 src.src_mod = SrcMod::None;
232             }
233 
234             if let Some(b) = src_as_bool(src) {
235                 op.ops[0].fix_src(i, b);
236                 op.ops[1].fix_src(i, b);
237             }
238 
239             if !op.ops[0].src_used(i) && !op.ops[1].src_used(i) {
240                 *src = SrcRef::True.into();
241             }
242         }
243 
244         for i in 0..3 {
245             self.try_prop_to_src(&mut op.ops, &mut op.srcs, i);
246         }
247 
248         for i in 0..2 {
249             if let Dst::SSA(ssa) = op.dsts[i] {
250                 assert!(ssa.comps() == 1);
251                 self.add_lop(ssa[0], op.ops[i], op.srcs);
252             }
253         }
254     }
255 
run(&mut self, f: &mut Function)256     fn run(&mut self, f: &mut Function) {
257         for b in &mut f.blocks {
258             for instr in &mut b.instrs {
259                 match &mut instr.op {
260                     Op::Lop3(op) => self.opt_lop3(op),
261                     Op::PLop3(op) => self.opt_plop3(op),
262                     _ => (),
263                 }
264             }
265         }
266     }
267 }
268 
269 impl Shader<'_> {
opt_lop(&mut self)270     pub fn opt_lop(&mut self) {
271         for f in &mut self.functions {
272             let mut pass = LopPass::new(f);
273             pass.run(f);
274         }
275     }
276 }
277