1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::ir::*;
5
6 use std::collections::HashMap;
7 use std::slice;
8
9 struct LopEntry {
10 op: LogicOp3,
11 srcs_used: u8,
12 srcs: [Src; 3],
13 }
14
15 struct LopPass {
16 use_counts: HashMap<SSAValue, u32>,
17 ssa_lop: HashMap<SSAValue, LopEntry>,
18 }
19
src_as_bool(src: &Src) -> Option<bool>20 fn src_as_bool(src: &Src) -> Option<bool> {
21 assert!(src.src_mod.is_none());
22 match src.src_ref {
23 SrcRef::Zero | SrcRef::False | SrcRef::Imm32(0) => Some(false),
24 SrcRef::True | SrcRef::Imm32(u32::MAX) => Some(true),
25 _ => None,
26 }
27 }
28
29 impl LopPass {
new(f: &Function) -> LopPass30 fn new(f: &Function) -> LopPass {
31 let mut use_counts = HashMap::new();
32 for b in &f.blocks {
33 for instr in &b.instrs {
34 if let PredRef::SSA(ssa) = instr.pred.pred_ref {
35 use_counts.entry(ssa).and_modify(|e| *e += 1).or_insert(1);
36 }
37
38 for src in instr.srcs() {
39 if let SrcRef::SSA(vec) = src.src_ref {
40 for ssa in vec.iter() {
41 use_counts
42 .entry(*ssa)
43 .and_modify(|e| *e += 1)
44 .or_insert(1);
45 }
46 }
47 }
48 }
49 }
50 LopPass {
51 use_counts: use_counts,
52 ssa_lop: HashMap::new(),
53 }
54 }
55
add_lop(&mut self, ssa: SSAValue, op: LogicOp3, srcs: [Src; 3])56 fn add_lop(&mut self, ssa: SSAValue, op: LogicOp3, srcs: [Src; 3]) {
57 let mut srcs_used = 0;
58 for i in 0..3 {
59 if op.src_used(i) {
60 srcs_used |= 1 << i;
61 assert!(src_as_bool(&srcs[i]).is_none());
62 }
63 }
64 let entry = LopEntry {
65 op: op,
66 srcs_used: srcs_used,
67 srcs: srcs,
68 };
69 self.ssa_lop.insert(ssa, entry);
70 }
71
dedup_srcs(&self, op: &mut LogicOp3, srcs: &[Src; 3])72 fn dedup_srcs(&self, op: &mut LogicOp3, srcs: &[Src; 3]) {
73 for i in 0..2 {
74 for j in (i + 1)..3 {
75 if srcs[i].src_ref == srcs[j].src_ref {
76 *op = LogicOp3::new_lut(&|x, y, z| {
77 let dup = [x, y, z][i];
78 let si = match srcs[i].src_mod {
79 SrcMod::None => dup,
80 SrcMod::BNot => !dup,
81 _ => panic!("Not a bitwise modifer"),
82 };
83 let sj = match srcs[j].src_mod {
84 SrcMod::None => dup,
85 SrcMod::BNot => !dup,
86 _ => panic!("Not a bitwise modifer"),
87 };
88
89 let mut s = [x, y, z];
90 s[i] = si;
91 s[j] = sj;
92
93 op.eval(s[0], s[1], s[2])
94 });
95 }
96 }
97 }
98 }
99
try_prop_to_src( &self, ops: &mut [LogicOp3], srcs: &mut [Src; 3], src_idx: usize, )100 fn try_prop_to_src(
101 &self,
102 ops: &mut [LogicOp3],
103 srcs: &mut [Src; 3],
104 src_idx: usize,
105 ) {
106 loop {
107 assert!(srcs[src_idx].src_mod.is_none());
108 let ssa = match srcs[src_idx].src_ref {
109 SrcRef::SSA(vec) => {
110 assert!(vec.comps() == 1);
111 vec[0]
112 }
113 _ => return,
114 };
115
116 let Some(entry) = self.ssa_lop.get(&ssa) else {
117 return;
118 };
119
120 let entry_use_count = *self.use_counts.get(&ssa).unwrap();
121 if entry.srcs_used.count_ones() > 1 && entry_use_count > 1 {
122 return;
123 }
124
125 let mut entry_srcs = [usize::MAX; 3];
126 let mut next_src = 0_usize;
127 for i in 0..3 {
128 if entry.srcs_used & (1 << i) == 0 {
129 continue;
130 }
131
132 let mut found = false;
133 for j in 0..3 {
134 if entry.srcs[i].src_ref == srcs[j].src_ref {
135 entry_srcs[i] = j;
136 found = true;
137 break;
138 }
139 }
140 if found {
141 continue;
142 }
143
144 loop {
145 if next_src >= srcs.len() {
146 return;
147 }
148
149 // All callers of this function need to ensure that
150 // constant sources are already folded so we know we
151 // can always re-use them.
152 if next_src == src_idx
153 || src_as_bool(&srcs[next_src]).is_some()
154 {
155 entry_srcs[i] = next_src;
156 next_src += 1;
157 break;
158 }
159 next_src += 1;
160 }
161 }
162
163 // Clear out the propagated source. What we put here doesn't matter
164 // since it's no longer used. It may be overwritten by one of the
165 // entry sources but there is no guarantee of this.
166 srcs[src_idx] = match ssa.file() {
167 RegFile::GPR | RegFile::UGPR => SrcRef::Zero.into(),
168 RegFile::Pred | RegFile::UPred => SrcRef::True.into(),
169 RegFile::Carry | RegFile::Bar | RegFile::Mem => {
170 panic!("Not a normal register");
171 }
172 };
173
174 for i in 0..3 {
175 if entry_srcs[i] != usize::MAX {
176 srcs[entry_srcs[i]] = entry.srcs[i];
177 }
178 }
179 for op in ops.iter_mut() {
180 *op = LogicOp3::new_lut(&|x, y, z| {
181 let mut s = [x, y, z];
182 let mut es = [0; 3];
183 for i in 0..3 {
184 if entry_srcs[i] != usize::MAX {
185 es[i] = s[entry_srcs[i]];
186 }
187 }
188 let e = entry.op.eval(es[0], es[1], es[2]);
189 s[src_idx] = e;
190 op.eval(s[0], s[1], s[2])
191 });
192 }
193 }
194 }
195
opt_lop3(&mut self, op: &mut OpLop3)196 fn opt_lop3(&mut self, op: &mut OpLop3) {
197 self.dedup_srcs(&mut op.op, &op.srcs);
198
199 for (i, src) in op.srcs.iter_mut().enumerate() {
200 assert!(src.src_mod.is_none());
201
202 if let Some(b) = src_as_bool(src) {
203 op.op.fix_src(i, b);
204 }
205
206 if !op.op.src_used(i) {
207 // Replace unused sources with RZ
208 *src = SrcRef::Zero.into();
209 }
210 }
211
212 for i in 0..3 {
213 self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i);
214 }
215
216 if let Dst::SSA(ssa) = op.dst {
217 assert!(ssa.comps() == 1);
218 self.add_lop(ssa[0], op.op, op.srcs);
219 }
220 }
221
opt_plop3(&mut self, op: &mut OpPLop3)222 fn opt_plop3(&mut self, op: &mut OpPLop3) {
223 self.dedup_srcs(&mut op.ops[0], &op.srcs);
224 self.dedup_srcs(&mut op.ops[1], &op.srcs);
225
226 // Replace unused sources with PT
227 for (i, src) in op.srcs.iter_mut().enumerate() {
228 if src.src_mod.is_bnot() {
229 op.ops[0].invert_src(i);
230 op.ops[1].invert_src(i);
231 src.src_mod = SrcMod::None;
232 }
233
234 if let Some(b) = src_as_bool(src) {
235 op.ops[0].fix_src(i, b);
236 op.ops[1].fix_src(i, b);
237 }
238
239 if !op.ops[0].src_used(i) && !op.ops[1].src_used(i) {
240 *src = SrcRef::True.into();
241 }
242 }
243
244 for i in 0..3 {
245 self.try_prop_to_src(&mut op.ops, &mut op.srcs, i);
246 }
247
248 for i in 0..2 {
249 if let Dst::SSA(ssa) = op.dsts[i] {
250 assert!(ssa.comps() == 1);
251 self.add_lop(ssa[0], op.ops[i], op.srcs);
252 }
253 }
254 }
255
run(&mut self, f: &mut Function)256 fn run(&mut self, f: &mut Function) {
257 for b in &mut f.blocks {
258 for instr in &mut b.instrs {
259 match &mut instr.op {
260 Op::Lop3(op) => self.opt_lop3(op),
261 Op::PLop3(op) => self.opt_plop3(op),
262 _ => (),
263 }
264 }
265 }
266 }
267 }
268
269 impl Shader<'_> {
opt_lop(&mut self)270 pub fn opt_lop(&mut self) {
271 for f in &mut self.functions {
272 let mut pass = LopPass::new(f);
273 pass.run(f);
274 }
275 }
276 }
277