xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/repair_ssa.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::ir::*;
5 use crate::union_find::UnionFind;
6 
7 use compiler::bitset::BitSet;
8 use std::cell::RefCell;
9 use std::collections::HashMap;
10 
11 struct Phi {
12     idx: u32,
13     orig: SSAValue,
14     dst: SSAValue,
15     srcs: HashMap<usize, SSAValue>,
16 }
17 
18 struct DefTrackerBlock {
19     pred: Vec<usize>,
20     succ: Vec<usize>,
21     defs: RefCell<HashMap<SSAValue, SSAValue>>,
22     phis: RefCell<Vec<Phi>>,
23 }
24 
get_ssa_or_phi( ssa_alloc: &mut SSAValueAllocator, phi_alloc: &mut PhiAllocator, blocks: &[DefTrackerBlock], needs_src: &mut BitSet, b_idx: usize, ssa: SSAValue, ) -> SSAValue25 fn get_ssa_or_phi(
26     ssa_alloc: &mut SSAValueAllocator,
27     phi_alloc: &mut PhiAllocator,
28     blocks: &[DefTrackerBlock],
29     needs_src: &mut BitSet,
30     b_idx: usize,
31     ssa: SSAValue,
32 ) -> SSAValue {
33     let b = &blocks[b_idx];
34     let mut b_defs = b.defs.borrow_mut();
35     if let Some(ssa) = b_defs.get(&ssa) {
36         return *ssa;
37     }
38 
39     let mut pred_ssa = None;
40     let mut all_same = true;
41     for p_idx in &b.pred {
42         if *p_idx >= b_idx {
43             // This is a loop back-edge, add a phi just in case.  We'll remove
44             // it later if it's not needed
45             all_same = false;
46         } else {
47             let p_ssa = get_ssa_or_phi(
48                 ssa_alloc, phi_alloc, blocks, needs_src, *p_idx, ssa,
49             );
50             if *pred_ssa.get_or_insert(p_ssa) != p_ssa {
51                 all_same = false;
52             }
53         }
54     }
55 
56     if all_same {
57         let pred_ssa = pred_ssa.expect("Undefined value");
58         b_defs.insert(ssa, pred_ssa);
59         pred_ssa
60     } else {
61         let phi_idx = phi_alloc.alloc();
62         let phi_ssa = ssa_alloc.alloc(ssa.file());
63         let mut phi = Phi {
64             idx: phi_idx,
65             orig: ssa,
66             dst: phi_ssa,
67             srcs: HashMap::new(),
68         };
69         for p_idx in &b.pred {
70             if *p_idx >= b_idx {
71                 needs_src.insert(*p_idx);
72                 continue;
73             }
74             // The earlier recursive call ensured this exists
75             let p_ssa = *blocks[*p_idx].defs.borrow().get(&ssa).unwrap();
76             phi.srcs.insert(*p_idx, p_ssa);
77         }
78         b.phis.borrow_mut().push(phi);
79         b_defs.insert(ssa, phi_ssa);
80         phi_ssa
81     }
82 }
83 
get_or_insert_phi_dsts(bb: &mut BasicBlock) -> &mut OpPhiDsts84 fn get_or_insert_phi_dsts(bb: &mut BasicBlock) -> &mut OpPhiDsts {
85     let ip = if let Some(ip) = bb.phi_dsts_ip() {
86         ip
87     } else {
88         bb.instrs.insert(0, Instr::new_boxed(OpPhiDsts::new()));
89         0
90     };
91     match &mut bb.instrs[ip].op {
92         Op::PhiDsts(phi) => phi,
93         _ => panic!("Expected to find the phi we just inserted"),
94     }
95 }
96 
get_or_insert_phi_srcs(bb: &mut BasicBlock) -> &mut OpPhiSrcs97 fn get_or_insert_phi_srcs(bb: &mut BasicBlock) -> &mut OpPhiSrcs {
98     let ip = if let Some(ip) = bb.phi_srcs_ip() {
99         ip
100     } else if let Some(ip) = bb.branch_ip() {
101         bb.instrs.insert(ip, Instr::new_boxed(OpPhiSrcs::new()));
102         ip
103     } else {
104         bb.instrs.push(Instr::new_boxed(OpPhiSrcs::new()));
105         bb.instrs.len() - 1
106     };
107     match &mut bb.instrs[ip].op {
108         Op::PhiSrcs(phi) => phi,
109         _ => panic!("Expected to find the phi we just inserted"),
110     }
111 }
112 
113 impl Function {
114     /// Repairs SSA form
115     ///
116     /// Certain passes such as register spilling may produce a program that is
117     /// no longer in SSA form.  This pass is able to repair SSA by inserting
118     /// phis as needed.  Even though we do not require dominance or that each
119     /// value be defined once we do require that, for every use of an SSAValue
120     /// and for every path from the start of the program to that use, there must
121     /// be some definition of the value along that path.
122     ///
123     /// The algorithm implemented here is based on the one in "Simple and
124     /// Efficient Construction of Static Single Assignment Form" by Braun, et.
125     /// al.  The primary difference between our implementation and the paper is
126     /// that we can't rewrite the IR on-the-fly.  Instead, we store everything
127     /// in hash tables and handle removing redundant phis with back-edges as a
128     /// separate pass between figuring out where phis are needed and actually
129     /// constructing the phi instructions.
repair_ssa(&mut self)130     pub fn repair_ssa(&mut self) {
131         // First, count the number of defs for each SSA value.  This will allow
132         // us to skip any SSA values which only have a single definition in
133         // later passes.
134         let mut has_mult_defs = false;
135         let mut num_defs = HashMap::new();
136         for b in &self.blocks {
137             for instr in &b.instrs {
138                 instr.for_each_ssa_def(|ssa| {
139                     num_defs
140                         .entry(*ssa)
141                         .and_modify(|e| {
142                             has_mult_defs = true;
143                             *e += 1;
144                         })
145                         .or_insert(1);
146                 });
147             }
148         }
149 
150         if !has_mult_defs {
151             return;
152         }
153 
154         let cfg = &mut self.blocks;
155         let ssa_alloc = &mut self.ssa_alloc;
156         let phi_alloc = &mut self.phi_alloc;
157 
158         let mut blocks = Vec::new();
159         let mut needs_src = BitSet::new();
160         for b_idx in 0..cfg.len() {
161             assert!(blocks.len() == b_idx);
162             blocks.push(DefTrackerBlock {
163                 pred: cfg.pred_indices(b_idx).to_vec(),
164                 succ: cfg.succ_indices(b_idx).to_vec(),
165                 defs: RefCell::new(HashMap::new()),
166                 phis: RefCell::new(Vec::new()),
167             });
168 
169             for instr in &mut cfg[b_idx].instrs {
170                 instr.for_each_ssa_use_mut(|ssa| {
171                     if num_defs.get(ssa).cloned().unwrap_or(0) > 1 {
172                         *ssa = get_ssa_or_phi(
173                             ssa_alloc,
174                             phi_alloc,
175                             &blocks,
176                             &mut needs_src,
177                             b_idx,
178                             *ssa,
179                         );
180                     }
181                 });
182 
183                 instr.for_each_ssa_def_mut(|ssa| {
184                     if num_defs.get(ssa).cloned().unwrap_or(0) > 1 {
185                         let new_ssa = ssa_alloc.alloc(ssa.file());
186                         blocks[b_idx].defs.borrow_mut().insert(*ssa, new_ssa);
187                         *ssa = new_ssa;
188                     }
189                 });
190             }
191         }
192 
193         // Populate phi sources for any back-edges
194         loop {
195             let Some(b_idx) = needs_src.iter().next() else {
196                 break;
197             };
198             needs_src.remove(b_idx);
199 
200             for s_idx in &blocks[b_idx].succ {
201                 if *s_idx <= b_idx {
202                     let s = &blocks[*s_idx];
203 
204                     // We do a mutable borrow here.  The algorithm is recursive
205                     // and may insert phis into other blocks.  However, because
206                     // this is phi exists, its destination should be in the def
207                     // set for s and so no new phis should need to be added.
208                     // RefCell's dynamic borrow checks will assert this.
209                     for phi in s.phis.borrow_mut().iter_mut() {
210                         phi.srcs.entry(b_idx).or_insert_with(|| {
211                             get_ssa_or_phi(
212                                 ssa_alloc,
213                                 phi_alloc,
214                                 &blocks,
215                                 &mut needs_src,
216                                 b_idx,
217                                 phi.orig,
218                             )
219                         });
220                     }
221                 }
222             }
223         }
224 
225         // For loop back-edges, we inserted a phi whether we need one or not.
226         // We want to eliminate any redundant phis.
227         let mut ssa_map = UnionFind::new();
228         if cfg.has_loop() {
229             let mut to_do = true;
230             while to_do {
231                 to_do = false;
232                 for b_idx in 0..cfg.len() {
233                     let b = &blocks[b_idx];
234                     b.phis.borrow_mut().retain_mut(|phi| {
235                         let mut ssa = None;
236                         for (_, p_ssa) in phi.srcs.iter_mut() {
237                             // Apply the remap to the phi sources so that we
238                             // pick up any remaps from previous loop iterations.
239                             *p_ssa = ssa_map.find(*p_ssa);
240 
241                             if *p_ssa == phi.dst {
242                                 continue;
243                             }
244                             if *ssa.get_or_insert(*p_ssa) != *p_ssa {
245                                 // Multiple unique sources
246                                 return true;
247                             }
248                         }
249 
250                         // All sources are identical or the phi destination so
251                         // we can delete this phi and add it to the remap
252                         let ssa = ssa.expect("Circular SSA def");
253                         // union(a, b) ensures that the representative is the representative
254                         // for a.  This means union(ssa, phi.dst) ensures that phi.dst gets
255                         // mapped to ssa, not the other way around.
256                         ssa_map.union(ssa, phi.dst);
257                         to_do = true;
258                         false
259                     });
260                 }
261             }
262         }
263 
264         // Now we apply the remap to instruction sources and place the actual
265         // phis
266         for b_idx in 0..cfg.len() {
267             // Grab the successor index for inserting OpPhiSrc before we take a
268             // mutable reference to the CFG.  There are no critical edges so we
269             // can only have an OpPhiSrc if there is a single successor.
270             let succ = cfg.succ_indices(b_idx);
271             let s_idx = if succ.len() == 1 {
272                 Some(succ[0])
273             } else {
274                 for s_idx in succ {
275                     debug_assert!(blocks[*s_idx].phis.borrow().is_empty());
276                 }
277                 None
278             };
279 
280             let bb = &mut cfg[b_idx];
281 
282             // First we have phi destinations
283             let b_phis = blocks[b_idx].phis.borrow();
284             if !b_phis.is_empty() {
285                 let phi_dst = get_or_insert_phi_dsts(bb);
286                 for phi in b_phis.iter() {
287                     phi_dst.dsts.push(phi.idx, phi.dst.into());
288                 }
289             }
290 
291             // Fix up any remapped SSA values in sources
292             if !ssa_map.is_empty() {
293                 for instr in &mut bb.instrs {
294                     instr.for_each_ssa_use_mut(|ssa| {
295                         *ssa = ssa_map.find(*ssa);
296                     });
297                 }
298             }
299 
300             if let Some(s_idx) = s_idx {
301                 let s_phis = blocks[s_idx].phis.borrow();
302                 if !s_phis.is_empty() {
303                     let phi_src = get_or_insert_phi_srcs(bb);
304                     for phi in s_phis.iter() {
305                         let mut ssa = *phi.srcs.get(&b_idx).unwrap();
306                         ssa = ssa_map.find(ssa);
307                         phi_src.srcs.push(phi.idx, ssa.into());
308                     }
309                 }
310             }
311         }
312     }
313 }
314