xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/to_cssa.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7 
8 use compiler::cfg::CFG;
9 use std::collections::HashMap;
10 use std::iter::Peekable;
11 
12 struct MergedIter<I: Iterator> {
13     a: Peekable<I>,
14     b: Peekable<I>,
15 }
16 
17 impl<I: Iterator> MergedIter<I> {
new(a: I, b: I) -> Self18     fn new(a: I, b: I) -> Self {
19         Self {
20             a: a.peekable(),
21             b: b.peekable(),
22         }
23     }
24 }
25 
26 impl<I: Iterator> Iterator for MergedIter<I>
27 where
28     <I as Iterator>::Item: Ord,
29 {
30     type Item = <I as Iterator>::Item;
31 
next(&mut self) -> Option<<I as Iterator>::Item>32     fn next(&mut self) -> Option<<I as Iterator>::Item> {
33         if let Some(a) = self.a.peek() {
34             if let Some(b) = self.b.peek() {
35                 if a <= b {
36                     self.a.next()
37                 } else {
38                     self.b.next()
39                 }
40             } else {
41                 self.a.next()
42             }
43         } else {
44             self.b.next()
45         }
46     }
47 
size_hint(&self) -> (usize, Option<usize>)48     fn size_hint(&self) -> (usize, Option<usize>) {
49         let (a_max, a_size) = self.a.size_hint();
50         let (b_max, b_size) = self.b.size_hint();
51         (a_max + b_max, a_size.zip(b_size).map(|(a, b)| a + b))
52     }
53 }
54 
55 enum CoalesceItem {
56     SSA(SSAValue),
57     Phi(u32),
58 }
59 
60 struct CoalesceNode {
61     set: usize,
62     block: usize,
63     ip_1: usize,
64     item: CoalesceItem,
65 }
66 
67 struct CoalesceSet {
68     nodes: Vec<usize>,
69 }
70 
71 struct CoalesceGraph<'a> {
72     live: &'a SimpleLiveness,
73     nodes: Vec<CoalesceNode>,
74     sets: Vec<CoalesceSet>,
75     ssa_node: HashMap<SSAValue, usize>,
76     phi_node_file: HashMap<u32, (usize, RegFile)>,
77 }
78 
79 impl<'a> CoalesceGraph<'a> {
new(live: &'a SimpleLiveness) -> Self80     fn new(live: &'a SimpleLiveness) -> Self {
81         Self {
82             live: live,
83             nodes: Vec::new(),
84             sets: Vec::new(),
85             ssa_node: HashMap::new(),
86             phi_node_file: HashMap::new(),
87         }
88     }
89 
add_ssa(&mut self, ssa: SSAValue)90     fn add_ssa(&mut self, ssa: SSAValue) {
91         debug_assert!(self.sets.is_empty());
92 
93         // Set it to usize::MAX for now.  We'll update later
94         if self.ssa_node.insert(ssa, usize::MAX).is_none() {
95             let (block, ip) = self.live.def_block_ip(&ssa);
96             self.nodes.push(CoalesceNode {
97                 set: usize::MAX,
98                 block: block,
99                 ip_1: ip + 1,
100                 item: CoalesceItem::SSA(ssa),
101             });
102         }
103     }
104 
add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize)105     fn add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize) {
106         debug_assert!(self.sets.is_empty());
107 
108         // Record the register file now.  We'll set the node later
109         let old = self.phi_node_file.insert(phi, (usize::MAX, file));
110         debug_assert!(old.is_none());
111 
112         self.nodes.push(CoalesceNode {
113             set: usize::MAX,
114             block: block,
115             ip_1: 0,
116             item: CoalesceItem::Phi(phi),
117         });
118     }
119 
add_phi_src(&mut self, phi: u32, block: usize)120     fn add_phi_src(&mut self, phi: u32, block: usize) {
121         debug_assert!(self.sets.is_empty());
122 
123         self.nodes.push(CoalesceNode {
124             set: usize::MAX,
125             block: block,
126             ip_1: usize::MAX,
127             item: CoalesceItem::Phi(phi),
128         });
129     }
130 
init_sets<N>(&mut self, cfg: &CFG<N>)131     fn init_sets<N>(&mut self, cfg: &CFG<N>) {
132         // Sort the nodes by dom_dfs_pre_index followed by ip+1.  Stash the
133         // dom_dfs_pre_index in the set for now.  We don't actually fill out
134         // the set field until later.
135         for n in self.nodes.iter_mut() {
136             n.set = cfg.dom_dfs_pre_index(n.block);
137         }
138         self.nodes
139             .sort_by(|a, b| a.set.cmp(&b.set).then(a.ip_1.cmp(&b.ip_1)));
140 
141         for ni in 0..self.nodes.len() {
142             match &self.nodes[ni].item {
143                 CoalesceItem::SSA(ssa) => {
144                     let old = self.ssa_node.insert(*ssa, ni);
145                     debug_assert!(old == Some(usize::MAX));
146 
147                     self.nodes[ni].set = self.sets.len();
148                     self.sets.push(CoalesceSet { nodes: vec![ni] });
149                 }
150                 CoalesceItem::Phi(phi) => {
151                     let (pn, _) = self.phi_node_file.get_mut(phi).unwrap();
152 
153                     // We only want one set per phi and phi_node contains the
154                     // index to any one of the nodes.
155                     if *pn == usize::MAX {
156                         self.nodes[ni].set = self.sets.len();
157                         self.sets.push(CoalesceSet { nodes: vec![ni] });
158                         *pn = ni;
159                     } else {
160                         let s = self.nodes[*pn].set;
161                         self.nodes[ni].set = s;
162                     }
163                 }
164             }
165         }
166     }
167 
node_dominates<N>(&self, p: usize, c: usize, cfg: &CFG<N>) -> bool168     fn node_dominates<N>(&self, p: usize, c: usize, cfg: &CFG<N>) -> bool {
169         if self.nodes[p].block == self.nodes[c].block {
170             self.nodes[p].ip_1 <= self.nodes[c].ip_1
171         } else {
172             cfg.dominates(self.nodes[p].block, self.nodes[c].block)
173         }
174     }
175 
phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool176     fn phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool {
177         if phi.ip_1 == 0 {
178             self.live.block_live(phi.block).is_live_in(ssa)
179         } else {
180             debug_assert!(phi.ip_1 == usize::MAX);
181             self.live.block_live(phi.block).is_live_out(ssa)
182         }
183     }
184 
nodes_interfere(&self, a: usize, b: usize) -> bool185     fn nodes_interfere(&self, a: usize, b: usize) -> bool {
186         let a = &self.nodes[a];
187         let b = &self.nodes[b];
188 
189         match &a.item {
190             CoalesceItem::SSA(a_ssa) => match &b.item {
191                 CoalesceItem::SSA(b_ssa) => self.live.interferes(a_ssa, b_ssa),
192                 CoalesceItem::Phi(_) => self.phi_ssa_interferes(b, a_ssa),
193             },
194             CoalesceItem::Phi(_) => match &b.item {
195                 CoalesceItem::SSA(b_ssa) => self.phi_ssa_interferes(a, b_ssa),
196                 CoalesceItem::Phi(_) => {
197                     // Phi nodes represent the temporary SSA value made between
198                     // the parallel copy and the phi in the Boissinot algorithm
199                     // so they interfere if and only if they're in the same
200                     // block and both at the start or both at the end.
201                     a.block == b.block && a.ip_1 == b.ip_1
202                 }
203             },
204         }
205     }
206 
sets_interfere<N>(&self, a: usize, b: usize, cfg: &CFG<N>) -> bool207     pub fn sets_interfere<N>(&self, a: usize, b: usize, cfg: &CFG<N>) -> bool {
208         let a = &self.sets[a];
209         let b = &self.sets[b];
210 
211         // Stack of nodes which dominate the current node
212         let mut dom = Vec::new();
213 
214         for n in MergedIter::new(a.nodes.iter(), b.nodes.iter()) {
215             while let Some(p) = dom.last() {
216                 if !self.node_dominates(*p, *n, cfg) {
217                     dom.pop();
218                 } else {
219                     break;
220                 }
221             }
222 
223             if let Some(p) = dom.last() {
224                 if self.nodes_interfere(*n, *p) {
225                     return true;
226                 }
227             }
228 
229             dom.push(*n);
230         }
231 
232         false
233     }
234 
sets_merge(&mut self, a: usize, b: usize) -> usize235     pub fn sets_merge(&mut self, a: usize, b: usize) -> usize {
236         let a_nodes = std::mem::take(&mut self.sets[a].nodes);
237         let b_nodes = std::mem::take(&mut self.sets[b].nodes);
238         let nodes = MergedIter::new(a_nodes.into_iter(), b_nodes.into_iter());
239 
240         self.sets[a].nodes = nodes
241             .map(|n| {
242                 self.nodes[n].set = a;
243                 n
244             })
245             .collect();
246 
247         a
248     }
249 
ssa_set(&self, ssa: &SSAValue) -> usize250     pub fn ssa_set(&self, ssa: &SSAValue) -> usize {
251         self.nodes[*self.ssa_node.get(ssa).unwrap()].set
252     }
253 
phi_set_file(&self, phi: &u32) -> (usize, RegFile)254     pub fn phi_set_file(&self, phi: &u32) -> (usize, RegFile) {
255         let (n, file) = self.phi_node_file.get(phi).unwrap();
256         (self.nodes[*n].set, *file)
257     }
258 }
259 
260 impl Function {
261     /// Convert a function to CSSA (Conventional SSA) form
262     ///
263     /// In "Translating Out of Static Single Assignment Form" by Sreedhar, et.
264     /// al., they define CSSA form via what they call the Phi Congruence
265     /// Property:
266     ///
267     /// > The occurrences of all resources which belong to the same phi
268     /// > congruence class in a program can be replaced by a representative
269     /// > resource. After the replacement, the phi instruction can be
270     /// > eliminated without violating the semantics of the original program.
271     ///
272     /// A more compiler-theoretic definition of CSSA form is a version of SSA
273     /// form in which, for each phi, none of the SSA values involved in the phi
274     /// (either as a source or destination) interfere.  While most of the papers
275     /// discussing CSSA form do so in the context of out-of-SSA, this property
276     /// is also useful for SSA-based spilling and register allocation.
277     ///
278     /// Our implementation is based on the algorithm described in "Revisiting
279     /// Out-of-SSA Translation for Correctness, Code Quality, and Effciency" by
280     /// Boissinot et. al.  The primary difference between this algorithm and
281     /// the one in that paper is that we don't actually insert parallel copies
282     /// and remove redundant entries.  Instead, we treat OpPhiSrcs and OpPhiDsts
283     /// as as the parallel copies with the phi index standing in for all of the
284     /// SSA values used directly by the phi.  Then, instead of removing copies
285     /// where the source and destination don't interfere, we insert copies
286     /// whenever the source or destination and phi index do interfere.  This
287     /// lets us avoid inserting pointless instructions.
to_cssa(&mut self)288     pub fn to_cssa(&mut self) {
289         let live = SimpleLiveness::for_function(self);
290 
291         let mut cg = CoalesceGraph::new(&live);
292         for (bi, b) in self.blocks.iter().enumerate() {
293             if let Some(phi) = b.phi_dsts() {
294                 for (idx, dst) in phi.dsts.iter() {
295                     let vec = dst.as_ssa().unwrap();
296                     debug_assert!(vec.comps() == 1);
297                     cg.add_ssa(vec[0]);
298                     cg.add_phi_dst(*idx, vec[0].file(), bi);
299                 }
300             }
301 
302             if let Some(phi) = b.phi_srcs() {
303                 for (idx, src) in phi.srcs.iter() {
304                     if let SrcRef::SSA(vec) = src.src_ref {
305                         debug_assert!(vec.comps() == 1);
306                         cg.add_ssa(vec[0]);
307                     }
308                     cg.add_phi_src(*idx, bi);
309                 }
310             }
311         }
312         cg.init_sets(&self.blocks);
313 
314         for bi in 0..self.blocks.len() {
315             let block_instrs = std::mem::take(&mut self.blocks[bi].instrs);
316 
317             let mut instrs = Vec::new();
318             for mut instr in block_instrs.into_iter() {
319                 match &mut instr.op {
320                     Op::PhiDsts(phi) => {
321                         let mut pcopy = OpParCopy::new();
322                         for (idx, dst) in phi.dsts.iter_mut() {
323                             let (ps, file) = cg.phi_set_file(idx);
324 
325                             let vec = dst.as_ssa().unwrap();
326                             debug_assert!(vec.comps() == 1);
327                             debug_assert!(vec[0].file() == file);
328                             let ds = cg.ssa_set(&vec[0]);
329 
330                             if !cg.sets_interfere(ps, ds, &self.blocks) {
331                                 cg.sets_merge(ps, ds);
332                                 continue;
333                             }
334 
335                             let tmp = self.ssa_alloc.alloc(file);
336                             pcopy.push(*dst, tmp.into());
337                             *dst = tmp.into();
338                         }
339 
340                         instrs.push(instr);
341                         if !pcopy.is_empty() {
342                             if DEBUG.annotate() {
343                                 instrs.push(Instr::new_boxed(OpAnnotate {
344                                     annotation: "generated by to_cssa".into(),
345                                 }));
346                             }
347                             instrs.push(Instr::new_boxed(pcopy));
348                         }
349                     }
350                     Op::PhiSrcs(phi) => {
351                         let mut pcopy = OpParCopy::new();
352                         for (idx, src) in phi.srcs.iter_mut() {
353                             let (ps, file) = cg.phi_set_file(idx);
354 
355                             debug_assert!(src.src_mod.is_none());
356                             if let SrcRef::SSA(vec) = &src.src_ref {
357                                 debug_assert!(vec.comps() == 1);
358                                 if vec[0].file() == file {
359                                     let ss = cg.ssa_set(&vec[0]);
360                                     if cg.sets_interfere(ps, ss, &self.blocks) {
361                                         let tmp = self.ssa_alloc.alloc(file);
362                                         pcopy.push(tmp.into(), *src);
363                                         *src = tmp.into();
364                                     } else {
365                                         cg.sets_merge(ps, ss);
366                                     }
367                                     continue;
368                                 }
369                             }
370 
371                             // Non-SSA sources or sources with a different file
372                             // from the destination get an actual copy
373                             // instruction and are not considered part of the
374                             // parallel copy.
375                             let tmp = self.ssa_alloc.alloc(file);
376                             if DEBUG.annotate() {
377                                 instrs.push(Instr::new_boxed(OpAnnotate {
378                                     annotation: "generated by to_cssa".into(),
379                                 }));
380                             }
381                             instrs.push(Instr::new_boxed(OpCopy {
382                                 dst: tmp.into(),
383                                 src: *src,
384                             }));
385                             *src = tmp.into();
386                         }
387 
388                         if !pcopy.is_empty() {
389                             if DEBUG.annotate() {
390                                 instrs.push(Instr::new_boxed(OpAnnotate {
391                                     annotation: "generated by to_cssa".into(),
392                                 }));
393                             }
394                             instrs.push(Instr::new_boxed(pcopy));
395                         }
396                         instrs.push(instr);
397                     }
398                     _ => instrs.push(instr),
399                 }
400             }
401             self.blocks[bi].instrs = instrs;
402         }
403     }
404 }
405