1 // Copyright © 2023 Collabora, Ltd. 2 // SPDX-License-Identifier: MIT 3 4 use crate::api::{GetDebugFlags, DEBUG}; 5 use crate::ir::*; 6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness}; 7 8 use compiler::cfg::CFG; 9 use std::collections::HashMap; 10 use std::iter::Peekable; 11 12 struct MergedIter<I: Iterator> { 13 a: Peekable<I>, 14 b: Peekable<I>, 15 } 16 17 impl<I: Iterator> MergedIter<I> { new(a: I, b: I) -> Self18 fn new(a: I, b: I) -> Self { 19 Self { 20 a: a.peekable(), 21 b: b.peekable(), 22 } 23 } 24 } 25 26 impl<I: Iterator> Iterator for MergedIter<I> 27 where 28 <I as Iterator>::Item: Ord, 29 { 30 type Item = <I as Iterator>::Item; 31 next(&mut self) -> Option<<I as Iterator>::Item>32 fn next(&mut self) -> Option<<I as Iterator>::Item> { 33 if let Some(a) = self.a.peek() { 34 if let Some(b) = self.b.peek() { 35 if a <= b { 36 self.a.next() 37 } else { 38 self.b.next() 39 } 40 } else { 41 self.a.next() 42 } 43 } else { 44 self.b.next() 45 } 46 } 47 size_hint(&self) -> (usize, Option<usize>)48 fn size_hint(&self) -> (usize, Option<usize>) { 49 let (a_max, a_size) = self.a.size_hint(); 50 let (b_max, b_size) = self.b.size_hint(); 51 (a_max + b_max, a_size.zip(b_size).map(|(a, b)| a + b)) 52 } 53 } 54 55 enum CoalesceItem { 56 SSA(SSAValue), 57 Phi(u32), 58 } 59 60 struct CoalesceNode { 61 set: usize, 62 block: usize, 63 ip_1: usize, 64 item: CoalesceItem, 65 } 66 67 struct CoalesceSet { 68 nodes: Vec<usize>, 69 } 70 71 struct CoalesceGraph<'a> { 72 live: &'a SimpleLiveness, 73 nodes: Vec<CoalesceNode>, 74 sets: Vec<CoalesceSet>, 75 ssa_node: HashMap<SSAValue, usize>, 76 phi_node_file: HashMap<u32, (usize, RegFile)>, 77 } 78 79 impl<'a> CoalesceGraph<'a> { new(live: &'a SimpleLiveness) -> Self80 fn new(live: &'a SimpleLiveness) -> Self { 81 Self { 82 live: live, 83 nodes: Vec::new(), 84 sets: Vec::new(), 85 ssa_node: HashMap::new(), 86 phi_node_file: HashMap::new(), 87 } 88 } 89 add_ssa(&mut self, ssa: SSAValue)90 fn add_ssa(&mut self, ssa: SSAValue) { 91 debug_assert!(self.sets.is_empty()); 92 93 // Set it to usize::MAX for now. We'll update later 94 if self.ssa_node.insert(ssa, usize::MAX).is_none() { 95 let (block, ip) = self.live.def_block_ip(&ssa); 96 self.nodes.push(CoalesceNode { 97 set: usize::MAX, 98 block: block, 99 ip_1: ip + 1, 100 item: CoalesceItem::SSA(ssa), 101 }); 102 } 103 } 104 add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize)105 fn add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize) { 106 debug_assert!(self.sets.is_empty()); 107 108 // Record the register file now. We'll set the node later 109 let old = self.phi_node_file.insert(phi, (usize::MAX, file)); 110 debug_assert!(old.is_none()); 111 112 self.nodes.push(CoalesceNode { 113 set: usize::MAX, 114 block: block, 115 ip_1: 0, 116 item: CoalesceItem::Phi(phi), 117 }); 118 } 119 add_phi_src(&mut self, phi: u32, block: usize)120 fn add_phi_src(&mut self, phi: u32, block: usize) { 121 debug_assert!(self.sets.is_empty()); 122 123 self.nodes.push(CoalesceNode { 124 set: usize::MAX, 125 block: block, 126 ip_1: usize::MAX, 127 item: CoalesceItem::Phi(phi), 128 }); 129 } 130 init_sets<N>(&mut self, cfg: &CFG<N>)131 fn init_sets<N>(&mut self, cfg: &CFG<N>) { 132 // Sort the nodes by dom_dfs_pre_index followed by ip+1. Stash the 133 // dom_dfs_pre_index in the set for now. We don't actually fill out 134 // the set field until later. 135 for n in self.nodes.iter_mut() { 136 n.set = cfg.dom_dfs_pre_index(n.block); 137 } 138 self.nodes 139 .sort_by(|a, b| a.set.cmp(&b.set).then(a.ip_1.cmp(&b.ip_1))); 140 141 for ni in 0..self.nodes.len() { 142 match &self.nodes[ni].item { 143 CoalesceItem::SSA(ssa) => { 144 let old = self.ssa_node.insert(*ssa, ni); 145 debug_assert!(old == Some(usize::MAX)); 146 147 self.nodes[ni].set = self.sets.len(); 148 self.sets.push(CoalesceSet { nodes: vec![ni] }); 149 } 150 CoalesceItem::Phi(phi) => { 151 let (pn, _) = self.phi_node_file.get_mut(phi).unwrap(); 152 153 // We only want one set per phi and phi_node contains the 154 // index to any one of the nodes. 155 if *pn == usize::MAX { 156 self.nodes[ni].set = self.sets.len(); 157 self.sets.push(CoalesceSet { nodes: vec![ni] }); 158 *pn = ni; 159 } else { 160 let s = self.nodes[*pn].set; 161 self.nodes[ni].set = s; 162 } 163 } 164 } 165 } 166 } 167 node_dominates<N>(&self, p: usize, c: usize, cfg: &CFG<N>) -> bool168 fn node_dominates<N>(&self, p: usize, c: usize, cfg: &CFG<N>) -> bool { 169 if self.nodes[p].block == self.nodes[c].block { 170 self.nodes[p].ip_1 <= self.nodes[c].ip_1 171 } else { 172 cfg.dominates(self.nodes[p].block, self.nodes[c].block) 173 } 174 } 175 phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool176 fn phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool { 177 if phi.ip_1 == 0 { 178 self.live.block_live(phi.block).is_live_in(ssa) 179 } else { 180 debug_assert!(phi.ip_1 == usize::MAX); 181 self.live.block_live(phi.block).is_live_out(ssa) 182 } 183 } 184 nodes_interfere(&self, a: usize, b: usize) -> bool185 fn nodes_interfere(&self, a: usize, b: usize) -> bool { 186 let a = &self.nodes[a]; 187 let b = &self.nodes[b]; 188 189 match &a.item { 190 CoalesceItem::SSA(a_ssa) => match &b.item { 191 CoalesceItem::SSA(b_ssa) => self.live.interferes(a_ssa, b_ssa), 192 CoalesceItem::Phi(_) => self.phi_ssa_interferes(b, a_ssa), 193 }, 194 CoalesceItem::Phi(_) => match &b.item { 195 CoalesceItem::SSA(b_ssa) => self.phi_ssa_interferes(a, b_ssa), 196 CoalesceItem::Phi(_) => { 197 // Phi nodes represent the temporary SSA value made between 198 // the parallel copy and the phi in the Boissinot algorithm 199 // so they interfere if and only if they're in the same 200 // block and both at the start or both at the end. 201 a.block == b.block && a.ip_1 == b.ip_1 202 } 203 }, 204 } 205 } 206 sets_interfere<N>(&self, a: usize, b: usize, cfg: &CFG<N>) -> bool207 pub fn sets_interfere<N>(&self, a: usize, b: usize, cfg: &CFG<N>) -> bool { 208 let a = &self.sets[a]; 209 let b = &self.sets[b]; 210 211 // Stack of nodes which dominate the current node 212 let mut dom = Vec::new(); 213 214 for n in MergedIter::new(a.nodes.iter(), b.nodes.iter()) { 215 while let Some(p) = dom.last() { 216 if !self.node_dominates(*p, *n, cfg) { 217 dom.pop(); 218 } else { 219 break; 220 } 221 } 222 223 if let Some(p) = dom.last() { 224 if self.nodes_interfere(*n, *p) { 225 return true; 226 } 227 } 228 229 dom.push(*n); 230 } 231 232 false 233 } 234 sets_merge(&mut self, a: usize, b: usize) -> usize235 pub fn sets_merge(&mut self, a: usize, b: usize) -> usize { 236 let a_nodes = std::mem::take(&mut self.sets[a].nodes); 237 let b_nodes = std::mem::take(&mut self.sets[b].nodes); 238 let nodes = MergedIter::new(a_nodes.into_iter(), b_nodes.into_iter()); 239 240 self.sets[a].nodes = nodes 241 .map(|n| { 242 self.nodes[n].set = a; 243 n 244 }) 245 .collect(); 246 247 a 248 } 249 ssa_set(&self, ssa: &SSAValue) -> usize250 pub fn ssa_set(&self, ssa: &SSAValue) -> usize { 251 self.nodes[*self.ssa_node.get(ssa).unwrap()].set 252 } 253 phi_set_file(&self, phi: &u32) -> (usize, RegFile)254 pub fn phi_set_file(&self, phi: &u32) -> (usize, RegFile) { 255 let (n, file) = self.phi_node_file.get(phi).unwrap(); 256 (self.nodes[*n].set, *file) 257 } 258 } 259 260 impl Function { 261 /// Convert a function to CSSA (Conventional SSA) form 262 /// 263 /// In "Translating Out of Static Single Assignment Form" by Sreedhar, et. 264 /// al., they define CSSA form via what they call the Phi Congruence 265 /// Property: 266 /// 267 /// > The occurrences of all resources which belong to the same phi 268 /// > congruence class in a program can be replaced by a representative 269 /// > resource. After the replacement, the phi instruction can be 270 /// > eliminated without violating the semantics of the original program. 271 /// 272 /// A more compiler-theoretic definition of CSSA form is a version of SSA 273 /// form in which, for each phi, none of the SSA values involved in the phi 274 /// (either as a source or destination) interfere. While most of the papers 275 /// discussing CSSA form do so in the context of out-of-SSA, this property 276 /// is also useful for SSA-based spilling and register allocation. 277 /// 278 /// Our implementation is based on the algorithm described in "Revisiting 279 /// Out-of-SSA Translation for Correctness, Code Quality, and Effciency" by 280 /// Boissinot et. al. The primary difference between this algorithm and 281 /// the one in that paper is that we don't actually insert parallel copies 282 /// and remove redundant entries. Instead, we treat OpPhiSrcs and OpPhiDsts 283 /// as as the parallel copies with the phi index standing in for all of the 284 /// SSA values used directly by the phi. Then, instead of removing copies 285 /// where the source and destination don't interfere, we insert copies 286 /// whenever the source or destination and phi index do interfere. This 287 /// lets us avoid inserting pointless instructions. to_cssa(&mut self)288 pub fn to_cssa(&mut self) { 289 let live = SimpleLiveness::for_function(self); 290 291 let mut cg = CoalesceGraph::new(&live); 292 for (bi, b) in self.blocks.iter().enumerate() { 293 if let Some(phi) = b.phi_dsts() { 294 for (idx, dst) in phi.dsts.iter() { 295 let vec = dst.as_ssa().unwrap(); 296 debug_assert!(vec.comps() == 1); 297 cg.add_ssa(vec[0]); 298 cg.add_phi_dst(*idx, vec[0].file(), bi); 299 } 300 } 301 302 if let Some(phi) = b.phi_srcs() { 303 for (idx, src) in phi.srcs.iter() { 304 if let SrcRef::SSA(vec) = src.src_ref { 305 debug_assert!(vec.comps() == 1); 306 cg.add_ssa(vec[0]); 307 } 308 cg.add_phi_src(*idx, bi); 309 } 310 } 311 } 312 cg.init_sets(&self.blocks); 313 314 for bi in 0..self.blocks.len() { 315 let block_instrs = std::mem::take(&mut self.blocks[bi].instrs); 316 317 let mut instrs = Vec::new(); 318 for mut instr in block_instrs.into_iter() { 319 match &mut instr.op { 320 Op::PhiDsts(phi) => { 321 let mut pcopy = OpParCopy::new(); 322 for (idx, dst) in phi.dsts.iter_mut() { 323 let (ps, file) = cg.phi_set_file(idx); 324 325 let vec = dst.as_ssa().unwrap(); 326 debug_assert!(vec.comps() == 1); 327 debug_assert!(vec[0].file() == file); 328 let ds = cg.ssa_set(&vec[0]); 329 330 if !cg.sets_interfere(ps, ds, &self.blocks) { 331 cg.sets_merge(ps, ds); 332 continue; 333 } 334 335 let tmp = self.ssa_alloc.alloc(file); 336 pcopy.push(*dst, tmp.into()); 337 *dst = tmp.into(); 338 } 339 340 instrs.push(instr); 341 if !pcopy.is_empty() { 342 if DEBUG.annotate() { 343 instrs.push(Instr::new_boxed(OpAnnotate { 344 annotation: "generated by to_cssa".into(), 345 })); 346 } 347 instrs.push(Instr::new_boxed(pcopy)); 348 } 349 } 350 Op::PhiSrcs(phi) => { 351 let mut pcopy = OpParCopy::new(); 352 for (idx, src) in phi.srcs.iter_mut() { 353 let (ps, file) = cg.phi_set_file(idx); 354 355 debug_assert!(src.src_mod.is_none()); 356 if let SrcRef::SSA(vec) = &src.src_ref { 357 debug_assert!(vec.comps() == 1); 358 if vec[0].file() == file { 359 let ss = cg.ssa_set(&vec[0]); 360 if cg.sets_interfere(ps, ss, &self.blocks) { 361 let tmp = self.ssa_alloc.alloc(file); 362 pcopy.push(tmp.into(), *src); 363 *src = tmp.into(); 364 } else { 365 cg.sets_merge(ps, ss); 366 } 367 continue; 368 } 369 } 370 371 // Non-SSA sources or sources with a different file 372 // from the destination get an actual copy 373 // instruction and are not considered part of the 374 // parallel copy. 375 let tmp = self.ssa_alloc.alloc(file); 376 if DEBUG.annotate() { 377 instrs.push(Instr::new_boxed(OpAnnotate { 378 annotation: "generated by to_cssa".into(), 379 })); 380 } 381 instrs.push(Instr::new_boxed(OpCopy { 382 dst: tmp.into(), 383 src: *src, 384 })); 385 *src = tmp.into(); 386 } 387 388 if !pcopy.is_empty() { 389 if DEBUG.annotate() { 390 instrs.push(Instr::new_boxed(OpAnnotate { 391 annotation: "generated by to_cssa".into(), 392 })); 393 } 394 instrs.push(Instr::new_boxed(pcopy)); 395 } 396 instrs.push(instr); 397 } 398 _ => instrs.push(instr), 399 } 400 } 401 self.blocks[bi].instrs = instrs; 402 } 403 } 404 } 405