// Copyright © 2023 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};

use compiler::cfg::CFG;
use std::collections::HashMap;
use std::iter::Peekable;

/// Merges two sorted iterators into a single sorted iterator
struct MergedIter<I: Iterator> {
    a: Peekable<I>,
    b: Peekable<I>,
}

impl<I: Iterator> MergedIter<I> {
    fn new(a: I, b: I) -> Self {
        Self {
            a: a.peekable(),
            b: b.peekable(),
        }
    }
}

impl<I: Iterator> Iterator for MergedIter<I>
where
    <I as Iterator>::Item: Ord,
{
    type Item = <I as Iterator>::Item;

    fn next(&mut self) -> Option<<I as Iterator>::Item> {
        if let Some(a) = self.a.peek() {
            if let Some(b) = self.b.peek() {
                if a <= b {
                    self.a.next()
                } else {
                    self.b.next()
                }
            } else {
                self.a.next()
            }
        } else {
            self.b.next()
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let (a_min, a_max) = self.a.size_hint();
        let (b_min, b_max) = self.b.size_hint();
        (a_min + b_min, a_max.zip(b_max).map(|(a, b)| a + b))
    }
}

enum CoalesceItem {
    SSA(SSAValue),
    Phi(u32),
}

struct CoalesceNode {
    set: usize,
    block: usize,
    /// The IP of the defining instruction, plus one.  Zero sorts phi
    /// destinations before every instruction in the block and usize::MAX
    /// sorts phi sources after every instruction in the block.
    ip_1: usize,
    item: CoalesceItem,
}

struct CoalesceSet {
    nodes: Vec<usize>,
}

struct CoalesceGraph<'a> {
    live: &'a SimpleLiveness,
    nodes: Vec<CoalesceNode>,
    sets: Vec<CoalesceSet>,
    ssa_node: HashMap<SSAValue, usize>,
    phi_node_file: HashMap<u32, (usize, RegFile)>,
}

impl<'a> CoalesceGraph<'a> {
    fn new(live: &'a SimpleLiveness) -> Self {
        Self {
            live,
            nodes: Vec::new(),
            sets: Vec::new(),
            ssa_node: HashMap::new(),
            phi_node_file: HashMap::new(),
        }
    }

    fn add_ssa(&mut self, ssa: SSAValue) {
        debug_assert!(self.sets.is_empty());

        // Set it to usize::MAX for now.  We'll update it later.
        if self.ssa_node.insert(ssa, usize::MAX).is_none() {
            let (block, ip) = self.live.def_block_ip(&ssa);
            self.nodes.push(CoalesceNode {
                set: usize::MAX,
                block,
                ip_1: ip + 1,
                item: CoalesceItem::SSA(ssa),
            });
        }
    }

    fn add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize) {
        debug_assert!(self.sets.is_empty());

        // Record the register file now.  We'll set the node later.
        let old = self.phi_node_file.insert(phi, (usize::MAX, file));
        debug_assert!(old.is_none());

        self.nodes.push(CoalesceNode {
            set: usize::MAX,
            block,
            ip_1: 0,
            item: CoalesceItem::Phi(phi),
        });
    }

    fn add_phi_src(&mut self, phi: u32, block: usize) {
        debug_assert!(self.sets.is_empty());

        self.nodes.push(CoalesceNode {
            set: usize::MAX,
            block,
            ip_1: usize::MAX,
            item: CoalesceItem::Phi(phi),
        });
    }

    fn init_sets(&mut self, cfg: &CFG<BasicBlock>) {
        // Sort the nodes by dom_dfs_pre_index followed by ip+1.  Stash the
        // dom_dfs_pre_index in the set field for now.  We don't actually
        // fill out the set field until later.
        for n in self.nodes.iter_mut() {
            n.set = cfg.dom_dfs_pre_index(n.block);
        }
        self.nodes
            .sort_by(|a, b| a.set.cmp(&b.set).then(a.ip_1.cmp(&b.ip_1)));

        for ni in 0..self.nodes.len() {
            match &self.nodes[ni].item {
                CoalesceItem::SSA(ssa) => {
                    let old = self.ssa_node.insert(*ssa, ni);
                    debug_assert!(old == Some(usize::MAX));

                    self.nodes[ni].set = self.sets.len();
                    self.sets.push(CoalesceSet { nodes: vec![ni] });
                }
                CoalesceItem::Phi(phi) => {
                    let (pn, _) = self.phi_node_file.get_mut(phi).unwrap();
                    // We only want one set per phi and phi_node_file
                    // contains the index of any one of the nodes.
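                    // The first node we see for a given phi allocates the
                    // set; every later node for the same phi joins it.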
                    if *pn == usize::MAX {
                        self.nodes[ni].set = self.sets.len();
                        self.sets.push(CoalesceSet { nodes: vec![ni] });
                        *pn = ni;
                    } else {
                        let s = self.nodes[*pn].set;
                        self.nodes[ni].set = s;
                        // Nodes are visited in sorted order so pushing
                        // here keeps the set's node list sorted.
                        self.sets[s].nodes.push(ni);
                    }
                }
            }
        }
    }

    fn node_dominates(
        &self,
        p: usize,
        c: usize,
        cfg: &CFG<BasicBlock>,
    ) -> bool {
        if self.nodes[p].block == self.nodes[c].block {
            self.nodes[p].ip_1 <= self.nodes[c].ip_1
        } else {
            cfg.dominates(self.nodes[p].block, self.nodes[c].block)
        }
    }

    fn phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool {
        if phi.ip_1 == 0 {
            // Phi destinations sit at the top of the block
            self.live.block_live(phi.block).is_live_in(ssa)
        } else {
            // Phi sources sit at the bottom of the block
            debug_assert!(phi.ip_1 == usize::MAX);
            self.live.block_live(phi.block).is_live_out(ssa)
        }
    }

    fn nodes_interfere(&self, a: usize, b: usize) -> bool {
        let a = &self.nodes[a];
        let b = &self.nodes[b];
        match &a.item {
            CoalesceItem::SSA(a_ssa) => match &b.item {
                CoalesceItem::SSA(b_ssa) => self.live.interferes(a_ssa, b_ssa),
                CoalesceItem::Phi(_) => self.phi_ssa_interferes(b, a_ssa),
            },
            CoalesceItem::Phi(_) => match &b.item {
                CoalesceItem::SSA(b_ssa) => self.phi_ssa_interferes(a, b_ssa),
                CoalesceItem::Phi(_) => {
                    // Phi nodes represent the temporary SSA value created
                    // between the parallel copy and the phi in the
                    // Boissinot algorithm so they interfere if and only if
                    // they're in the same block and both at the start or
                    // both at the end.
                    a.block == b.block && a.ip_1 == b.ip_1
                }
            },
        }
    }

    pub fn sets_interfere(
        &self,
        a: usize,
        b: usize,
        cfg: &CFG<BasicBlock>,
    ) -> bool {
        let a = &self.sets[a];
        let b = &self.sets[b];

        // Since both node lists are sorted in dominance pre-order, one
        // linear walk with a stack of dominators suffices.  This is the
        // linear-time set interference check from the Boissinot paper.
        //
        // dom is a stack of nodes which dominate the current node.
        let mut dom = Vec::new();
        for n in MergedIter::new(a.nodes.iter(), b.nodes.iter()) {
            while let Some(p) = dom.last() {
                if !self.node_dominates(*p, *n, cfg) {
                    dom.pop();
                } else {
                    break;
                }
            }

            if let Some(p) = dom.last() {
                if self.nodes_interfere(*n, *p) {
                    return true;
                }
            }

            dom.push(*n);
        }

        false
    }

    pub fn sets_merge(&mut self, a: usize, b: usize) -> usize {
        let a_nodes = std::mem::take(&mut self.sets[a].nodes);
        let b_nodes = std::mem::take(&mut self.sets[b].nodes);
        let nodes = MergedIter::new(a_nodes.into_iter(), b_nodes.into_iter());
        self.sets[a].nodes = nodes
            .map(|n| {
                self.nodes[n].set = a;
                n
            })
            .collect();
        a
    }

    pub fn ssa_set(&self, ssa: &SSAValue) -> usize {
        self.nodes[*self.ssa_node.get(ssa).unwrap()].set
    }

    pub fn phi_set_file(&self, phi: &u32) -> (usize, RegFile) {
        let (n, file) = self.phi_node_file.get(phi).unwrap();
        (self.nodes[*n].set, *file)
    }
}

impl Function {
    /// Convert a function to CSSA (Conventional SSA) form
    ///
    /// In "Translating Out of Static Single Assignment Form" by Sreedhar
    /// et al., they define CSSA form via what they call the Phi Congruence
    /// Property:
    ///
    /// > The occurrences of all resources which belong to the same phi
    /// > congruence class in a program can be replaced by a representative
    /// > resource. After the replacement, the phi instruction can be
    /// > eliminated without violating the semantics of the original
    /// > program.
    ///
    /// A more compiler-theoretic definition of CSSA form is a version of
    /// SSA form in which, for each phi, none of the SSA values involved in
    /// the phi (either as a source or destination) interfere.  While most
    /// of the papers discussing CSSA form do so in the context of
    /// out-of-SSA, this property is also useful for SSA-based spilling and
    /// register allocation.
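    ///
    /// As a sketch (illustrative IR, not exact NAK syntax), consider a phi
    /// whose destination is used together with one of its sources:
    ///
    /// ```text
    /// block b0:
    ///     x = ...
    ///     goto b1
    /// block b1:  // preds: b0, b2
    ///     y = phi(x, z)
    ///     ... = use(x, y)
    /// ```
    ///
    /// Here, x is live across the phi so x and y interfere and the two
    /// cannot be replaced by a single representative resource.  Inserting
    /// a copy x' = x at the end of b0 and rewriting the phi to take x'
    /// makes the program CSSA.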
    ///
    /// Our implementation is based on the algorithm described in
    /// "Revisiting Out-of-SSA Translation for Correctness, Code Quality,
    /// and Efficiency" by Boissinot et al.  The primary difference between
    /// this algorithm and the one in that paper is that we don't actually
    /// insert parallel copies and remove redundant entries.  Instead, we
    /// treat OpPhiSrcs and OpPhiDsts as the parallel copies, with the phi
    /// index standing in for all of the SSA values used directly by the
    /// phi.  Then, instead of removing copies where the source and
    /// destination don't interfere, we insert copies whenever the source
    /// or destination and the phi index do interfere.  This lets us avoid
    /// inserting pointless instructions.
    pub fn to_cssa(&mut self) {
        let live = SimpleLiveness::for_function(self);

        let mut cg = CoalesceGraph::new(&live);
        for (bi, b) in self.blocks.iter().enumerate() {
            if let Some(phi) = b.phi_dsts() {
                for (idx, dst) in phi.dsts.iter() {
                    let vec = dst.as_ssa().unwrap();
                    debug_assert!(vec.comps() == 1);
                    cg.add_ssa(vec[0]);
                    cg.add_phi_dst(*idx, vec[0].file(), bi);
                }
            }
            if let Some(phi) = b.phi_srcs() {
                for (idx, src) in phi.srcs.iter() {
                    if let SrcRef::SSA(vec) = src.src_ref {
                        debug_assert!(vec.comps() == 1);
                        cg.add_ssa(vec[0]);
                    }
                    cg.add_phi_src(*idx, bi);
                }
            }
        }
        cg.init_sets(&self.blocks);

        for bi in 0..self.blocks.len() {
            let block_instrs = std::mem::take(&mut self.blocks[bi].instrs);
            let mut instrs = Vec::new();

            for mut instr in block_instrs.into_iter() {
                match &mut instr.op {
                    Op::PhiDsts(phi) => {
                        let mut pcopy = OpParCopy::new();

                        for (idx, dst) in phi.dsts.iter_mut() {
                            let (ps, file) = cg.phi_set_file(idx);
                            let vec = dst.as_ssa().unwrap();
                            debug_assert!(vec.comps() == 1);
                            debug_assert!(vec[0].file() == file);
                            let ds = cg.ssa_set(&vec[0]);

                            if !cg.sets_interfere(ps, ds, &self.blocks) {
                                cg.sets_merge(ps, ds);
                                continue;
                            }

                            // The phi writes a temporary and the parallel
                            // copy moves it into the original destination.
                            let tmp = self.ssa_alloc.alloc(file);
                            pcopy.push(*dst, tmp.into());
                            *dst = tmp.into();
                        }

                        instrs.push(instr);
                        if !pcopy.is_empty() {
                            if DEBUG.annotate() {
                                instrs.push(Instr::new_boxed(OpAnnotate {
                                    annotation: "generated by to_cssa".into(),
                                }));
                            }
                            instrs.push(Instr::new_boxed(pcopy));
                        }
                    }
                    Op::PhiSrcs(phi) => {
                        let mut pcopy = OpParCopy::new();

                        for (idx, src) in phi.srcs.iter_mut() {
                            let (ps, file) = cg.phi_set_file(idx);
                            debug_assert!(src.src_mod.is_none());
                            if let SrcRef::SSA(vec) = &src.src_ref {
                                debug_assert!(vec.comps() == 1);
                                if vec[0].file() == file {
                                    let ss = cg.ssa_set(&vec[0]);
                                    if cg.sets_interfere(ps, ss, &self.blocks)
                                    {
                                        let tmp = self.ssa_alloc.alloc(file);
                                        pcopy.push(tmp.into(), *src);
                                        *src = tmp.into();
                                    } else {
                                        cg.sets_merge(ps, ss);
                                    }
                                    continue;
                                }
                            }

                            // Non-SSA sources or sources with a different
                            // file from the destination get an actual copy
                            // instruction and are not considered part of
                            // the parallel copy.
                            let tmp = self.ssa_alloc.alloc(file);
                            if DEBUG.annotate() {
                                instrs.push(Instr::new_boxed(OpAnnotate {
                                    annotation: "generated by to_cssa".into(),
                                }));
                            }
                            instrs.push(Instr::new_boxed(OpCopy {
                                dst: tmp.into(),
                                src: *src,
                            }));
                            *src = tmp.into();
                        }

                        if !pcopy.is_empty() {
                            if DEBUG.annotate() {
                                instrs.push(Instr::new_boxed(OpAnnotate {
                                    annotation: "generated by to_cssa".into(),
                                }));
                            }
                            instrs.push(Instr::new_boxed(pcopy));
                        }
                        instrs.push(instr);
                    }
                    _ => instrs.push(instr),
                }
            }

            self.blocks[bi].instrs = instrs;
        }
    }
}
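
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of a sanity check for MergedIter: merging two
    // sorted iterators should yield every element of both, still in
    // sorted order.
    #[test]
    fn merged_iter_yields_sorted_union() {
        let a = vec![1, 3, 5];
        let b = vec![2, 3, 6];
        let merged: Vec<_> =
            MergedIter::new(a.into_iter(), b.into_iter()).collect();
        assert_eq!(merged, vec![1, 2, 3, 3, 5, 6]);
    }
}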