// Copyright © 2023 Collabora, Ltd.
// SPDX-License-Identifier: MIT

#![allow(unstable_name_collisions)]

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{
    BlockLiveness, LiveSet, Liveness, NextUseBlockLiveness, NextUseLiveness,
};
use compiler::bitset::BitSet;

use std::cell::RefCell;
use std::cmp::{max, Ordering, Reverse};
use std::collections::{BinaryHeap, HashMap, HashSet};

struct PhiDstMap {
    ssa_phi: HashMap<SSAValue, u32>,
}

impl PhiDstMap {
    fn new() -> PhiDstMap {
        PhiDstMap {
            ssa_phi: HashMap::new(),
        }
    }

    fn add_phi_dst(&mut self, phi_idx: u32, dst: Dst) {
        let vec = dst.as_ssa().expect("Not an SSA destination");
        debug_assert!(vec.comps() == 1);
        self.ssa_phi.insert(vec[0], phi_idx);
    }

    pub fn from_block(block: &BasicBlock) -> PhiDstMap {
        let mut map = PhiDstMap::new();
        if let Some(phi) = block.phi_dsts() {
            for (idx, dst) in phi.dsts.iter() {
                map.add_phi_dst(*idx, *dst);
            }
        }
        map
    }

    fn get_phi_idx(&self, ssa: &SSAValue) -> Option<&u32> {
        self.ssa_phi.get(ssa)
    }
}

struct PhiSrcMap {
    phi_src: HashMap<u32, SSAValue>,
}

impl PhiSrcMap {
    fn new() -> PhiSrcMap {
        PhiSrcMap {
            phi_src: HashMap::new(),
        }
    }

    fn add_phi_src(&mut self, phi_idx: u32, src: Src) {
        debug_assert!(src.src_mod.is_none());
        let vec = src.src_ref.as_ssa().expect("Not an SSA source");
        debug_assert!(vec.comps() == 1);
        self.phi_src.insert(phi_idx, vec[0]);
    }

    pub fn from_block(block: &BasicBlock) -> PhiSrcMap {
        let mut map = PhiSrcMap::new();
        if let Some(phi) = block.phi_srcs() {
            for (idx, src) in phi.srcs.iter() {
                map.add_phi_src(*idx, *src);
            }
        }
        map
    }

    pub fn get_src_ssa(&self, phi_idx: &u32) -> &SSAValue {
        self.phi_src.get(phi_idx).expect("Phi source missing")
    }
}

trait Spill {
    fn spill_file(&self, file: RegFile) -> RegFile;
    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr>;
    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr>;
}

struct SpillUniform {}

impl SpillUniform {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillUniform {
    fn spill_file(&self, file: RegFile) -> RegFile {
        debug_assert!(file.is_uniform());
        file.to_warp()
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src: src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        Instr::new_boxed(OpR2UR {
            dst: dst,
            src: src.into(),
        })
    }
}

struct SpillPred {}

impl SpillPred {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillPred {
    fn spill_file(&self, file: RegFile) -> RegFile {
        match file {
            RegFile::Pred => RegFile::GPR,
            RegFile::UPred => RegFile::UGPR,
            _ => panic!("Unsupported register file"),
        }
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(matches!(dst.file(), RegFile::GPR | RegFile::UGPR));
        if let Some(b) = src.as_bool() {
            let u32_src = if b {
                Src::new_imm_u32(!0)
            } else {
                Src::new_zero()
            };
            Instr::new_boxed(OpCopy {
                dst: dst.into(),
                src: u32_src,
            })
        } else {
            Instr::new_boxed(OpSel {
                dst: dst.into(),
                cond: src.bnot(),
                srcs: [Src::new_zero(), Src::new_imm_u32(!0)],
            })
        }
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(matches!(src.file(), RegFile::GPR | RegFile::UGPR));
        Instr::new_boxed(OpISetP {
            dst: dst,
            set_op: PredSetOp::And,
            cmp_op: IntCmpOp::Ne,
            cmp_type: IntCmpType::U32,
            ex: false,
            srcs: [Src::new_zero(), src.into()],
            accum: true.into(),
            low_cmp: true.into(),
        })
    }
}
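
// Note on SpillPred's spill()/fill() pair above: the OpSel spill writes ~0
// for true and 0 for false (the condition is inverted, so srcs[1] is taken
// when the predicate is true), and the OpISetP fill compares the reloaded
// value against zero with IntCmpOp::Ne. Any nonzero pattern therefore reads
// back as true, making the two halves round-trip exactly.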
struct SpillBar {}

impl SpillBar {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillBar {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::Bar);
        RegFile::GPR
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst: dst.into(),
            src: src,
            clear: false,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::GPR);
        Instr::new_boxed(OpBMov {
            dst: dst,
            src: src.into(),
            clear: false,
        })
    }
}

struct SpillGPR {}

impl SpillGPR {
    fn new() -> Self {
        Self {}
    }
}

impl Spill for SpillGPR {
    fn spill_file(&self, file: RegFile) -> RegFile {
        assert!(file == RegFile::GPR);
        RegFile::Mem
    }

    fn spill(&self, dst: SSAValue, src: Src) -> Box<Instr> {
        assert!(dst.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst: dst.into(),
            src: src,
        })
    }

    fn fill(&self, dst: Dst, src: SSAValue) -> Box<Instr> {
        assert!(src.file() == RegFile::Mem);
        Instr::new_boxed(OpCopy {
            dst: dst,
            src: src.into(),
        })
    }
}

#[derive(Eq, PartialEq)]
struct SSANextUse {
    ssa: SSAValue,
    next_use: usize,
}

impl SSANextUse {
    fn new(ssa: SSAValue, next_use: usize) -> SSANextUse {
        SSANextUse {
            ssa: ssa,
            next_use: next_use,
        }
    }
}

impl Ord for SSANextUse {
    fn cmp(&self, other: &Self) -> Ordering {
        self.next_use
            .cmp(&other.next_use)
            .then_with(|| self.ssa.idx().cmp(&other.ssa.idx()))
    }
}

impl PartialOrd for SSANextUse {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

struct SpillCache<'a, S: Spill> {
    alloc: &'a mut SSAValueAllocator,
    spill: S,
    val_spill: HashMap<SSAValue, SSAValue>,
}

impl<'a, S: Spill> SpillCache<'a, S> {
    fn new(alloc: &'a mut SSAValueAllocator, spill: S) -> SpillCache<'a, S> {
        SpillCache {
            alloc: alloc,
            spill: spill,
            val_spill: HashMap::new(),
        }
    }

    fn spill_file(&self, file: RegFile) -> RegFile {
        self.spill.spill_file(file)
    }

    fn get_spill(&mut self, ssa: SSAValue) -> SSAValue {
        *self.val_spill.entry(ssa).or_insert_with(|| {
            self.alloc.alloc(self.spill.spill_file(ssa.file()))
        })
    }

    fn spill_src(&mut self, ssa: SSAValue, src: Src) -> Box<Instr> {
        let dst = self.get_spill(ssa);
        self.spill.spill(dst, src)
    }

    fn spill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.spill_src(ssa, ssa.into())
    }

    fn fill_dst(&mut self, dst: Dst, ssa: SSAValue) -> Box<Instr> {
        let src = self.get_spill(ssa);
        self.spill.fill(dst, src)
    }

    fn fill(&mut self, ssa: SSAValue) -> Box<Instr> {
        self.fill_dst(ssa.into(), ssa)
    }
}
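
/// Chooses up to `count` spill candidates, preferring values whose next use
/// is furthest from `ip`. Wrapping each entry in `Reverse` turns
/// `BinaryHeap`'s max-heap into a min-heap, so once the heap holds more than
/// `count` entries, `pop` evicts the candidate with the nearest next use and
/// only the best spill choices survive.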
struct SpillChooser<'a> {
    bl: &'a NextUseBlockLiveness,
    pinned: &'a HashSet<SSAValue>,
    ip: usize,
    count: usize,
    spills: BinaryHeap<Reverse<SSANextUse>>,
    min_next_use: usize,
}

struct SpillChoiceIter {
    spills: BinaryHeap<Reverse<SSANextUse>>,
}

impl<'a> SpillChooser<'a> {
    pub fn new(
        bl: &'a NextUseBlockLiveness,
        pinned: &'a HashSet<SSAValue>,
        ip: usize,
        count: usize,
    ) -> Self {
        Self {
            bl: bl,
            pinned: pinned,
            ip: ip,
            count: count,
            spills: BinaryHeap::new(),
            min_next_use: ip + 1,
        }
    }

    pub fn add_candidate(&mut self, ssa: SSAValue) {
        // Don't spill anything that's pinned
        if self.pinned.contains(&ssa) {
            return;
        }

        // Ignore anything used sooner than spill options we've already
        // rejected.
        let next_use = self.bl.next_use_after_or_at_ip(&ssa, self.ip).unwrap();
        if next_use < self.min_next_use {
            return;
        }

        self.spills.push(Reverse(SSANextUse::new(ssa, next_use)));

        if self.spills.len() > self.count {
            // Because we reversed the heap, pop actually removes the
            // one with the lowest next_use which is what we want here.
            let old = self.spills.pop().unwrap();
            debug_assert!(self.spills.len() == self.count);
            self.min_next_use = max(self.min_next_use, old.0.next_use);
        }
    }
}

impl<'a> IntoIterator for SpillChooser<'a> {
    type Item = SSAValue;
    type IntoIter = SpillChoiceIter;

    fn into_iter(self) -> SpillChoiceIter {
        SpillChoiceIter {
            spills: self.spills,
        }
    }
}

impl Iterator for SpillChoiceIter {
    type Item = SSAValue;

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.spills.len();
        (len, Some(len))
    }

    fn next(&mut self) -> Option<SSAValue> {
        self.spills.pop().map(|x| x.0.ssa)
    }
}

#[derive(Clone)]
struct SSAState {
    // The set of variables which currently exist in registers
    w: LiveSet,

    // The set of variables which have already been spilled. These don't
    // need to be spilled again.
    s: HashSet<SSAValue>,

    // The set of pinned variables
    p: HashSet<SSAValue>,
}
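
/// Inserts spills and fills so that, at every instruction, at most `limit`
/// registers of `file` are simultaneously resident.
///
/// The walk below maintains Braun and Hack's invariant, which we
/// debug-assert at block entry: every value live at the current point is
/// either resident (in `w`) or has a spill copy (in `s`).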
fn spill_values<S: Spill>(
    func: &mut Function,
    file: RegFile,
    limit: u32,
    spill: S,
) {
    let files = RegFileSet::from_iter([file]);
    let live = NextUseLiveness::for_function(func, &files);
    let blocks = &mut func.blocks;

    // Record the set of SSA values used within each loop
    let mut phi_dst_maps = Vec::new();
    let mut phi_src_maps = Vec::new();
    let mut loop_uses = HashMap::new();
    for b_idx in 0..blocks.len() {
        phi_dst_maps.push(PhiDstMap::from_block(&blocks[b_idx]));
        phi_src_maps.push(PhiSrcMap::from_block(&blocks[b_idx]));

        if let Some(lh_idx) = blocks.loop_header_index(b_idx) {
            let uses = loop_uses
                .entry(lh_idx)
                .or_insert_with(|| RefCell::new(HashSet::new()));
            let uses = uses.get_mut();
            for instr in &blocks[b_idx].instrs {
                instr.for_each_ssa_use(|ssa| {
                    if ssa.file() == file {
                        uses.insert(*ssa);
                    }
                });
            }
        }
    }

    if !loop_uses.is_empty() {
        // The previous loop only added values to the uses set for the
        // inner-most loop. Propagate from inner loops to outer loops.
        for b_idx in (0..blocks.len()).rev() {
            let Some(uses) = loop_uses.get(&b_idx) else {
                continue;
            };
            let uses = uses.borrow();

            let Some(dom) = blocks.dom_parent_index(b_idx) else {
                continue;
            };
            let Some(dom_lh_idx) = blocks.loop_header_index(dom) else {
                continue;
            };
            let mut parent_uses =
                loop_uses.get(&dom_lh_idx).unwrap().borrow_mut();
            for ssa in uses.iter() {
                parent_uses.insert(*ssa);
            }
        }
    }

    let mut spill = SpillCache::new(&mut func.ssa_alloc, spill);
    let mut spilled_phis = BitSet::new();

    let mut ssa_state_in: Vec<SSAState> = Vec::new();
    let mut ssa_state_out: Vec<SSAState> = Vec::new();

    for b_idx in 0..blocks.len() {
        let bl = live.block_live(b_idx);

        let preds = blocks.pred_indices(b_idx).to_vec();
        let w = if preds.is_empty() {
            // This is the start block so we start with nothing in
            // registers.
            LiveSet::new()
        } else if preds.len() == 1 {
            // If we only have one predecessor then it can't possibly be a
            // loop header and we can just copy the predecessor's w.
            assert!(!blocks.is_loop_header(b_idx));
            assert!(preds[0] < b_idx);
            let p_w = &ssa_state_out[preds[0]].w;
            LiveSet::from_iter(
                p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else if !blocks[b_idx].uniform && file.is_uniform() {
            // If this is a non-uniform block, then we can't spill or fill
            // any uniform registers. The good news is that none of our
            // non-uniform predecessors could spill, either, so we know
            // that everything that was resident coming in will fit in the
            // register file.
            let mut w = LiveSet::new();
            for p_idx in &preds {
                if *p_idx < b_idx {
                    let p_w = &ssa_state_out[*p_idx].w;
                    w.extend(
                        p_w.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
                    );
                }
            }
            debug_assert!(w.count(file) <= limit);
            w
        } else if blocks.is_loop_header(b_idx) {
            let mut i_b: HashSet<SSAValue> =
                HashSet::from_iter(bl.iter_live_in().cloned());
            if let Some(phi) = blocks[b_idx].phi_dsts() {
                for (_, dst) in phi.dsts.iter() {
                    if let Dst::SSA(vec) = dst {
                        assert!(vec.comps() == 1);
                        let ssa = vec[0];
                        if ssa.file() == file {
                            i_b.insert(ssa);
                        }
                    }
                }
            }

            let lu = loop_uses.get(&b_idx).unwrap().borrow();

            let mut w = LiveSet::new();
            let mut some = BinaryHeap::new();
            for ssa in i_b.iter() {
                if lu.contains(ssa) {
                    let next_use = bl.first_use(ssa).unwrap();
                    some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                w.insert(entry.0.ssa);
            }

            // If we still have room, consider values which aren't used
            // inside the loop.
            if w.count(file) < limit {
                for ssa in i_b.iter() {
                    debug_assert!(ssa.file() == file);
                    if !lu.contains(ssa) {
                        let next_use = bl.first_use(ssa).unwrap();
                        some.push(Reverse(SSANextUse::new(*ssa, next_use)));
                    }
                }
                while w.count(file) < limit {
                    let Some(entry) = some.pop() else {
                        break;
                    };
                    w.insert(entry.0.ssa);
                }
            }

            w
        } else {
            let phi_dst_map = &phi_dst_maps[b_idx];

            struct SSAPredInfo {
                num_preds: usize,
                next_use: usize,
            }
            let mut live: HashMap<SSAValue, SSAPredInfo> = HashMap::new();

            for p_idx in &preds {
                let phi_src_map = &phi_src_maps[*p_idx];
                for mut ssa in ssa_state_out[*p_idx].w.iter().cloned() {
                    if let Some(phi) = phi_dst_map.get_phi_idx(&ssa) {
                        ssa = *phi_src_map.get_src_ssa(phi);
                    }
                    if let Some(next_use) = bl.first_use(&ssa) {
                        live.entry(ssa)
                            .and_modify(|e| e.num_preds += 1)
                            .or_insert_with(|| SSAPredInfo {
                                num_preds: 1,
                                next_use: next_use,
                            });
                    }
                }
            }

            let mut w = LiveSet::new();
            let mut some = BinaryHeap::new();
            for (ssa, info) in live.drain() {
                if info.num_preds == preds.len() {
                    // This one is in all the input sets
                    w.insert(ssa);
                } else {
                    some.push(Reverse(SSANextUse::new(ssa, info.next_use)));
                }
            }
            while w.count(file) < limit {
                let Some(entry) = some.pop() else {
                    break;
                };
                let ssa = entry.0.ssa;
                assert!(ssa.file() == file);
                w.insert(ssa);
            }

            w
        };

        let s = if preds.is_empty() {
            HashSet::new()
        } else if preds.len() == 1 {
            let p_s = &ssa_state_out[preds[0]].s;
            HashSet::from_iter(
                p_s.iter().filter(|ssa| bl.is_live_in(ssa)).cloned(),
            )
        } else {
            let mut s = HashSet::new();
            for p_idx in &preds {
                if *p_idx >= b_idx {
                    continue;
                }

                // We diverge a bit from Braun and Hack here. They assume
                // that S is a subset of W which is clearly bogus. Instead,
                // we take the union of all forward edge predecessor S_out
                // and intersect with live-in for the current block.
                for ssa in ssa_state_out[*p_idx].s.iter() {
                    if bl.is_live_in(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            // The loop header heuristic sometimes drops stuff from W that
            // has never been spilled so we need to make sure everything
            // live-in which isn't in W is included in the spill set so
            // that it gets properly spilled when we spill across CF edges.
            if blocks.is_loop_header(b_idx) {
                for ssa in bl.iter_live_in() {
                    if !w.contains(ssa) {
                        s.insert(*ssa);
                    }
                }
            }

            s
        };
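
        // P is the set of pinned values: everything defined by an OpPin
        // below stays resident and is never offered to SpillChooser as a
        // spill candidate.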
        let mut p = HashSet::new();
        for p_idx in &preds {
            if *p_idx < b_idx {
                let p_p = &ssa_state_out[*p_idx].p;
                p.extend(p_p.iter().filter(|ssa| bl.is_live_in(ssa)).cloned());
            }
        }

        for ssa in bl.iter_live_in() {
            debug_assert!(w.contains(ssa) || s.contains(ssa));
        }

        let mut b = SSAState { w: w, s: s, p: p };

        assert!(ssa_state_in.len() == b_idx);
        ssa_state_in.push(b.clone());

        let bb = &mut blocks[b_idx];

        let mut instrs = Vec::new();
        for (ip, mut instr) in bb.instrs.drain(..).enumerate() {
            match &mut instr.op {
                Op::PhiDsts(phi) => {
                    // For phis, anything that is not in W needs to be
                    // spilled by setting the destination to some spill
                    // value.
                    for (idx, dst) in phi.dsts.iter_mut() {
                        let vec = dst.as_ssa().unwrap();
                        debug_assert!(vec.comps() == 1);
                        let ssa = &vec[0];
                        if ssa.file() == file && !b.w.contains(ssa) {
                            spilled_phis.insert((*idx).try_into().unwrap());
                            b.s.insert(*ssa);
                            *dst = spill.get_spill(*ssa).into();
                        }
                    }
                }
                Op::PhiSrcs(_) => {
                    // We handle phi sources later. For now, leave them be.
                }
                Op::ParCopy(pcopy) => {
                    let mut num_w_dsts = 0_u32;
                    for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                        let dst_vec = dst.as_ssa().unwrap();
                        debug_assert!(dst_vec.comps() == 1);
                        let dst_ssa = &dst_vec[0];
                        debug_assert!(src.src_mod.is_none());
                        let src_vec = src.src_ref.as_ssa().unwrap();
                        debug_assert!(src_vec.comps() == 1);
                        let src_ssa = &src_vec[0];

                        debug_assert!(dst_ssa.file() == src_ssa.file());
                        if src_ssa.file() != file {
                            continue;
                        }

                        // If it's not resident, rewrite to just move from
                        // one spill to another, assuming that copying in
                        // spill space is efficient
                        if b.w.contains(src_ssa) {
                            num_w_dsts += 1;
                        } else {
                            debug_assert!(b.s.contains(src_ssa));
                            b.s.insert(*dst_ssa);
                            *src = spill.get_spill(*src_ssa).into();
                            *dst = spill.get_spill(*dst_ssa).into();
                        }
                    }

                    // We can now assume that a source is in W if and only
                    // if the file matches. Remove all killed sources from
                    // W.
                    for (_, src) in pcopy.dsts_srcs.iter() {
                        let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                        if !bl.is_live_after_ip(src_ssa, ip) {
                            b.w.remove(src_ssa);
                        }
                    }

                    let rel_limit = limit - b.w.count(file);
                    if num_w_dsts > rel_limit {
                        // We can't spill uniform registers in a
                        // non-uniform block
                        assert!(bb.uniform || !file.is_uniform());

                        let count = num_w_dsts - rel_limit;
                        let count = count.try_into().unwrap();
                        let mut spills =
                            SpillChooser::new(bl, &b.p, ip, count);
                        for (dst, _) in pcopy.dsts_srcs.iter() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            if dst_ssa.file() == file {
                                spills.add_candidate(*dst_ssa);
                            }
                        }

                        let spills: HashSet<SSAValue> =
                            HashSet::from_iter(spills);

                        for (dst, src) in pcopy.dsts_srcs.iter_mut() {
                            let dst_ssa = &dst.as_ssa().unwrap()[0];
                            let src_ssa = &src.src_ref.as_ssa().unwrap()[0];
                            if spills.contains(dst_ssa) {
                                if b.s.insert(*src_ssa) {
                                    if DEBUG.annotate() {
                                        instrs.push(Instr::new_boxed(
                                            OpAnnotate {
                                                annotation:
                                                    "generated by spill_values"
                                                        .into(),
                                            },
                                        ));
                                    }
                                    instrs.push(spill.spill(*src_ssa));
                                }
                                b.s.insert(*dst_ssa);
                                *src = spill.get_spill(*src_ssa).into();
                                *dst = spill.get_spill(*dst_ssa).into();
                            }
                        }
                    }

                    for (dst, _) in pcopy.dsts_srcs.iter() {
                        let dst_ssa = &dst.as_ssa().unwrap()[0];
                        if dst_ssa.file() == file {
                            b.w.insert(*dst_ssa);
                        }
                    }
                }
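
                // Every other instruction takes the generic path: fill any
                // spilled sources before the instruction, then spill the
                // resident values with the furthest next use until the
                // instruction's register demand fits under the limit.
                // Non-uniform blocks need special handling first because
                // uniform registers can't be spilled there.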
                _ => {
                    if file == RegFile::UGPR && !bb.uniform {
                        // We can't spill UGPRs in a non-uniform block.
                        // Instead, we depend on two facts:
                        //
                        // 1. Uniform instructions are not allowed in
                        //    non-uniform blocks
                        //
                        // 2. Non-uniform instructions can always accept a
                        //    wave register in lieu of a uniform register
                        //
                        debug_assert!(spill.spill_file(file) == RegFile::GPR);
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                *ssa = spill.get_spill(*ssa).into();
                            }
                        });
                    } else if file == RegFile::UPred && !bb.uniform {
                        // We can't spill UPreds in a non-uniform block.
                        // Instead, we depend on three facts:
                        //
                        // 1. Uniform instructions are not allowed in
                        //    non-uniform blocks
                        //
                        // 2. Non-uniform instructions can always accept a
                        //    wave register in lieu of a uniform register
                        //
                        // 3. We can un-spill from a UGPR directly to a Pred
                        //
                        // This also shouldn't come up that often in
                        // practice so it's okay to un-spill every time on
                        // the spot.
                        //
                        instr.for_each_ssa_use_mut(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(
                                        OpAnnotate {
                                            annotation:
                                                "generated by spill_values"
                                                    .into(),
                                        },
                                    ));
                                }
                                let tmp = spill.alloc.alloc(RegFile::Pred);
                                instrs.push(spill.fill_dst(tmp.into(), *ssa));
                                *ssa = tmp;
                            }
                        });
                    } else {
                        // First compute fills even though those have to
                        // come after spills.
                        let mut fills = Vec::new();
                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file && !b.w.contains(ssa) {
                                debug_assert!(b.s.contains(ssa));
                                debug_assert!(bb.uniform || !ssa.is_uniform());
                                fills.push(spill.fill(*ssa));
                                b.w.insert(*ssa);
                            }
                        });

                        let rel_pressure =
                            bl.get_instr_pressure(ip, &instr)[file];
                        let abs_pressure =
                            b.w.count(file) + u32::from(rel_pressure);

                        if abs_pressure > limit {
                            let count = abs_pressure - limit;
                            let count = count.try_into().unwrap();
                            let mut spills =
                                SpillChooser::new(bl, &b.p, ip, count);
                            for ssa in b.w.iter() {
                                spills.add_candidate(*ssa);
                            }

                            for ssa in spills {
                                debug_assert!(ssa.file() == file);
                                b.w.remove(&ssa);
                                if DEBUG.annotate() {
                                    instrs.push(Instr::new_boxed(
                                        OpAnnotate {
                                            annotation:
                                                "generated by spill_values"
                                                    .into(),
                                        },
                                    ));
                                }
                                instrs.push(spill.spill(ssa));
                                b.s.insert(ssa);
                            }
                        }

                        if DEBUG.annotate() {
                            instrs.push(Instr::new_boxed(OpAnnotate {
                                annotation: "generated by spill_values"
                                    .into(),
                            }));
                        }
                        instrs.append(&mut fills);

                        instr.for_each_ssa_use(|ssa| {
                            if ssa.file() == file {
                                debug_assert!(b.w.contains(ssa));
                            }
                        });

                        b.w.insert_instr_top_down(ip, &instr, bl);
                    }
                }
            }

            // OpPin takes the normal spilling path but we want to also
            // mark any of its destination SSA values as pinned.
            if matches!(&instr.op, Op::Pin(_)) {
                instr.for_each_ssa_def(|ssa| {
                    b.p.insert(*ssa);
                });
            }

            instrs.push(instr);
        }
        bb.instrs = instrs;

        assert!(ssa_state_out.len() == b_idx);
        ssa_state_out.push(b);
    }

    // Now that everything is spilled, we handle phi sources and connect
    // the blocks by adding spills and fills as needed along edges.
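    //
    // Edge repair code has to live at the end of the predecessor block, so
    // this pass relies on critical edges having been split: any block with
    // multiple successors may only have successors with a single
    // predecessor, which we debug-assert below.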
    for p_idx in 0..blocks.len() {
        let succ = blocks.succ_indices(p_idx);
        if succ.len() != 1 {
            // We don't have any critical edges
            for s_idx in succ {
                debug_assert!(blocks.pred_indices(*s_idx).len() == 1);
            }
            continue;
        }
        let s_idx = succ[0];

        let pb = &mut blocks[p_idx];
        let p_out = &ssa_state_out[p_idx];
        let s_in = &ssa_state_in[s_idx];
        let phi_dst_map = &phi_dst_maps[s_idx];

        let mut spills = Vec::new();
        let mut fills = Vec::new();

        if let Some(phi) = pb.phi_srcs_mut() {
            for (idx, src) in phi.srcs.iter_mut() {
                debug_assert!(src.src_mod.is_none());
                let vec = src.src_ref.as_ssa().unwrap();
                debug_assert!(vec.comps() == 1);
                let ssa = &vec[0];
                if ssa.file() != file {
                    continue;
                }

                if spilled_phis.get((*idx).try_into().unwrap()) {
                    if !p_out.s.contains(ssa) {
                        spills.push(*ssa);
                    }
                    *src = spill.get_spill(*ssa).into();
                } else {
                    if !p_out.w.contains(ssa) {
                        fills.push(*ssa);
                    }
                }
            }
        }

        for ssa in s_in.s.iter() {
            if p_out.w.contains(ssa) && !p_out.s.contains(ssa) {
                spills.push(*ssa);
            }
        }

        for ssa in s_in.w.iter() {
            if phi_dst_map.get_phi_idx(ssa).is_some() {
                continue;
            }
            if !p_out.w.contains(ssa) {
                fills.push(*ssa);
            }
        }

        if spills.is_empty() && fills.is_empty() {
            continue;
        }

        // Sort to ensure stability of the algorithm
        spills.sort_by_key(|ssa| ssa.idx());
        fills.sort_by_key(|ssa| ssa.idx());

        let mut instrs = Vec::new();
        for ssa in spills {
            instrs.push(spill.spill(ssa));
        }
        for ssa in fills {
            debug_assert!(pb.uniform || !ssa.is_uniform());
            instrs.push(spill.fill(ssa));
        }

        // Insert spills and fills right after the phi (if any)
        let ip = pb
            .phi_srcs_ip()
            .or_else(|| pb.branch_ip())
            .unwrap_or_else(|| pb.instrs.len());
        pb.instrs.splice(ip..ip, instrs.into_iter());
    }
}

impl Function {
    /// Spill values from @file to fit within @limit registers
    ///
    /// This pass assumes that the function is already in CSSA form. See
    /// @to_cssa for more details.
    ///
    /// The algorithm implemented here is roughly based on "Register
    /// Spilling and Live-Range Splitting for SSA-Form Programs" by Braun
    /// and Hack. The primary contributions of the Braun and Hack paper are
    /// the global next-use distances which are implemented by
    /// @NextUseLiveness and a heuristic for computing spill sets at block
    /// boundaries. The paper describes two sets:
    ///
    ///  - W, the set of variables currently resident
    ///
    ///  - S, the set of variables which have been spilled
    ///
    /// These sets are tracked as we walk instructions and [un]spill values
    /// to satisfy the given limit. When spills are required, we spill the
    /// value with the highest next-use IP. At block boundaries, Braun and
    /// Hack describe a heuristic for determining the starting W and S sets
    /// based on the W and S from the end of each of the forward edge
    /// predecessor blocks.
    ///
    /// What Braun and Hack do not describe is how to handle phis and
    /// parallel copies. Because we assume the function is already in CSSA
    /// form, we can use a fairly simple algorithm. On the first pass, we
    /// ignore phi sources and assign phi destinations based on W at the
    /// start of the block. If the phi destination is in W, we leave it
    /// alone. If it is not in W, then we allocate a new spill value and
    /// assign it to the phi destination. In a second pass, we handle phi
    /// sources based on the destination. If the destination is in W, we
    /// leave it alone. If the destination is spilled, we read from the
    /// spill value corresponding to the source, spilling first if needed.
    /// In the second pass, we also handle spilling across blocks as needed
    /// for values that do not pass through a phi.
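    ///
    /// As a purely illustrative example (the names are hypothetical): with
    /// limit = 2 and straight-line code that defines %a and %b, then
    /// defines %c, then uses %a and finally %b, the definition of %c
    /// overflows W = {%a, %b}. Of the resident values, %b has the furthest
    /// next use, so it is spilled: W becomes {%a, %c}, S = {%b}, and a
    /// fill reloads %b right before its final use.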
    ///
    /// A special case is also required for parallel copies because they
    /// can have an unbounded number of destinations. For any source values
    /// not in W, we allocate a spill value for the destination and copy in
    /// the spill register file. For any sources which are in W, we try to
    /// leave as much in W as possible. However, since source values may
    /// not be killed by the copy and because one source value may be
    /// copied to arbitrarily many destinations, that is not always
    /// possible. Whenever we need to spill values, we spill according to
    /// the highest next-use of the destination and we spill the source
    /// first and then parallel copy the source into a spilled destination
    /// value.
    ///
    /// This all assumes that it's better to copy in spill space than to
    /// unspill just for the sake of a parallel copy. While this may not be
    /// true in general, especially not when spilling to memory, the
    /// register allocator is good at eliding unnecessary copies.
    pub fn spill_values(&mut self, file: RegFile, limit: u32) {
        match file {
            RegFile::GPR => {
                let spill = SpillGPR::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::UGPR => {
                let spill = SpillUniform::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Pred => {
                let spill = SpillPred::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::UPred => {
                let spill = SpillPred::new();
                spill_values(self, file, limit, spill);
            }
            RegFile::Bar => {
                let spill = SpillBar::new();
                spill_values(self, file, limit, spill);
            }
            _ => panic!("Don't know how to spill {} registers", file),
        }

        self.repair_ssa();
        self.opt_dce();

        if DEBUG.print() {
            eprintln!("NAK IR after spilling {}:\n{}", file, self);
        }
    }
}