1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::{
5 api::{GetDebugFlags, DEBUG},
6 ir::*,
7 };
8
9 use std::collections::HashMap;
10
11 struct CopyNode {
12 num_reads: usize,
13 src: isize,
14 }
15
16 struct CopyGraph {
17 nodes: Vec<CopyNode>,
18 }
19
20 impl CopyGraph {
new() -> CopyGraph21 pub fn new() -> CopyGraph {
22 CopyGraph { nodes: Vec::new() }
23 }
24
add_node(&mut self) -> usize25 fn add_node(&mut self) -> usize {
26 let node_idx = self.nodes.len();
27 self.nodes.push(CopyNode {
28 num_reads: 0,
29 src: -1,
30 });
31 node_idx
32 }
33
num_reads(&self, node_idx: usize) -> usize34 fn num_reads(&self, node_idx: usize) -> usize {
35 self.nodes[node_idx].num_reads
36 }
37
src(&self, node_idx: usize) -> Option<usize>38 fn src(&self, node_idx: usize) -> Option<usize> {
39 if self.nodes[node_idx].src < 0 {
40 None
41 } else {
42 Some(self.nodes[node_idx].src.try_into().unwrap())
43 }
44 }
45
add_edge(&mut self, dst_idx: usize, src_idx: usize)46 fn add_edge(&mut self, dst_idx: usize, src_idx: usize) {
47 // Disallow self-loops
48 assert!(dst_idx != src_idx);
49
50 // Each node has in-degree at most 1
51 assert!(self.nodes[dst_idx].src == -1);
52 self.nodes[dst_idx].src = src_idx.try_into().unwrap();
53 self.nodes[src_idx].num_reads += 1;
54 }
55
del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool56 fn del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool {
57 assert!(self.nodes[dst_idx].src >= 0);
58 self.nodes[dst_idx].src = -1;
59 self.nodes[src_idx].num_reads -= 1;
60 self.nodes[src_idx].num_reads == 0
61 }
62 }
63
copy_needs_tmp(dst: &RegRef, src: &SrcRef) -> bool64 fn copy_needs_tmp(dst: &RegRef, src: &SrcRef) -> bool {
65 if let Some(src_reg) = src.as_reg() {
66 (dst.file() == RegFile::Mem && src_reg.file() == RegFile::Mem)
67 || (dst.file() == RegFile::Bar && src_reg.file() == RegFile::Bar)
68 } else {
69 false
70 }
71 }
72
cycle_use_swap(pc: &OpParCopy, file: RegFile) -> bool73 fn cycle_use_swap(pc: &OpParCopy, file: RegFile) -> bool {
74 match file {
75 RegFile::GPR => {
76 if let Some(tmp) = &pc.tmp {
77 debug_assert!(tmp.file() == RegFile::GPR);
78 false
79 } else {
80 true
81 }
82 }
83 RegFile::Bar | RegFile::Mem => {
84 let tmp = &pc.tmp.expect("This copy needs a temporary");
85 assert!(tmp.comps() >= 2, "Memory cycles need 2 temporaries");
86 false
87 }
88 _ => true,
89 }
90 }
91
lower_par_copy(pc: OpParCopy, sm: &dyn ShaderModel) -> MappedInstrs92 fn lower_par_copy(pc: OpParCopy, sm: &dyn ShaderModel) -> MappedInstrs {
93 let mut graph = CopyGraph::new();
94 let mut vals = Vec::new();
95 let mut reg_to_idx = HashMap::new();
96
97 for (i, (dst, _)) in pc.dsts_srcs.iter().enumerate() {
98 // Destinations must be pairwise unique
99 let reg = dst.as_reg().unwrap();
100 assert!(reg_to_idx.get(reg).is_none());
101
102 // Everything must be scalar
103 assert!(reg.comps() == 1);
104
105 let node_idx = graph.add_node();
106 assert!(node_idx == i && vals.len() == i);
107 vals.push(SrcRef::from(*reg));
108 reg_to_idx.insert(*reg, i);
109 }
110
111 for (dst_idx, (_, src)) in pc.dsts_srcs.iter().enumerate() {
112 assert!(src.src_mod.is_none());
113 let src = src.src_ref;
114
115 let src_idx = if let SrcRef::Reg(reg) = src {
116 // Everything must be scalar
117 assert!(reg.comps() == 1);
118
119 *reg_to_idx.entry(reg).or_insert_with(|| {
120 let node_idx = graph.add_node();
121 assert!(node_idx == vals.len());
122 vals.push(src);
123 node_idx
124 })
125 } else {
126 // We can't have bindless CBufs because we can't resolve cycles
127 // containing one.
128 assert!(src.get_reg().is_none());
129
130 let node_idx = graph.add_node();
131 assert!(node_idx == vals.len());
132 vals.push(src);
133 node_idx
134 };
135
136 if dst_idx != src_idx {
137 graph.add_edge(dst_idx, src_idx);
138 }
139 }
140
141 let mut b = InstrBuilder::new(sm);
142
143 let mut ready = Vec::new();
144 for i in 0..pc.dsts_srcs.len() {
145 if graph.num_reads(i) == 0 {
146 ready.push(i);
147 }
148 }
149 while let Some(dst_idx) = ready.pop() {
150 if let Some(src_idx) = graph.src(dst_idx) {
151 let dst = *vals[dst_idx].as_reg().unwrap();
152 let src = vals[src_idx];
153 if copy_needs_tmp(&dst, &src) {
154 let tmp = pc.tmp.expect("This copy needs a temporary").comp(0);
155 b.copy_to(tmp.into(), src.into());
156 b.copy_to(dst.into(), tmp.into());
157 } else {
158 b.copy_to(dst.into(), src.into());
159 }
160 if graph.del_edge(dst_idx, src_idx) {
161 ready.push(src_idx);
162 }
163 }
164 }
165
166 // At this point, all we are left with in the graph are isolated nodes
167 // (no edges) and cycles.
168 //
169 // Proof:
170 //
171 // Without loss of generality, we can assume that there are no isolated
172 // nodes in the graph. By construction, no node has in-degree more than 1
173 // (that would indicate a duplicate destination). The loop above ensures
174 // that there are no nodes with an in-degree of 1 and an out-degree of 0.
175 //
176 // Suppose that there were a node with an in-degree of 0. Then, because no
177 // node has an in-degree greater than 1, the number of edges must be less
178 // than the number of nodes. This implies that there is some node N with
179 // with out-degree of 0. If N has an in-degree of 0 then it is isolated,
180 // which is a contradiction. If N has an in-degree of 1 then it is a node
181 // with in-degree of 1 and out-degree of 0 which is also a contradiction.
182 // Therefore, there are no nodes with in-degree of 0 and all nodes have an
183 // in-degree of 1.
184 //
185 // Since all nodes have an in-degree of 1, no node has an out-degree of 0
186 // and, because the sum of all in-degrees equals the sum of all out-degrees
187 // (they both equal the number of edges), every node must also have an
188 // out-degree of 1. Therefore, the graph only contains cycles.
189 //
190 // QED
191 for i in 0..pc.dsts_srcs.len() {
192 if graph.src(i).is_none() {
193 debug_assert!(graph.num_reads(i) == 0);
194 continue;
195 }
196
197 let file = vals[i].as_reg().unwrap().file();
198 if cycle_use_swap(&pc, file) {
199 loop {
200 let j = graph.src(i).unwrap();
201 // We're part of a cycle so j also has a source
202 let k = graph.src(j).unwrap();
203 let j_reg = vals[j].as_reg().unwrap();
204 let k_reg = vals[k].as_reg().unwrap();
205 b.swap(*j_reg, *k_reg);
206 graph.del_edge(i, j);
207 graph.del_edge(j, k);
208 if i == k {
209 // This was our last swap
210 break;
211 } else {
212 graph.add_edge(i, k);
213 }
214 }
215 } else {
216 let pc_tmp = pc.tmp.expect("This copy needs a temporary");
217 let tmp = pc_tmp.comp(0);
218
219 let mut p = i;
220 let mut p_reg = *vals[p].as_reg().unwrap();
221 b.copy_to(tmp.into(), p_reg.into());
222
223 loop {
224 let j = graph.src(p).unwrap();
225 if j == i {
226 break;
227 }
228
229 let j_reg = *vals[j].as_reg().unwrap();
230 debug_assert!(j_reg.file() == file);
231
232 if file == RegFile::Bar || file == RegFile::Mem {
233 let copy_tmp = pc_tmp.comp(1);
234 b.copy_to(copy_tmp.into(), j_reg.into());
235 b.copy_to(p_reg.into(), copy_tmp.into());
236 } else {
237 b.copy_to(p_reg.into(), j_reg.into());
238 }
239 graph.del_edge(p, j);
240
241 p = j;
242 p_reg = j_reg;
243 }
244
245 b.copy_to(p_reg.into(), tmp.into());
246 graph.del_edge(p, i);
247 }
248 }
249
250 b.as_mapped_instrs()
251 }
252
253 impl Shader<'_> {
lower_par_copies(&mut self)254 pub fn lower_par_copies(&mut self) {
255 let sm = self.sm;
256 self.map_instrs(|instr, _| -> MappedInstrs {
257 match instr.op {
258 Op::ParCopy(pc) => {
259 assert!(instr.pred.is_true());
260 let mut instrs = vec![];
261 if DEBUG.annotate() {
262 instrs.push(Instr::new_boxed(OpAnnotate {
263 annotation: "par_copy lowered by lower_par_copy"
264 .into(),
265 }));
266 }
267 match lower_par_copy(pc, sm) {
268 MappedInstrs::None => {
269 if let Some(instr) = instrs.pop() {
270 MappedInstrs::One(instr)
271 } else {
272 MappedInstrs::None
273 }
274 }
275 MappedInstrs::One(i) => {
276 instrs.push(i);
277 MappedInstrs::Many(instrs)
278 }
279 MappedInstrs::Many(i) => {
280 instrs.extend(i);
281 MappedInstrs::Many(instrs)
282 }
283 }
284 }
285 _ => MappedInstrs::One(instr),
286 }
287 });
288 }
289 }
290