xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/lower_par_copies.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::{
5     api::{GetDebugFlags, DEBUG},
6     ir::*,
7 };
8 
9 use std::collections::HashMap;
10 
11 struct CopyNode {
12     num_reads: usize,
13     src: isize,
14 }
15 
16 struct CopyGraph {
17     nodes: Vec<CopyNode>,
18 }
19 
20 impl CopyGraph {
new() -> CopyGraph21     pub fn new() -> CopyGraph {
22         CopyGraph { nodes: Vec::new() }
23     }
24 
add_node(&mut self) -> usize25     fn add_node(&mut self) -> usize {
26         let node_idx = self.nodes.len();
27         self.nodes.push(CopyNode {
28             num_reads: 0,
29             src: -1,
30         });
31         node_idx
32     }
33 
num_reads(&self, node_idx: usize) -> usize34     fn num_reads(&self, node_idx: usize) -> usize {
35         self.nodes[node_idx].num_reads
36     }
37 
src(&self, node_idx: usize) -> Option<usize>38     fn src(&self, node_idx: usize) -> Option<usize> {
39         if self.nodes[node_idx].src < 0 {
40             None
41         } else {
42             Some(self.nodes[node_idx].src.try_into().unwrap())
43         }
44     }
45 
add_edge(&mut self, dst_idx: usize, src_idx: usize)46     fn add_edge(&mut self, dst_idx: usize, src_idx: usize) {
47         // Disallow self-loops
48         assert!(dst_idx != src_idx);
49 
50         // Each node has in-degree at most 1
51         assert!(self.nodes[dst_idx].src == -1);
52         self.nodes[dst_idx].src = src_idx.try_into().unwrap();
53         self.nodes[src_idx].num_reads += 1;
54     }
55 
del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool56     fn del_edge(&mut self, dst_idx: usize, src_idx: usize) -> bool {
57         assert!(self.nodes[dst_idx].src >= 0);
58         self.nodes[dst_idx].src = -1;
59         self.nodes[src_idx].num_reads -= 1;
60         self.nodes[src_idx].num_reads == 0
61     }
62 }
63 
copy_needs_tmp(dst: &RegRef, src: &SrcRef) -> bool64 fn copy_needs_tmp(dst: &RegRef, src: &SrcRef) -> bool {
65     if let Some(src_reg) = src.as_reg() {
66         (dst.file() == RegFile::Mem && src_reg.file() == RegFile::Mem)
67             || (dst.file() == RegFile::Bar && src_reg.file() == RegFile::Bar)
68     } else {
69         false
70     }
71 }
72 
cycle_use_swap(pc: &OpParCopy, file: RegFile) -> bool73 fn cycle_use_swap(pc: &OpParCopy, file: RegFile) -> bool {
74     match file {
75         RegFile::GPR => {
76             if let Some(tmp) = &pc.tmp {
77                 debug_assert!(tmp.file() == RegFile::GPR);
78                 false
79             } else {
80                 true
81             }
82         }
83         RegFile::Bar | RegFile::Mem => {
84             let tmp = &pc.tmp.expect("This copy needs a temporary");
85             assert!(tmp.comps() >= 2, "Memory cycles need 2 temporaries");
86             false
87         }
88         _ => true,
89     }
90 }
91 
lower_par_copy(pc: OpParCopy, sm: &dyn ShaderModel) -> MappedInstrs92 fn lower_par_copy(pc: OpParCopy, sm: &dyn ShaderModel) -> MappedInstrs {
93     let mut graph = CopyGraph::new();
94     let mut vals = Vec::new();
95     let mut reg_to_idx = HashMap::new();
96 
97     for (i, (dst, _)) in pc.dsts_srcs.iter().enumerate() {
98         // Destinations must be pairwise unique
99         let reg = dst.as_reg().unwrap();
100         assert!(reg_to_idx.get(reg).is_none());
101 
102         // Everything must be scalar
103         assert!(reg.comps() == 1);
104 
105         let node_idx = graph.add_node();
106         assert!(node_idx == i && vals.len() == i);
107         vals.push(SrcRef::from(*reg));
108         reg_to_idx.insert(*reg, i);
109     }
110 
111     for (dst_idx, (_, src)) in pc.dsts_srcs.iter().enumerate() {
112         assert!(src.src_mod.is_none());
113         let src = src.src_ref;
114 
115         let src_idx = if let SrcRef::Reg(reg) = src {
116             // Everything must be scalar
117             assert!(reg.comps() == 1);
118 
119             *reg_to_idx.entry(reg).or_insert_with(|| {
120                 let node_idx = graph.add_node();
121                 assert!(node_idx == vals.len());
122                 vals.push(src);
123                 node_idx
124             })
125         } else {
126             // We can't have bindless CBufs because we can't resolve cycles
127             // containing one.
128             assert!(src.get_reg().is_none());
129 
130             let node_idx = graph.add_node();
131             assert!(node_idx == vals.len());
132             vals.push(src);
133             node_idx
134         };
135 
136         if dst_idx != src_idx {
137             graph.add_edge(dst_idx, src_idx);
138         }
139     }
140 
141     let mut b = InstrBuilder::new(sm);
142 
143     let mut ready = Vec::new();
144     for i in 0..pc.dsts_srcs.len() {
145         if graph.num_reads(i) == 0 {
146             ready.push(i);
147         }
148     }
149     while let Some(dst_idx) = ready.pop() {
150         if let Some(src_idx) = graph.src(dst_idx) {
151             let dst = *vals[dst_idx].as_reg().unwrap();
152             let src = vals[src_idx];
153             if copy_needs_tmp(&dst, &src) {
154                 let tmp = pc.tmp.expect("This copy needs a temporary").comp(0);
155                 b.copy_to(tmp.into(), src.into());
156                 b.copy_to(dst.into(), tmp.into());
157             } else {
158                 b.copy_to(dst.into(), src.into());
159             }
160             if graph.del_edge(dst_idx, src_idx) {
161                 ready.push(src_idx);
162             }
163         }
164     }
165 
166     // At this point, all we are left with in the graph are isolated nodes
167     // (no edges) and cycles.
168     //
169     // Proof:
170     //
171     // Without loss of generality, we can assume that there are no isolated
172     // nodes in the graph.  By construction, no node has in-degree more than 1
173     // (that would indicate a duplicate destination).  The loop above ensures
174     // that there are no nodes with an in-degree of 1 and an out-degree of 0.
175     //
176     // Suppose that there were a node with an in-degree of 0.  Then, because no
177     // node has an in-degree greater than 1, the number of edges must be less
178     // than the number of nodes.  This implies that there is some node N with
179     // with out-degree of 0.  If N has an in-degree of 0 then it is isolated,
180     // which is a contradiction.  If N has an in-degree of 1 then it is a node
181     // with in-degree of 1 and out-degree of 0 which is also a contradiction.
182     // Therefore, there are no nodes with in-degree of 0 and all nodes have an
183     // in-degree of 1.
184     //
185     // Since all nodes have an in-degree of 1, no node has an out-degree of 0
186     // and, because the sum of all in-degrees equals the sum of all out-degrees
187     // (they both equal the number of edges), every node must also have an
188     // out-degree of 1.  Therefore, the graph only contains cycles.
189     //
190     // QED
191     for i in 0..pc.dsts_srcs.len() {
192         if graph.src(i).is_none() {
193             debug_assert!(graph.num_reads(i) == 0);
194             continue;
195         }
196 
197         let file = vals[i].as_reg().unwrap().file();
198         if cycle_use_swap(&pc, file) {
199             loop {
200                 let j = graph.src(i).unwrap();
201                 // We're part of a cycle so j also has a source
202                 let k = graph.src(j).unwrap();
203                 let j_reg = vals[j].as_reg().unwrap();
204                 let k_reg = vals[k].as_reg().unwrap();
205                 b.swap(*j_reg, *k_reg);
206                 graph.del_edge(i, j);
207                 graph.del_edge(j, k);
208                 if i == k {
209                     // This was our last swap
210                     break;
211                 } else {
212                     graph.add_edge(i, k);
213                 }
214             }
215         } else {
216             let pc_tmp = pc.tmp.expect("This copy needs a temporary");
217             let tmp = pc_tmp.comp(0);
218 
219             let mut p = i;
220             let mut p_reg = *vals[p].as_reg().unwrap();
221             b.copy_to(tmp.into(), p_reg.into());
222 
223             loop {
224                 let j = graph.src(p).unwrap();
225                 if j == i {
226                     break;
227                 }
228 
229                 let j_reg = *vals[j].as_reg().unwrap();
230                 debug_assert!(j_reg.file() == file);
231 
232                 if file == RegFile::Bar || file == RegFile::Mem {
233                     let copy_tmp = pc_tmp.comp(1);
234                     b.copy_to(copy_tmp.into(), j_reg.into());
235                     b.copy_to(p_reg.into(), copy_tmp.into());
236                 } else {
237                     b.copy_to(p_reg.into(), j_reg.into());
238                 }
239                 graph.del_edge(p, j);
240 
241                 p = j;
242                 p_reg = j_reg;
243             }
244 
245             b.copy_to(p_reg.into(), tmp.into());
246             graph.del_edge(p, i);
247         }
248     }
249 
250     b.as_mapped_instrs()
251 }
252 
253 impl Shader<'_> {
lower_par_copies(&mut self)254     pub fn lower_par_copies(&mut self) {
255         let sm = self.sm;
256         self.map_instrs(|instr, _| -> MappedInstrs {
257             match instr.op {
258                 Op::ParCopy(pc) => {
259                     assert!(instr.pred.is_true());
260                     let mut instrs = vec![];
261                     if DEBUG.annotate() {
262                         instrs.push(Instr::new_boxed(OpAnnotate {
263                             annotation: "par_copy lowered by lower_par_copy"
264                                 .into(),
265                         }));
266                     }
267                     match lower_par_copy(pc, sm) {
268                         MappedInstrs::None => {
269                             if let Some(instr) = instrs.pop() {
270                                 MappedInstrs::One(instr)
271                             } else {
272                                 MappedInstrs::None
273                             }
274                         }
275                         MappedInstrs::One(i) => {
276                             instrs.push(i);
277                             MappedInstrs::Many(instrs)
278                         }
279                         MappedInstrs::Many(i) => {
280                             instrs.extend(i);
281                             MappedInstrs::Many(instrs)
282                         }
283                     }
284                 }
285                 _ => MappedInstrs::One(instr),
286             }
287         });
288     }
289 }
290