xref: /aosp_15_r20/external/mesa3d/src/nouveau/compiler/nak/qmd.rs (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 // Copyright © 2024 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 extern crate nvidia_headers;
5 
6 use compiler::bindings::*;
7 use nak_bindings::*;
8 use nvidia_headers::classes::{cla0c0, clc0c0, clc3c0, clc6c0};
9 
10 use bitview::*;
11 use paste::paste;
12 
13 type QMDBitView<'a> = BitMutView<'a, [u32]>;
14 
15 trait QMD {
16     const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout;
17 
new() -> Self18     fn new() -> Self;
set_barrier_count(&mut self, barrier_count: u8)19     fn set_barrier_count(&mut self, barrier_count: u8);
set_cbuf(&mut self, idx: u8, addr: u64, size: u32)20     fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32);
set_global_size(&mut self, width: u32, height: u32, depth: u32)21     fn set_global_size(&mut self, width: u32, height: u32, depth: u32);
set_local_size(&mut self, width: u16, height: u16, depth: u16)22     fn set_local_size(&mut self, width: u16, height: u16, depth: u16);
set_prog_addr(&mut self, addr: u64)23     fn set_prog_addr(&mut self, addr: u64);
set_register_count(&mut self, register_count: u8)24     fn set_register_count(&mut self, register_count: u8);
set_crs_size(&mut self, crs_size: u32)25     fn set_crs_size(&mut self, crs_size: u32);
set_slm_size(&mut self, slm_size: u32)26     fn set_slm_size(&mut self, slm_size: u32);
set_smem_size(&mut self, smem_size: u32, smem_max: u32)27     fn set_smem_size(&mut self, smem_size: u32, smem_max: u32);
28 }
29 
/// Sets the field `<$class>::<$layout>_<$name>` of `$view` to the named
/// constant `<$class>::<$layout>_<$name>_<$variant>`.
///
/// This is `set_field!` with the value looked up by name instead of being
/// passed as an expression.
macro_rules! set_enum {
    ($view:expr, $class:ident, $layout:ident, $name:ident, $variant:ident) => {
        set_field!(
            $view,
            $class,
            $layout,
            $name,
            paste! { $class::[<$layout _ $name _ $variant>] }
        )
    };
}
38 
/// Writes `$value` into the field `<$class>::<$layout>_<$name>` of `$view`.
///
/// The field identifier is assembled with `paste!`, so callers pass the
/// layout prefix and the field name as separate identifiers.
macro_rules! set_field {
    ($view:expr, $class:ident, $layout:ident, $name:ident, $value:expr) => {
        $view.set_field(
            paste! { $class::[<$layout _ $name>] },
            $value,
        )
    };
}
44 
/// Writes `$value` into element `$index` of the indexed field
/// `<$class>::<$layout>_<$name>` of `$view`.
///
/// Indexed fields are exposed as functions of the index, hence the call on
/// the pasted identifier.
macro_rules! set_array {
    ($view:expr, $class:ident, $layout:ident, $name:ident, $index:expr, $value:expr) => {
        $view.set_field(
            paste! { $class::[<$layout _ $name>] }($index),
            $value,
        )
    };
}
50 
/// Initialization shared by every layout's `new()`.
///
/// Records the major/minor QMD version and selects `NO_CHECK` for
/// `API_VISIBLE_CALL_LIMIT` and `INDEPENDENTLY` for `SAMPLER_INDEX`.
macro_rules! qmd_init {
    ($view:expr, $class:ident, $layout:ident, $major:expr, $minor:expr) => {
        // Version identification.
        set_field!($view, $class, $layout, QMD_MAJOR_VERSION, $major);
        set_field!($view, $class, $layout, QMD_VERSION, $minor);

        // Fixed defaults used for every dispatch.
        set_enum!($view, $class, $layout, API_VISIBLE_CALL_LIMIT, NO_CHECK);
        set_enum!($view, $class, $layout, SAMPLER_INDEX, INDEPENDENTLY);
    };
}
60 
/// Emits the `QMD` trait items whose encoding is the same for every layout:
/// `GLOBAL_SIZE_LAYOUT`, `set_barrier_count`, `set_global_size`,
/// `set_local_size` and `set_slm_size`.
///
/// Must be expanded inside an `impl QMD for ...` block whose type has a
/// `qmd` field usable with `QMDBitView::new`.
macro_rules! qmd_impl_common {
    ($class:ident, $layout:ident) => {
        /// Start/end bit positions of the three CTA raster fields, narrowed
        /// to `u16`.
        const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout = {
            let width = paste! {$class::[<$layout _CTA_RASTER_WIDTH>]};
            let height = paste! {$class::[<$layout _CTA_RASTER_HEIGHT>]};
            let depth = paste! {$class::[<$layout _CTA_RASTER_DEPTH>]};
            nak_qmd_dispatch_size_layout {
                x_start: width.start as u16,
                x_end: width.end as u16,
                y_start: height.start as u16,
                y_end: height.end as u16,
                z_start: depth.start as u16,
                z_end: depth.end as u16,
            }
        };

        fn set_barrier_count(&mut self, barrier_count: u8) {
            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, BARRIER_COUNT, barrier_count);
        }

        fn set_global_size(&mut self, width: u32, height: u32, depth: u32) {
            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, CTA_RASTER_WIDTH, width);
            set_field!(view, $class, $layout, CTA_RASTER_HEIGHT, height);
            set_field!(view, $class, $layout, CTA_RASTER_DEPTH, depth);
        }

        fn set_local_size(&mut self, width: u16, height: u16, depth: u16) {
            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, CTA_THREAD_DIMENSION0, width);
            set_field!(view, $class, $layout, CTA_THREAD_DIMENSION1, height);
            set_field!(view, $class, $layout, CTA_THREAD_DIMENSION2, depth);
        }

        fn set_slm_size(&mut self, slm_size: u32) {
            // The size is rounded up to a multiple of 0x10 and stored
            // entirely in the LOW field; the HIGH field is always zero.
            let aligned = slm_size.next_multiple_of(0x10);

            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
            set_field!(view, $class, $layout, SHADER_LOCAL_MEMORY_LOW_SIZE, aligned);
        }
    };
}
104 
/// Emits `set_crs_size` for layouts that have a
/// `SHADER_LOCAL_MEMORY_CRS_SIZE` field.
///
/// The requested size is rounded up to a multiple of 0x200 before it is
/// stored.
macro_rules! qmd_impl_set_crs_size {
    ($class:ident, $layout:ident) => {
        fn set_crs_size(&mut self, crs_size: u32) {
            let aligned = crs_size.next_multiple_of(0x200);

            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, SHADER_LOCAL_MEMORY_CRS_SIZE, aligned);
        }
    };
}
114 
/// Shift for layouts whose constant buffer size field is named `..._SIZE`:
/// the byte size is stored unmodified.
const SIZE_SHIFT: u8 = 0;

/// Shift for layouts whose constant buffer size field is named
/// `..._SIZE_SHIFTED4`: the byte size is stored shifted right by 4 bits.
const SIZE_SHIFTED4_SHIFT: u8 = 4;

/// Emits `set_cbuf` for a layout.
///
/// `$size_field` is the suffix of the layout's constant buffer size field
/// (`SIZE` or `SIZE_SHIFTED4`); the shift constant `<$size_field>_SHIFT`
/// defined next to this macro is looked up from it by name.
macro_rules! qmd_impl_set_cbuf {
    ($class:ident, $layout:ident, $size_field:ident) => {
        fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32) {
            let mut view = QMDBitView::new(&mut self.qmd);
            let idx = idx.into();

            // The 64-bit address is split into two 32-bit halves, each with
            // its own indexed field.
            let lower = addr as u32;
            let upper = (addr >> 32) as u32;
            set_array!(view, $class, $layout, CONSTANT_BUFFER_ADDR_LOWER, idx, lower);
            set_array!(view, $class, $layout, CONSTANT_BUFFER_ADDR_UPPER, idx, upper);

            paste! {
                let shift = [<$size_field _SHIFT>];
                let range = $class::[<$layout _CONSTANT_BUFFER_ $size_field>](idx);
                // Shifting must not drop any set bits of the size.
                assert!(((size >> shift) << shift) == size);
                view.set_field(range, size >> shift);
            }

            set_array!(view, $class, $layout, CONSTANT_BUFFER_VALID, idx, true);
        }
    };
}
140 
/// Emits `set_prog_addr` for layouts that describe the program with a single
/// `PROGRAM_OFFSET` field.
///
/// The `u64` address is handed to the field setter unsplit.
macro_rules! qmd_impl_set_prog_addr_32 {
    ($class:ident, $layout:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, PROGRAM_OFFSET, addr);
        }
    };
}
149 
/// Emits `set_prog_addr` for layouts that describe the program with a
/// `PROGRAM_ADDRESS_LOWER` / `PROGRAM_ADDRESS_UPPER` field pair.
macro_rules! qmd_impl_set_prog_addr_64 {
    ($class:ident, $layout:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            // Low 32 bits go to LOWER, the remaining high bits to UPPER.
            let lower = addr as u32;
            let upper = (addr >> 32) as u32;

            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, PROGRAM_ADDRESS_LOWER, lower);
            set_field!(view, $class, $layout, PROGRAM_ADDRESS_UPPER, upper);
        }
    };
}
162 
/// Emits `set_register_count`, storing the count in the layout field named
/// by `$count_field` (the field name differs between layouts).
macro_rules! qmd_impl_set_register_count {
    ($class:ident, $layout:ident, $count_field:ident) => {
        fn set_register_count(&mut self, register_count: u8) {
            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, $count_field, register_count);
        }
    };
}
171 
172 mod qmd_0_6 {
173     use crate::qmd::*;
174     use nvidia_headers::classes::cla0c0::qmd as cla0c0;
175 
176     #[repr(transparent)]
177     pub struct Qmd0_6 {
178         qmd: [u32; 64],
179     }
180 
181     impl QMD for Qmd0_6 {
new() -> Self182         fn new() -> Self {
183             let mut qmd = [0; 64];
184             let mut bv = QMDBitView::new(&mut qmd);
185             qmd_init!(bv, cla0c0, QMDV00_06, 0, 6);
186             set_field!(bv, cla0c0, QMDV00_06, SASS_VERSION, 0x30);
187             Self { qmd }
188         }
189 
190         qmd_impl_common!(cla0c0, QMDV00_06);
191         qmd_impl_set_crs_size!(cla0c0, QMDV00_06);
192         qmd_impl_set_cbuf!(cla0c0, QMDV00_06, SIZE);
193         qmd_impl_set_prog_addr_32!(cla0c0, QMDV00_06);
194         qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT);
195 
set_smem_size(&mut self, smem_size: u32, _smem_max: u32)196         fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
197             let mut bv = QMDBitView::new(&mut self.qmd);
198 
199             let smem_size = smem_size.next_multiple_of(0x100);
200             set_field!(bv, cla0c0, QMDV00_06, SHARED_MEMORY_SIZE, smem_size);
201 
202             let l1_config = if smem_size <= (16 << 10) {
203                 cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB
204             } else if smem_size <= (32 << 10) {
205                 cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB
206             } else if smem_size <= (48 << 10) {
207                 cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB
208             } else {
209                 panic!("Iclalid shared memory size");
210             };
211             set_field!(bv, cla0c0, QMDV00_06, L1_CONFIGURATION, l1_config);
212         }
213     }
214 }
215 use qmd_0_6::Qmd0_6;
216 
217 mod qmd_2_1 {
218     use crate::qmd::*;
219     use nvidia_headers::classes::clc0c0::qmd as clc0c0;
220 
221     #[repr(transparent)]
222     pub struct Qmd2_1 {
223         qmd: [u32; 64],
224     }
225 
226     impl QMD for Qmd2_1 {
new() -> Self227         fn new() -> Self {
228             let mut qmd = [0; 64];
229             let mut bv = QMDBitView::new(&mut qmd);
230             qmd_init!(bv, clc0c0, QMDV02_01, 2, 1);
231             set_field!(bv, clc0c0, QMDV02_01, SM_GLOBAL_CACHING_ENABLE, true);
232             Self { qmd }
233         }
234 
235         qmd_impl_common!(clc0c0, QMDV02_01);
236         qmd_impl_set_crs_size!(clc0c0, QMDV02_01);
237         qmd_impl_set_cbuf!(clc0c0, QMDV02_01, SIZE_SHIFTED4);
238         qmd_impl_set_prog_addr_32!(clc0c0, QMDV02_01);
239         qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT);
240 
set_smem_size(&mut self, smem_size: u32, _smem_max: u32)241         fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
242             let mut bv = QMDBitView::new(&mut self.qmd);
243 
244             let smem_size = smem_size.next_multiple_of(0x100);
245             set_field!(bv, clc0c0, QMDV02_01, SHARED_MEMORY_SIZE, smem_size);
246         }
247     }
248 }
249 use qmd_2_1::Qmd2_1;
250 
/// Converts a shared memory size in bytes into the encoded value used by the
/// `*_SM_CONFIG_SHARED_MEM_SIZE` fields.
///
/// The size is first rounded up to one of the supported carve-outs (8, 16,
/// 32, 64 or 96 KiB; anything above 64 KiB maps to 96 KiB). The result is
/// that carve-out expressed in 4 KiB units, plus one.
fn gv100_sm_config_smem_size(size: u32) -> u32 {
    // Carve-outs below the 96 KiB ceiling, in KiB, smallest first.
    const CARVE_OUTS_KIB: [u32; 4] = [8, 16, 32, 64];

    let carve_out_kib = CARVE_OUTS_KIB
        .iter()
        .copied()
        .find(|&kib| size <= kib * 1024)
        .unwrap_or(96);

    carve_out_kib * 1024 / 4096 + 1
}
266 
/// Emits `set_smem_size` for layouts that carry MIN/MAX/TARGET
/// `SM_CONFIG_SHARED_MEM_SIZE` fields in addition to `SHARED_MEMORY_SIZE`.
///
/// The requested size is rounded up to a multiple of 0x100. MIN and TARGET
/// are both encoded from that rounded size, MAX from `smem_max`, all through
/// `gv100_sm_config_smem_size`.
macro_rules! qmd_impl_set_smem_size_bounded {
    ($class:ident, $layout:ident) => {
        fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
            let rounded = smem_size.next_multiple_of(0x100);

            // MIN and TARGET share one encoded value.
            let requested = gv100_sm_config_smem_size(rounded);
            let upper_bound = gv100_sm_config_smem_size(smem_max);

            let mut view = QMDBitView::new(&mut self.qmd);
            set_field!(view, $class, $layout, SHARED_MEMORY_SIZE, rounded);
            set_field!(view, $class, $layout, MIN_SM_CONFIG_SHARED_MEM_SIZE, requested);
            set_field!(view, $class, $layout, MAX_SM_CONFIG_SHARED_MEM_SIZE, upper_bound);
            set_field!(view, $class, $layout, TARGET_SM_CONFIG_SHARED_MEM_SIZE, requested);
        }
    };
}
284 
285 mod qmd_2_2 {
286     use crate::qmd::*;
287     use nvidia_headers::classes::clc3c0::qmd as clc3c0;
288 
289     #[repr(transparent)]
290     pub struct Qmd2_2 {
291         qmd: [u32; 64],
292     }
293 
294     impl QMD for Qmd2_2 {
new() -> Self295         fn new() -> Self {
296             let mut qmd = [0; 64];
297             let mut bv = QMDBitView::new(&mut qmd);
298             qmd_init!(bv, clc3c0, QMDV02_02, 2, 2);
299             set_field!(bv, clc3c0, QMDV02_02, SM_GLOBAL_CACHING_ENABLE, true);
300             Self { qmd }
301         }
302 
303         qmd_impl_common!(clc3c0, QMDV02_02);
304         qmd_impl_set_crs_size!(clc3c0, QMDV02_02);
305         qmd_impl_set_cbuf!(clc3c0, QMDV02_02, SIZE_SHIFTED4);
306         qmd_impl_set_prog_addr_64!(clc3c0, QMDV02_02);
307         qmd_impl_set_register_count!(clc3c0, QMDV02_02, REGISTER_COUNT_V);
308         qmd_impl_set_smem_size_bounded!(clc3c0, QMDV02_02);
309     }
310 }
311 use qmd_2_2::Qmd2_2;
312 
313 mod qmd_3_0 {
314     use crate::qmd::*;
315     use nvidia_headers::classes::clc6c0::qmd as clc6c0;
316 
317     #[repr(transparent)]
318     pub struct Qmd3_0 {
319         qmd: [u32; 64],
320     }
321 
322     impl QMD for Qmd3_0 {
new() -> Self323         fn new() -> Self {
324             let mut qmd = [0; 64];
325             let mut bv = QMDBitView::new(&mut qmd);
326             qmd_init!(bv, clc6c0, QMDV03_00, 3, 0);
327             set_field!(bv, clc6c0, QMDV03_00, SM_GLOBAL_CACHING_ENABLE, true);
328             Self { qmd }
329         }
330 
331         qmd_impl_common!(clc6c0, QMDV03_00);
332 
set_crs_size(&mut self, crs_size: u32)333         fn set_crs_size(&mut self, crs_size: u32) {
334             assert!(crs_size == 0);
335         }
336 
337         qmd_impl_set_cbuf!(clc6c0, QMDV03_00, SIZE_SHIFTED4);
338         qmd_impl_set_prog_addr_64!(clc6c0, QMDV03_00);
339         qmd_impl_set_register_count!(clc6c0, QMDV03_00, REGISTER_COUNT_V);
340         qmd_impl_set_smem_size_bounded!(clc6c0, QMDV03_00);
341     }
342 }
343 use qmd_3_0::Qmd3_0;
344 
fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q345 fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
346     let cs_info = unsafe {
347         assert!(info.stage == MESA_SHADER_COMPUTE);
348         &info.__bindgen_anon_1.cs
349     };
350 
351     let mut qmd = Q::new();
352 
353     qmd.set_barrier_count(info.num_control_barriers);
354     qmd.set_global_size(
355         qmd_info.global_size[0],
356         qmd_info.global_size[1],
357         qmd_info.global_size[2],
358     );
359     qmd.set_local_size(
360         cs_info.local_size[0],
361         cs_info.local_size[1],
362         cs_info.local_size[2],
363     );
364     qmd.set_prog_addr(qmd_info.addr);
365     qmd.set_register_count(info.num_gprs);
366     qmd.set_crs_size(info.crs_size);
367     qmd.set_slm_size(info.slm_size);
368 
369     assert!(qmd_info.smem_size >= cs_info.smem_size);
370     assert!(qmd_info.smem_size <= qmd_info.smem_max);
371     qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into());
372 
373     for i in 0..qmd_info.num_cbufs {
374         let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()];
375         if cb.size > 0 {
376             qmd.set_cbuf(cb.index.try_into().unwrap(), cb.addr, cb.size);
377         }
378     }
379 
380     qmd
381 }
382 
383 #[no_mangle]
nak_fill_qmd( dev: *const nv_device_info, info: *const nak_shader_info, qmd_info: *const nak_qmd_info, qmd_out: *mut ::std::os::raw::c_void, qmd_size: usize, )384 pub extern "C" fn nak_fill_qmd(
385     dev: *const nv_device_info,
386     info: *const nak_shader_info,
387     qmd_info: *const nak_qmd_info,
388     qmd_out: *mut ::std::os::raw::c_void,
389     qmd_size: usize,
390 ) {
391     assert!(!dev.is_null());
392     let dev = unsafe { &*dev };
393 
394     assert!(!info.is_null());
395     let info = unsafe { &*info };
396 
397     assert!(!qmd_info.is_null());
398     let qmd_info = unsafe { &*qmd_info };
399 
400     unsafe {
401         if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
402             let qmd_out = qmd_out as *mut Qmd3_0;
403             assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
404             qmd_out.write(fill_qmd(info, qmd_info));
405         } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
406             let qmd_out = qmd_out as *mut Qmd2_2;
407             assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
408             qmd_out.write(fill_qmd(info, qmd_info));
409         } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
410             let qmd_out = qmd_out as *mut Qmd2_1;
411             assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
412             qmd_out.write(fill_qmd(info, qmd_info));
413         } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
414             let qmd_out = qmd_out as *mut Qmd0_6;
415             assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
416             qmd_out.write(fill_qmd(info, qmd_info));
417         } else {
418             panic!("Unknown shader model");
419         }
420     }
421 }
422 
423 #[no_mangle]
nak_get_qmd_dispatch_size_layout( dev: &nv_device_info, ) -> nak_qmd_dispatch_size_layout424 pub extern "C" fn nak_get_qmd_dispatch_size_layout(
425     dev: &nv_device_info,
426 ) -> nak_qmd_dispatch_size_layout {
427     if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
428         Qmd3_0::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
429     } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
430         Qmd2_2::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
431     } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
432         Qmd2_1::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
433     } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
434         Qmd0_6::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
435     } else {
436         panic!("Unsupported shader model");
437     }
438 }
439