// Copyright © 2024 Collabora, Ltd.
// SPDX-License-Identifier: MIT

extern crate nvidia_headers;

use compiler::bindings::*;
use nak_bindings::*;
use nvidia_headers::classes::{cla0c0, clc0c0, clc3c0, clc6c0};

use bitview::*;
use paste::paste;

type QMDBitView<'a> = BitMutView<'a, [u32]>;

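/// Common interface for building a QMD ("Queue Meta Data"), the descriptor
/// the compute engine consumes to launch a dispatch.  Each supported
/// hardware generation implements this trait on its own QMD layout.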
trait QMD {
    const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout;

    fn new() -> Self;
    fn set_barrier_count(&mut self, barrier_count: u8);
    fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32);
    fn set_global_size(&mut self, width: u32, height: u32, depth: u32);
    fn set_local_size(&mut self, width: u16, height: u16, depth: u16);
    fn set_prog_addr(&mut self, addr: u64);
    fn set_register_count(&mut self, register_count: u8);
    fn set_crs_size(&mut self, crs_size: u32);
    fn set_slm_size(&mut self, slm_size: u32);
    fn set_smem_size(&mut self, smem_size: u32, smem_max: u32);
}

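// The field and enum names generated from the class headers all follow the
// pattern <STRUCT>_<FIELD>[_<VALUE>].  These helpers paste the tokens
// together so the setters below can be written once per QMD version.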
macro_rules! set_enum {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $enum:ident) => {
        $bv.set_field(
            paste! {$cls::[<$strct _ $field>]},
            paste! {$cls::[<$strct _ $field _ $enum>]},
        )
    };
}

macro_rules! set_field {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $val:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $field>]}, $val)
    };
}

macro_rules! set_array {
    ($bv:expr, $cls:ident, $strct:ident, $f:ident, $i:expr, $x:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $f>]}($i), $x)
    };
}

macro_rules! qmd_init {
    ($bv:expr, $c:ident, $s:ident, $mjv:expr, $mnv:expr) => {
        set_field!($bv, $c, $s, QMD_MAJOR_VERSION, $mjv);
        set_field!($bv, $c, $s, QMD_VERSION, $mnv);

        set_enum!($bv, $c, $s, API_VISIBLE_CALL_LIMIT, NO_CHECK);
        set_enum!($bv, $c, $s, SAMPLER_INDEX, INDEPENDENTLY);
    };
}

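// Implements the QMD methods whose fields are named identically across
// every QMD version.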
macro_rules! qmd_impl_common {
    ($c:ident, $s:ident) => {
        fn set_barrier_count(&mut self, barrier_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, BARRIER_COUNT, barrier_count);
        }

        const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout = {
            let w = paste! {$c::[<$s _CTA_RASTER_WIDTH>]};
            let h = paste! {$c::[<$s _CTA_RASTER_HEIGHT>]};
            let d = paste! {$c::[<$s _CTA_RASTER_DEPTH>]};
            nak_qmd_dispatch_size_layout {
                x_start: w.start as u16,
                x_end: w.end as u16,
                y_start: h.start as u16,
                y_end: h.end as u16,
                z_start: d.start as u16,
                z_end: d.end as u16,
            }
        };

        fn set_global_size(&mut self, width: u32, height: u32, depth: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_RASTER_WIDTH, width);
            set_field!(bv, $c, $s, CTA_RASTER_HEIGHT, height);
            set_field!(bv, $c, $s, CTA_RASTER_DEPTH, depth);
        }

        fn set_local_size(&mut self, width: u16, height: u16, depth: u16) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION0, width);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION1, height);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION2, depth);
        }

        fn set_slm_size(&mut self, slm_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let slm_size = slm_size.next_multiple_of(0x10);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_LOW_SIZE, slm_size);
        }
    };
}

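// The call/return stack (CRS) lives in shader local memory and its size
// must be a multiple of 0x200 bytes.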
macro_rules! qmd_impl_set_crs_size {
    ($c:ident, $s:ident) => {
        fn set_crs_size(&mut self, crs_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let crs_size = crs_size.next_multiple_of(0x200);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_CRS_SIZE, crs_size);
        }
    };
}

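// Constant-buffer sizes are stored in bytes on Kepler (the SIZE field) but
// shifted right by 4 on later versions (the SIZE_SHIFTED4 field).  Pasting
// the field name onto _SHIFT below selects the matching shift amount.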
const SIZE_SHIFT: u8 = 0;
const SIZE_SHIFTED4_SHIFT: u8 = 4;

macro_rules! qmd_impl_set_cbuf {
    ($c:ident, $s:ident, $size_field:ident) => {
        fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let idx = idx.into();

            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_LOWER, idx, addr_lo);
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_UPPER, idx, addr_hi);

            paste! {
                let shift = [<$size_field _SHIFT>];
                let range = $c::[<$s _CONSTANT_BUFFER_ $size_field>](idx);
                assert!(((size >> shift) << shift) == size);
                bv.set_field(range, size >> shift);
            }

            set_array!(bv, $c, $s, CONSTANT_BUFFER_VALID, idx, true);
        }
    };
}

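// Older QMD versions take a single 32-bit program offset; Volta and later
// take a full 64-bit program address split into lower/upper halves.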
macro_rules! qmd_impl_set_prog_addr_32 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, PROGRAM_OFFSET, addr);
        }
    };
}

macro_rules! qmd_impl_set_prog_addr_64 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_LOWER, addr_lo);
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_UPPER, addr_hi);
        }
    };
}

macro_rules! qmd_impl_set_register_count {
    ($c:ident, $s:ident, $field:ident) => {
        fn set_register_count(&mut self, register_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, $field, register_count);
        }
    };
}

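// QMD version 0.6, used by Kepler (cla0c0).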
mod qmd_0_6 {
    use crate::qmd::*;
    use nvidia_headers::classes::cla0c0::qmd as cla0c0;

    #[repr(transparent)]
    pub struct Qmd0_6 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd0_6 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, cla0c0, QMDV00_06, 0, 6);
            set_field!(bv, cla0c0, QMDV00_06, SASS_VERSION, 0x30);
            Self { qmd }
        }

        qmd_impl_common!(cla0c0, QMDV00_06);
        qmd_impl_set_crs_size!(cla0c0, QMDV00_06);
        qmd_impl_set_cbuf!(cla0c0, QMDV00_06, SIZE);
        qmd_impl_set_prog_addr_32!(cla0c0, QMDV00_06);
        qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, cla0c0, QMDV00_06, SHARED_MEMORY_SIZE, smem_size);

            let l1_config = if smem_size <= (16 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB
            } else if smem_size <= (32 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB
            } else if smem_size <= (48 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB
            } else {
209 panic!("Iclalid shared memory size");
            };
            set_field!(bv, cla0c0, QMDV00_06, L1_CONFIGURATION, l1_config);
        }
    }
}
use qmd_0_6::Qmd0_6;

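// QMD version 2.1, used by Pascal (clc0c0).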
mod qmd_2_1 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc0c0::qmd as clc0c0;

    #[repr(transparent)]
    pub struct Qmd2_1 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_1 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc0c0, QMDV02_01, 2, 1);
            set_field!(bv, clc0c0, QMDV02_01, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc0c0, QMDV02_01);
        qmd_impl_set_crs_size!(clc0c0, QMDV02_01);
        qmd_impl_set_cbuf!(clc0c0, QMDV02_01, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_32!(clc0c0, QMDV02_01);
        qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, clc0c0, QMDV02_01, SHARED_MEMORY_SIZE, smem_size);
        }
    }
}
use qmd_2_1::Qmd2_1;

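// Volta+ QMDs describe the per-SM shared memory configuration as
// size / 4 KiB + 1, where size is first rounded up to the next supported
// configuration (8K, 16K, 32K, 64K, or 96K).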
fn gv100_sm_config_smem_size(size: u32) -> u32 {
    let size = if size > 64 * 1024 {
        96 * 1024
    } else if size > 32 * 1024 {
        64 * 1024
    } else if size > 16 * 1024 {
        32 * 1024
    } else if size > 8 * 1024 {
        16 * 1024
    } else {
        8 * 1024
    };

    size / 4096 + 1
}

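// Volta and later QMDs carry minimum, maximum, and target SM shared memory
// configurations alongside the CTA's shared memory size.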
macro_rules! qmd_impl_set_smem_size_bounded {
    ($c:ident, $s:ident) => {
        fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, $c, $s, SHARED_MEMORY_SIZE, smem_size);

            let max = gv100_sm_config_smem_size(smem_max);
            let min = gv100_sm_config_smem_size(smem_size.into());
            let target = gv100_sm_config_smem_size(smem_size.into());
            set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min);
            set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max);
            set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target);
        }
    };
}

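// QMD version 2.2, introduced with Volta (clc3c0).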
mod qmd_2_2 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc3c0::qmd as clc3c0;

    #[repr(transparent)]
    pub struct Qmd2_2 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_2 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc3c0, QMDV02_02, 2, 2);
            set_field!(bv, clc3c0, QMDV02_02, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc3c0, QMDV02_02);
        qmd_impl_set_crs_size!(clc3c0, QMDV02_02);
        qmd_impl_set_cbuf!(clc3c0, QMDV02_02, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_64!(clc3c0, QMDV02_02);
        qmd_impl_set_register_count!(clc3c0, QMDV02_02, REGISTER_COUNT_V);
        qmd_impl_set_smem_size_bounded!(clc3c0, QMDV02_02);
    }
}
use qmd_2_2::Qmd2_2;

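// QMD version 3.0, introduced with Ampere (clc6c0).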
mod qmd_3_0 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc6c0::qmd as clc6c0;

    #[repr(transparent)]
    pub struct Qmd3_0 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd3_0 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc6c0, QMDV03_00, 3, 0);
            set_field!(bv, clc6c0, QMDV03_00, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc6c0, QMDV03_00);

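        // QMD 3.0 has no CRS size field, so only a zero size is accepted.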
set_crs_size(&mut self, crs_size: u32)333 fn set_crs_size(&mut self, crs_size: u32) {
334 assert!(crs_size == 0);
335 }
336
337 qmd_impl_set_cbuf!(clc6c0, QMDV03_00, SIZE_SHIFTED4);
338 qmd_impl_set_prog_addr_64!(clc6c0, QMDV03_00);
339 qmd_impl_set_register_count!(clc6c0, QMDV03_00, REGISTER_COUNT_V);
340 qmd_impl_set_smem_size_bounded!(clc6c0, QMDV03_00);
341 }
342 }
343 use qmd_3_0::Qmd3_0;
344
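// Populates a QMD of the given version from the compiled shader info and
// the per-dispatch info.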
fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q345 fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
346 let cs_info = unsafe {
347 assert!(info.stage == MESA_SHADER_COMPUTE);
348 &info.__bindgen_anon_1.cs
349 };
350
351 let mut qmd = Q::new();
352
353 qmd.set_barrier_count(info.num_control_barriers);
354 qmd.set_global_size(
355 qmd_info.global_size[0],
356 qmd_info.global_size[1],
357 qmd_info.global_size[2],
358 );
359 qmd.set_local_size(
360 cs_info.local_size[0],
361 cs_info.local_size[1],
362 cs_info.local_size[2],
363 );
364 qmd.set_prog_addr(qmd_info.addr);
365 qmd.set_register_count(info.num_gprs);
366 qmd.set_crs_size(info.crs_size);
367 qmd.set_slm_size(info.slm_size);
368
369 assert!(qmd_info.smem_size >= cs_info.smem_size);
370 assert!(qmd_info.smem_size <= qmd_info.smem_max);
371 qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into());
372
373 for i in 0..qmd_info.num_cbufs {
374 let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()];
375 if cb.size > 0 {
376 qmd.set_cbuf(cb.index.try_into().unwrap(), cb.addr, cb.size);
377 }
378 }
379
380 qmd
381 }
382
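/// Fills `qmd_out` with a QMD for the device's compute class.  The caller
/// must pass valid pointers and a `qmd_size` matching the selected QMD
/// structure.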
#[no_mangle]
pub extern "C" fn nak_fill_qmd(
    dev: *const nv_device_info,
    info: *const nak_shader_info,
    qmd_info: *const nak_qmd_info,
    qmd_out: *mut ::std::os::raw::c_void,
    qmd_size: usize,
) {
    assert!(!dev.is_null());
    let dev = unsafe { &*dev };

    assert!(!info.is_null());
    let info = unsafe { &*info };

    assert!(!qmd_info.is_null());
    let qmd_info = unsafe { &*qmd_info };

    unsafe {
        if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd3_0;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_2;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_1;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd0_6;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else {
            panic!("Unknown shader model");
        }
    }
}

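/// Reports where the global dispatch size (the CTA raster width, height,
/// and depth fields) lives within the QMD for the given device.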
#[no_mangle]
pub extern "C" fn nak_get_qmd_dispatch_size_layout(
    dev: &nv_device_info,
) -> nak_qmd_dispatch_size_layout {
    if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
        Qmd3_0::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
    } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
        Qmd2_2::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
    } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
        Qmd2_1::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
    } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
        Qmd0_6::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
    } else {
        panic!("Unsupported shader model");
    }
}
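
// Illustrative sanity checks (not in the original source) for the Volta+
// shared memory configuration encoding above.  A minimal sketch assuming a
// standard `cargo test` harness; it exercises only the pure
// `gv100_sm_config_smem_size` helper.
#[cfg(test)]
mod tests {
    use super::gv100_sm_config_smem_size;

    #[test]
    fn smem_config_rounds_up_to_supported_sizes() {
        // Sizes at or below 8K map to the 8K configuration: 8192/4096 + 1.
        assert_eq!(gv100_sm_config_smem_size(0), 3);
        assert_eq!(gv100_sm_config_smem_size(8 * 1024), 3);
        // One byte over a supported size rounds up to the next one.
        assert_eq!(gv100_sm_config_smem_size(8 * 1024 + 1), 5);
        assert_eq!(gv100_sm_config_smem_size(16 * 1024), 5);
        assert_eq!(gv100_sm_config_smem_size(32 * 1024), 9);
        assert_eq!(gv100_sm_config_smem_size(64 * 1024), 17);
        // Anything above 64K selects the 96K configuration.
        assert_eq!(gv100_sm_config_smem_size(64 * 1024 + 1), 25);
    }
}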