1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::ir::*;
5 use crate::legalize::{
6 src_is_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers, LegalizeBuilder,
7 };
8 use bitview::*;
9
10 use std::collections::HashMap;
11 use std::ops::Range;
12
/// Shader model description for the SM versions accepted by
/// [`ShaderModel50::new`], i.e. `50..70`.
pub struct ShaderModel50 {
    /// Raw SM version number as passed to `new`.
    sm: u8,
}

impl ShaderModel50 {
    /// Builds a shader model for SM version `sm`.
    ///
    /// # Panics
    ///
    /// Panics when `sm` is outside of `50..70`.
    pub fn new(sm: u8) -> Self {
        assert!(sm >= 50 && sm < 70);
        ShaderModel50 { sm }
    }
}
23
24 impl ShaderModel for ShaderModel50 {
sm(&self) -> u825 fn sm(&self) -> u8 {
26 self.sm
27 }
28
num_regs(&self, file: RegFile) -> u3229 fn num_regs(&self, file: RegFile) -> u32 {
30 match file {
31 RegFile::GPR => 255,
32 RegFile::UGPR => 0,
33 RegFile::Pred => 7,
34 RegFile::UPred => 0,
35 RegFile::Carry => 1,
36 RegFile::Bar => 0,
37 RegFile::Mem => RegRef::MAX_IDX + 1,
38 }
39 }
40
crs_size(&self, max_crs_depth: u32) -> u3241 fn crs_size(&self, max_crs_depth: u32) -> u32 {
42 if max_crs_depth <= 16 {
43 0
44 } else if max_crs_depth <= 32 {
45 1024
46 } else {
47 ((max_crs_depth + 32) * 16).next_multiple_of(512)
48 }
49 }
50
op_can_be_uniform(&self, _op: &Op) -> bool51 fn op_can_be_uniform(&self, _op: &Op) -> bool {
52 false
53 }
54
legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op)55 fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op) {
56 as_sm50_op_mut(op).legalize(b);
57 }
58
encode_shader(&self, s: &Shader<'_>) -> Vec<u32>59 fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32> {
60 encode_sm50_shader(self, s)
61 }
62 }
63
/// Per-op hooks used by this file: every supported op provides a
/// legalization step and an encoding step.
trait SM50Op {
    /// Rewrites the op's sources in place, using `b` to emit any extra
    /// instructions needed, so that `encode` can handle them.
    fn legalize(&mut self, b: &mut LegalizeBuilder);
    /// Writes the op's fields into the encoder `e`.
    fn encode(&self, e: &mut SM50Encoder<'_>);
}
68
/// State used while encoding a single instruction.
struct SM50Encoder<'a> {
    /// Shader model being encoded for (consulted e.g. for MUFU.SQRT).
    sm: &'a ShaderModel50,
    // NOTE(review): presumably the position of the current instruction;
    // not used in the visible part of the file -- confirm.
    ip: usize,
    // NOTE(review): presumably maps labels to instruction positions;
    // not used in the visible part of the file -- confirm.
    labels: &'a HashMap<Label, usize>,
    /// The 64 instruction bits that all `set_*` helpers write into.
    inst: [u32; 2],
    /// Scheduling bits, filled in by `set_instr_deps`.
    sched: u32,
}
76
77 impl BitViewable for SM50Encoder<'_> {
bits(&self) -> usize78 fn bits(&self) -> usize {
79 BitView::new(&self.inst).bits()
80 }
81
get_bit_range_u64(&self, range: Range<usize>) -> u6482 fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
83 BitView::new(&self.inst).get_bit_range_u64(range)
84 }
85 }
86
87 impl BitMutViewable for SM50Encoder<'_> {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)88 fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
89 BitMutView::new(&mut self.inst).set_bit_range_u64(range, val);
90 }
91 }
92
93 impl SetFieldU64 for SM50Encoder<'_> {
set_field_u64(&mut self, range: Range<usize>, val: u64)94 fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
95 BitMutView::new(&mut self.inst).set_field_u64(range, val);
96 }
97 }
98
99 impl SM50Encoder<'_> {
set_opcode(&mut self, opcode: u16)100 fn set_opcode(&mut self, opcode: u16) {
101 self.set_field(48..64, opcode);
102 }
103
set_pred_reg(&mut self, range: Range<usize>, reg: RegRef)104 fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) {
105 assert!(range.len() == 3);
106 assert!(reg.file() == RegFile::Pred);
107 assert!(reg.base_idx() <= 7);
108 assert!(reg.comps() == 1);
109 self.set_field(range, reg.base_idx());
110 }
111
set_pred(&mut self, pred: &Pred)112 fn set_pred(&mut self, pred: &Pred) {
113 assert!(!pred.is_false());
114 self.set_pred_reg(
115 16..19,
116 match pred.pred_ref {
117 PredRef::None => RegRef::zero(RegFile::Pred, 1),
118 PredRef::Reg(reg) => reg,
119 PredRef::SSA(_) => panic!("SSA values must be lowered"),
120 },
121 );
122 self.set_bit(19, pred.pred_inv);
123 }
124
set_instr_deps(&mut self, deps: &InstrDeps)125 fn set_instr_deps(&mut self, deps: &InstrDeps) {
126 let mut sched = BitMutView::new(&mut self.sched);
127
128 sched.set_field(0..4, deps.delay);
129 sched.set_bit(4, deps.yld);
130 sched.set_field(5..8, deps.wr_bar().unwrap_or(7));
131 sched.set_field(8..11, deps.rd_bar().unwrap_or(7));
132 sched.set_field(11..17, deps.wt_bar_mask);
133 sched.set_field(17..21, deps.reuse_mask);
134 }
135
set_reg(&mut self, range: Range<usize>, reg: RegRef)136 fn set_reg(&mut self, range: Range<usize>, reg: RegRef) {
137 assert!(range.len() == 8);
138 assert!(reg.file() == RegFile::GPR);
139 self.set_field(range, reg.base_idx());
140 }
141
set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef)142 fn set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef) {
143 match src_ref {
144 SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
145 SrcRef::Reg(reg) => self.set_reg(range, reg),
146 _ => panic!("Not a register"),
147 }
148 }
149
set_reg_src(&mut self, range: Range<usize>, src: Src)150 fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
151 assert!(src.src_mod.is_none());
152 self.set_reg_src_ref(range, src.src_ref);
153 }
154
set_reg_fmod_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: Src, )155 fn set_reg_fmod_src(
156 &mut self,
157 range: Range<usize>,
158 abs_bit: usize,
159 neg_bit: usize,
160 src: Src,
161 ) {
162 self.set_reg_src_ref(range, src.src_ref);
163 self.set_bit(abs_bit, src.src_mod.has_fabs());
164 self.set_bit(neg_bit, src.src_mod.has_fneg());
165 }
166
set_reg_ineg_src( &mut self, range: Range<usize>, neg_bit: usize, src: Src, )167 fn set_reg_ineg_src(
168 &mut self,
169 range: Range<usize>,
170 neg_bit: usize,
171 src: Src,
172 ) {
173 self.set_reg_src_ref(range, src.src_ref);
174 self.set_bit(neg_bit, src.src_mod.is_ineg());
175 }
176
set_reg_bnot_src( &mut self, range: Range<usize>, not_bit: usize, src: Src, )177 fn set_reg_bnot_src(
178 &mut self,
179 range: Range<usize>,
180 not_bit: usize,
181 src: Src,
182 ) {
183 self.set_reg_src_ref(range, src.src_ref);
184 self.set_bit(not_bit, src.src_mod.is_bnot());
185 }
186
set_pred_dst(&mut self, range: Range<usize>, dst: Dst)187 fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
188 match dst {
189 Dst::None => {
190 self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1));
191 }
192 Dst::Reg(reg) => self.set_pred_reg(range, reg),
193 _ => panic!("Not a register"),
194 }
195 }
196
set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src)197 fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src) {
198 // The default for predicates is true
199 let true_reg = RegRef::new(RegFile::Pred, 7, 1);
200
201 let (not, reg) = match src.src_ref {
202 SrcRef::True => (false, true_reg),
203 SrcRef::False => (true, true_reg),
204 SrcRef::Reg(reg) => (false, reg),
205 _ => panic!("Not a register"),
206 };
207 self.set_pred_reg(range, reg);
208 self.set_bit(not_bit, not ^ src.src_mod.is_bnot());
209 }
210
set_dst(&mut self, dst: Dst)211 fn set_dst(&mut self, dst: Dst) {
212 let reg = match dst {
213 Dst::None => RegRef::zero(RegFile::GPR, 1),
214 Dst::Reg(reg) => reg,
215 _ => panic!("invalid dst {dst}"),
216 };
217 self.set_reg(0..8, reg);
218 }
219
set_src_imm32(&mut self, range: Range<usize>, u: u32)220 fn set_src_imm32(&mut self, range: Range<usize>, u: u32) {
221 assert!(range.len() == 32);
222 self.set_field(range, u);
223 }
224
set_src_imm_i20( &mut self, range: Range<usize>, sign_bit: usize, i: u32, )225 fn set_src_imm_i20(
226 &mut self,
227 range: Range<usize>,
228 sign_bit: usize,
229 i: u32,
230 ) {
231 assert!(range.len() == 19);
232 assert!((i & 0xfff80000) == 0 || (i & 0xfff80000) == 0xfff80000);
233
234 self.set_field(range, i & 0x7ffff);
235 self.set_field(sign_bit..sign_bit + 1, (i & 0x80000) >> 19);
236 }
237
set_src_imm_f20( &mut self, range: Range<usize>, sign_bit: usize, f: u32, )238 fn set_src_imm_f20(
239 &mut self,
240 range: Range<usize>,
241 sign_bit: usize,
242 f: u32,
243 ) {
244 assert!(range.len() == 19);
245 assert!((f & 0x00000fff) == 0);
246
247 self.set_field(range, (f >> 12) & 0x7ffff);
248 self.set_field(sign_bit..sign_bit + 1, f >> 31);
249 }
250
set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef)251 fn set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef) {
252 let mut v = BitMutView::new_subset(self, range);
253
254 assert!(cb.offset % 4 == 0);
255
256 v.set_field(0..14, cb.offset >> 2);
257 if let CBuf::Binding(idx) = cb.buf {
258 v.set_field(14..19, idx);
259 } else {
260 panic!("Must be a bound constant buffer");
261 }
262 }
263
set_cb_fmod_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: Src, )264 fn set_cb_fmod_src(
265 &mut self,
266 range: Range<usize>,
267 abs_bit: usize,
268 neg_bit: usize,
269 src: Src,
270 ) {
271 if let SrcRef::CBuf(cb) = &src.src_ref {
272 self.set_src_cb(range, cb);
273 } else {
274 panic!("Not a CBuf source");
275 }
276
277 self.set_bit(abs_bit, src.src_mod.has_fabs());
278 self.set_bit(neg_bit, src.src_mod.has_fneg());
279 }
280
set_cb_ineg_src( &mut self, range: Range<usize>, neg_bit: usize, src: Src, )281 fn set_cb_ineg_src(
282 &mut self,
283 range: Range<usize>,
284 neg_bit: usize,
285 src: Src,
286 ) {
287 if let SrcRef::CBuf(cb) = &src.src_ref {
288 self.set_src_cb(range, cb);
289 } else {
290 panic!("Not a CBuf source");
291 }
292
293 self.set_bit(neg_bit, src.src_mod.is_ineg());
294 }
295
set_cb_bnot_src( &mut self, range: Range<usize>, not_bit: usize, src: Src, )296 fn set_cb_bnot_src(
297 &mut self,
298 range: Range<usize>,
299 not_bit: usize,
300 src: Src,
301 ) {
302 if let SrcRef::CBuf(cb) = &src.src_ref {
303 self.set_src_cb(range, cb);
304 } else {
305 panic!("Not a CBuf source");
306 }
307
308 self.set_bit(not_bit, src.src_mod.is_bnot());
309 }
310 }
311
312 //
313 // Legalization helpers
314 //
315
316 pub trait SM50LegalizeBuildHelpers: LegalizeBuildHelpers {
copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType)317 fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) {
318 if src.src_mod.has_fabs() {
319 self.copy_alu_src_and_lower_fmod(src, src_type);
320 }
321 }
322
copy_alu_src_if_i20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )323 fn copy_alu_src_if_i20_overflow(
324 &mut self,
325 src: &mut Src,
326 reg_file: RegFile,
327 src_type: SrcType,
328 ) {
329 if src.as_imm_not_i20().is_some() {
330 self.copy_alu_src(src, reg_file, src_type);
331 }
332 }
333
copy_alu_src_if_f20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )334 fn copy_alu_src_if_f20_overflow(
335 &mut self,
336 src: &mut Src,
337 reg_file: RegFile,
338 src_type: SrcType,
339 ) {
340 if src.as_imm_not_f20().is_some() {
341 self.copy_alu_src(src, reg_file, src_type);
342 }
343 }
344 }
345
// All helper methods come from the trait's default implementations.
impl SM50LegalizeBuildHelpers for LegalizeBuilder<'_> {}
347
348 /// Helper to legalize extended or external instructions
349 ///
350 /// These are instructions which reach out external units such as load/store
351 /// and texture ops. They typically can't take anything but GPRs and are the
352 /// only types of instructions that support vectors.
353 ///
legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder)354 fn legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder) {
355 let src_types = op.src_types();
356 for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() {
357 match src_types[i] {
358 SrcType::SSA => {
359 assert!(src.as_ssa().is_some());
360 }
361 SrcType::GPR => {
362 assert!(src_is_reg(src, RegFile::GPR));
363 }
364 SrcType::ALU
365 | SrcType::F16
366 | SrcType::F16v2
367 | SrcType::F32
368 | SrcType::F64
369 | SrcType::I32
370 | SrcType::B32 => {
371 panic!("ALU srcs must be legalized explicitly");
372 }
373 SrcType::Pred => {
374 panic!("Predicates must be legalized explicitly");
375 }
376 SrcType::Carry => {
377 panic!("Carry values must be legalized explicitly");
378 }
379 SrcType::Bar => panic!("Barrier regs are Volta+"),
380 }
381 }
382 }
383
384 //
385 // Implementations of SM50Op for each op we support on Maxwell/Pascal
386 //
387
388 impl SM50Encoder<'_> {
set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode)389 fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) {
390 assert!(range.len() == 2);
391 self.set_field(
392 range,
393 match rnd_mode {
394 FRndMode::NearestEven => 0_u8,
395 FRndMode::NegInf => 1_u8,
396 FRndMode::PosInf => 2_u8,
397 FRndMode::Zero => 3_u8,
398 },
399 );
400 }
401 }
402
403 impl SM50Op for OpFAdd {
legalize(&mut self, b: &mut LegalizeBuilder)404 fn legalize(&mut self, b: &mut LegalizeBuilder) {
405 use RegFile::GPR;
406 let [src0, src1] = &mut self.srcs;
407 swap_srcs_if_not_reg(src0, src1, GPR);
408 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
409 }
410
encode(&self, e: &mut SM50Encoder<'_>)411 fn encode(&self, e: &mut SM50Encoder<'_>) {
412 if let Some(imm32) = self.srcs[1].as_imm_not_f20() {
413 e.set_opcode(0x0800);
414 e.set_dst(self.dst);
415 e.set_reg_fmod_src(8..16, 54, 56, self.srcs[0]);
416 e.set_src_imm32(20..52, imm32);
417 e.set_bit(55, self.ftz);
418 } else {
419 match &self.srcs[1].src_ref {
420 SrcRef::Zero | SrcRef::Reg(_) => {
421 e.set_opcode(0x5c58);
422 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
423 }
424 SrcRef::Imm32(imm32) => {
425 e.set_opcode(0x3858);
426 e.set_src_imm_f20(20..39, 56, *imm32);
427 assert!(self.srcs[1].src_mod.is_none());
428 }
429 SrcRef::CBuf(_) => {
430 e.set_opcode(0x4c58);
431 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
432 }
433 src => panic!("Invalid fadd src1: {src}"),
434 }
435
436 e.set_dst(self.dst);
437 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
438
439 e.set_rnd_mode(39..41, self.rnd_mode);
440 e.set_bit(44, self.ftz);
441 e.set_bit(50, self.saturate);
442 }
443 }
444 }
445
446 impl SM50Op for OpFFma {
legalize(&mut self, b: &mut LegalizeBuilder)447 fn legalize(&mut self, b: &mut LegalizeBuilder) {
448 use RegFile::GPR;
449 let [src0, src1, src2] = &mut self.srcs;
450 b.copy_alu_src_if_fabs(src0, SrcType::F32);
451 b.copy_alu_src_if_fabs(src1, SrcType::F32);
452 b.copy_alu_src_if_fabs(src2, SrcType::F32);
453 swap_srcs_if_not_reg(src0, src1, GPR);
454 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
455 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
456 if src_is_reg(src1, GPR) {
457 b.copy_alu_src_if_imm(src2, GPR, SrcType::F32);
458 } else {
459 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32);
460 }
461 }
462
encode(&self, e: &mut SM50Encoder<'_>)463 fn encode(&self, e: &mut SM50Encoder<'_>) {
464 // ffma doesn't have any abs flags.
465 assert!(!self.srcs[0].src_mod.has_fabs());
466 assert!(!self.srcs[1].src_mod.has_fabs());
467 assert!(!self.srcs[2].src_mod.has_fabs());
468
469 // There is one fneg bit shared by the two fmul sources
470 let fneg_fmul =
471 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
472 let fneg_src2 = self.srcs[2].src_mod.has_fneg();
473
474 match &self.srcs[2].src_ref {
475 SrcRef::Zero | SrcRef::Reg(_) => {
476 match &self.srcs[1].src_ref {
477 SrcRef::Zero | SrcRef::Reg(_) => {
478 e.set_opcode(0x5980);
479 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
480 }
481 SrcRef::Imm32(imm32) => {
482 e.set_opcode(0x3280);
483
484 // Technically, ffma also supports a 32-bit immediate,
485 // but only in the case where the destination is the
486 // same as src2. We don't support that right now.
487 e.set_src_imm_f20(20..39, 56, *imm32);
488 }
489 SrcRef::CBuf(cb) => {
490 e.set_opcode(0x4980);
491 e.set_src_cb(20..39, cb);
492 }
493 src => panic!("Invalid ffma src1: {src}"),
494 }
495
496 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
497 }
498 SrcRef::CBuf(cb) => {
499 e.set_opcode(0x5180);
500 e.set_src_cb(20..39, cb);
501 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
502 }
503 src => panic!("Invalid ffma src2: {src}"),
504 }
505
506 e.set_dst(self.dst);
507 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
508
509 e.set_bit(48, fneg_fmul);
510 e.set_bit(49, fneg_src2);
511 e.set_bit(50, self.saturate);
512 e.set_rnd_mode(51..53, self.rnd_mode);
513
514 e.set_bit(53, self.ftz);
515 e.set_bit(54, self.dnz);
516 }
517 }
518
519 impl SM50Op for OpFMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)520 fn legalize(&mut self, b: &mut LegalizeBuilder) {
521 use RegFile::GPR;
522 let [src0, src1] = &mut self.srcs;
523 swap_srcs_if_not_reg(src0, src1, GPR);
524 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
525 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
526 }
527
encode(&self, e: &mut SM50Encoder<'_>)528 fn encode(&self, e: &mut SM50Encoder<'_>) {
529 match &self.srcs[1].src_ref {
530 SrcRef::Zero | SrcRef::Reg(_) => {
531 e.set_opcode(0x5c60);
532 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
533 }
534 SrcRef::Imm32(imm32) => {
535 e.set_opcode(0x3860);
536 e.set_src_imm_f20(20..39, 56, *imm32);
537 assert!(self.srcs[1].src_mod.is_none());
538 }
539 SrcRef::CBuf(_) => {
540 e.set_opcode(0x4c60);
541 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
542 }
543 src => panic!("Invalid fmnmx src2: {src}"),
544 }
545
546 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
547 e.set_dst(self.dst);
548 e.set_pred_src(39..42, 42, self.min);
549 e.set_bit(44, self.ftz);
550 }
551 }
552
553 impl SM50Op for OpFMul {
legalize(&mut self, b: &mut LegalizeBuilder)554 fn legalize(&mut self, b: &mut LegalizeBuilder) {
555 use RegFile::GPR;
556 let [src0, src1] = &mut self.srcs;
557 b.copy_alu_src_if_fabs(src0, SrcType::F32);
558 b.copy_alu_src_if_fabs(src1, SrcType::F32);
559 swap_srcs_if_not_reg(src0, src1, GPR);
560 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
561 }
562
encode(&self, e: &mut SM50Encoder<'_>)563 fn encode(&self, e: &mut SM50Encoder<'_>) {
564 // fmul doesn't have any abs flags.
565 assert!(!self.srcs[0].src_mod.has_fabs());
566 assert!(!self.srcs[1].src_mod.has_fabs());
567
568 // There is one fneg bit shared by both sources
569 let fneg =
570 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
571
572 if let Some(mut imm32) = self.srcs[1].as_imm_not_f20() {
573 e.set_opcode(0x1e00);
574
575 e.set_bit(53, self.ftz);
576 e.set_bit(54, self.dnz);
577 e.set_bit(55, self.saturate);
578
579 if fneg {
580 // Flip the immediate sign bit
581 imm32 ^= 0x80000000;
582 }
583 e.set_src_imm32(20..52, imm32);
584 } else {
585 match &self.srcs[1].src_ref {
586 SrcRef::Zero | SrcRef::Reg(_) => {
587 e.set_opcode(0x5c68);
588 e.set_reg_src(20..28, self.srcs[1]);
589 }
590 SrcRef::Imm32(imm32) => {
591 e.set_opcode(0x3868);
592 e.set_src_imm_f20(20..39, 56, *imm32);
593 }
594 SrcRef::CBuf(cbuf) => {
595 e.set_opcode(0x4c68);
596 e.set_src_cb(20..39, cbuf);
597 }
598 src => panic!("Invalid fmul src1: {src}"),
599 }
600
601 e.set_rnd_mode(39..41, self.rnd_mode);
602 e.set_field(41..44, 0x0_u8); // TODO: PDIV
603 e.set_bit(44, self.ftz);
604 e.set_bit(45, self.dnz);
605 e.set_bit(48, fneg);
606 e.set_bit(50, self.saturate);
607 }
608
609 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
610 e.set_dst(self.dst);
611 }
612 }
613
614 impl SM50Op for OpRro {
legalize(&mut self, b: &mut LegalizeBuilder)615 fn legalize(&mut self, b: &mut LegalizeBuilder) {
616 use RegFile::GPR;
617 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::F32);
618 }
619
encode(&self, e: &mut SM50Encoder<'_>)620 fn encode(&self, e: &mut SM50Encoder<'_>) {
621 match &self.src.src_ref {
622 SrcRef::Zero | SrcRef::Reg(_) => {
623 e.set_opcode(0x5c90);
624 e.set_reg_fmod_src(20..28, 49, 45, self.src);
625 }
626 SrcRef::Imm32(imm32) => {
627 e.set_opcode(0x3890);
628 e.set_src_imm_f20(20..39, 56, *imm32);
629 assert!(self.src.src_mod.is_none());
630 }
631 SrcRef::CBuf(_) => {
632 e.set_opcode(0x4c90);
633 e.set_cb_fmod_src(20..39, 49, 45, self.src);
634 }
635 src => panic!("Invalid rro src: {src}"),
636 }
637
638 e.set_dst(self.dst);
639 e.set_field(
640 39..40,
641 match self.op {
642 RroOp::SinCos => 0u8,
643 RroOp::Exp2 => 1u8,
644 },
645 );
646 }
647 }
648
649 impl SM50Op for OpMuFu {
legalize(&mut self, b: &mut LegalizeBuilder)650 fn legalize(&mut self, b: &mut LegalizeBuilder) {
651 b.copy_alu_src_if_not_reg(&mut self.src, RegFile::GPR, SrcType::GPR);
652 }
653
encode(&self, e: &mut SM50Encoder<'_>)654 fn encode(&self, e: &mut SM50Encoder<'_>) {
655 e.set_opcode(0x5080);
656
657 e.set_dst(self.dst);
658 e.set_reg_fmod_src(8..16, 46, 48, self.src);
659
660 e.set_field(
661 20..24,
662 match self.op {
663 MuFuOp::Cos => 0_u8,
664 MuFuOp::Sin => 1_u8,
665 MuFuOp::Exp2 => 2_u8,
666 MuFuOp::Log2 => 3_u8,
667 MuFuOp::Rcp => 4_u8,
668 MuFuOp::Rsq => 5_u8,
669 MuFuOp::Rcp64H => 6_u8,
670 MuFuOp::Rsq64H => 7_u8,
671 // SQRT is only on SM52 and later
672 MuFuOp::Sqrt if e.sm.sm >= 52 => 8_u8,
673 MuFuOp::Sqrt => panic!("MUFU.SQRT not supported on SM50"),
674 MuFuOp::Tanh => panic!("MUFU.TANH not supported on SM50"),
675 },
676 );
677 }
678 }
679
680 impl SM50Encoder<'_> {
set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp)681 fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) {
682 assert!(range.len() == 4);
683 self.set_field(
684 range,
685 match op {
686 FloatCmpOp::OrdLt => 0x01_u8,
687 FloatCmpOp::OrdEq => 0x02_u8,
688 FloatCmpOp::OrdLe => 0x03_u8,
689 FloatCmpOp::OrdGt => 0x04_u8,
690 FloatCmpOp::OrdNe => 0x05_u8,
691 FloatCmpOp::OrdGe => 0x06_u8,
692 FloatCmpOp::UnordLt => 0x09_u8,
693 FloatCmpOp::UnordEq => 0x0a_u8,
694 FloatCmpOp::UnordLe => 0x0b_u8,
695 FloatCmpOp::UnordGt => 0x0c_u8,
696 FloatCmpOp::UnordNe => 0x0d_u8,
697 FloatCmpOp::UnordGe => 0x0e_u8,
698 FloatCmpOp::IsNum => 0x07_u8,
699 FloatCmpOp::IsNan => 0x08_u8,
700 },
701 );
702 }
703
set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp)704 fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) {
705 assert!(range.len() == 2);
706 self.set_field(
707 range,
708 match op {
709 PredSetOp::And => 0_u8,
710 PredSetOp::Or => 1_u8,
711 PredSetOp::Xor => 2_u8,
712 },
713 );
714 }
715
set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp)716 fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) {
717 assert!(range.len() == 3);
718 self.set_field(
719 range,
720 match op {
721 IntCmpOp::Eq => 2_u8,
722 IntCmpOp::Ne => 5_u8,
723 IntCmpOp::Lt => 1_u8,
724 IntCmpOp::Le => 3_u8,
725 IntCmpOp::Gt => 4_u8,
726 IntCmpOp::Ge => 6_u8,
727 },
728 );
729 }
730 }
731
732 impl SM50Op for OpFSet {
legalize(&mut self, b: &mut LegalizeBuilder)733 fn legalize(&mut self, b: &mut LegalizeBuilder) {
734 use RegFile::GPR;
735 let [src0, src1] = &mut self.srcs;
736 if swap_srcs_if_not_reg(src0, src1, GPR) {
737 self.cmp_op = self.cmp_op.flip();
738 }
739 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
740 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
741 }
742
encode(&self, e: &mut SM50Encoder<'_>)743 fn encode(&self, e: &mut SM50Encoder<'_>) {
744 match &self.srcs[1].src_ref {
745 SrcRef::Zero | SrcRef::Reg(_) => {
746 e.set_opcode(0x5800);
747 e.set_reg_fmod_src(20..28, 44, 53, self.srcs[1]);
748 }
749 SrcRef::Imm32(imm32) => {
750 e.set_opcode(0x3000);
751 e.set_src_imm_f20(20..39, 56, *imm32);
752 assert!(self.srcs[1].src_mod.is_none());
753 }
754 SrcRef::CBuf(_) => {
755 e.set_opcode(0x4800);
756 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
757 }
758 src => panic!("Invalid fset src1: {src}"),
759 }
760
761 e.set_reg_fmod_src(8..16, 54, 43, self.srcs[0]);
762 e.set_pred_src(39..42, 42, SrcRef::True.into());
763 e.set_float_cmp_op(48..52, self.cmp_op);
764 e.set_bit(52, true); // bool float
765 e.set_bit(55, self.ftz);
766 e.set_dst(self.dst);
767 }
768 }
769
770 impl SM50Op for OpFSetP {
legalize(&mut self, b: &mut LegalizeBuilder)771 fn legalize(&mut self, b: &mut LegalizeBuilder) {
772 use RegFile::GPR;
773 let [src0, src1] = &mut self.srcs;
774 if swap_srcs_if_not_reg(src0, src1, GPR) {
775 self.cmp_op = self.cmp_op.flip();
776 }
777 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
778 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
779 }
780
encode(&self, e: &mut SM50Encoder<'_>)781 fn encode(&self, e: &mut SM50Encoder<'_>) {
782 match &self.srcs[1].src_ref {
783 SrcRef::Zero | SrcRef::Reg(_) => {
784 e.set_opcode(0x5bb0);
785 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
786 }
787 SrcRef::Imm32(imm32) => {
788 e.set_opcode(0x36b0);
789 e.set_src_imm_f20(20..39, 56, *imm32);
790 assert!(self.srcs[1].src_mod.is_none());
791 }
792 SrcRef::CBuf(_) => {
793 e.set_opcode(0x4bb0);
794 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
795 }
796 src => panic!("Invalid fsetp src1: {src}"),
797 }
798
799 e.set_pred_dst(3..6, self.dst);
800 e.set_pred_dst(0..3, Dst::None); // dst1
801 e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
802 e.set_pred_src(39..42, 42, self.accum);
803 e.set_pred_set_op(45..47, self.set_op);
804 e.set_bit(47, self.ftz);
805 e.set_float_cmp_op(48..52, self.cmp_op);
806 }
807 }
808
809 impl SM50Op for OpFSwzAdd {
legalize(&mut self, b: &mut LegalizeBuilder)810 fn legalize(&mut self, b: &mut LegalizeBuilder) {
811 use RegFile::GPR;
812 b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
813 b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
814 }
815
encode(&self, e: &mut SM50Encoder<'_>)816 fn encode(&self, e: &mut SM50Encoder<'_>) {
817 e.set_opcode(0x50f8);
818
819 e.set_dst(self.dst);
820 e.set_reg_src(8..16, self.srcs[0]);
821 e.set_reg_src(20..28, self.srcs[1]);
822
823 e.set_field(
824 39..41,
825 match self.rnd_mode {
826 FRndMode::NearestEven => 0u8,
827 FRndMode::NegInf => 1u8,
828 FRndMode::PosInf => 2u8,
829 FRndMode::Zero => 3u8,
830 },
831 );
832
833 for (i, op) in self.ops.iter().enumerate() {
834 e.set_field(
835 28 + i * 2..28 + (i + 1) * 2,
836 match op {
837 FSwzAddOp::Add => 0u8,
838 FSwzAddOp::SubLeft => 1u8,
839 FSwzAddOp::SubRight => 2u8,
840 FSwzAddOp::MoveLeft => 3u8,
841 },
842 );
843 }
844
845 e.set_bit(38, false); /* .NDV */
846 e.set_bit(44, self.ftz);
847 e.set_bit(47, false); /* dst.CC */
848 }
849 }
850
851 impl SM50Op for OpDAdd {
legalize(&mut self, b: &mut LegalizeBuilder)852 fn legalize(&mut self, b: &mut LegalizeBuilder) {
853 use RegFile::GPR;
854 let [src0, src1] = &mut self.srcs;
855 swap_srcs_if_not_reg(src0, src1, GPR);
856 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
857 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
858 }
859
encode(&self, e: &mut SM50Encoder<'_>)860 fn encode(&self, e: &mut SM50Encoder<'_>) {
861 match &self.srcs[1].src_ref {
862 SrcRef::Zero | SrcRef::Reg(_) => {
863 e.set_opcode(0x5c70);
864 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
865 }
866 SrcRef::Imm32(imm32) => {
867 e.set_opcode(0x3870);
868 e.set_src_imm_f20(20..39, 56, *imm32);
869 assert!(self.srcs[1].src_mod.is_none());
870 }
871 SrcRef::CBuf(_) => {
872 e.set_opcode(0x4c70);
873 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
874 }
875 src => panic!("Invalid dadd src1: {src}"),
876 }
877
878 e.set_dst(self.dst);
879 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
880 e.set_rnd_mode(39..41, self.rnd_mode);
881 }
882 }
883
884 impl SM50Op for OpDFma {
legalize(&mut self, b: &mut LegalizeBuilder)885 fn legalize(&mut self, b: &mut LegalizeBuilder) {
886 use RegFile::GPR;
887 let [src0, src1, src2] = &mut self.srcs;
888 b.copy_alu_src_if_fabs(src0, SrcType::F64);
889 b.copy_alu_src_if_fabs(src1, SrcType::F64);
890 b.copy_alu_src_if_fabs(src2, SrcType::F64);
891 swap_srcs_if_not_reg(src0, src1, GPR);
892 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
893 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
894 if src_is_reg(src1, GPR) {
895 b.copy_alu_src_if_imm(src2, GPR, SrcType::F64);
896 } else {
897 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64);
898 }
899 }
900
encode(&self, e: &mut SM50Encoder<'_>)901 fn encode(&self, e: &mut SM50Encoder<'_>) {
902 // dfma doesn't have any abs flags.
903 assert!(!self.srcs[0].src_mod.has_fabs());
904 assert!(!self.srcs[1].src_mod.has_fabs());
905 assert!(!self.srcs[2].src_mod.has_fabs());
906
907 // There is one fneg bit shared by the two fmul sources
908 let fneg_fmul =
909 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
910 let fneg_src2 = self.srcs[2].src_mod.has_fneg();
911
912 match &self.srcs[2].src_ref {
913 SrcRef::Zero | SrcRef::Reg(_) => {
914 match &self.srcs[1].src_ref {
915 SrcRef::Zero | SrcRef::Reg(_) => {
916 e.set_opcode(0x5b70);
917 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
918 }
919 SrcRef::Imm32(imm32) => {
920 e.set_opcode(0x3670);
921 e.set_src_imm_f20(20..39, 56, *imm32);
922 }
923 SrcRef::CBuf(cb) => {
924 e.set_opcode(0x4b70);
925 e.set_src_cb(20..39, cb);
926 }
927 src => panic!("Invalid dfma src1: {src}"),
928 }
929
930 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
931 }
932 SrcRef::CBuf(cb) => {
933 e.set_opcode(0x5370);
934 e.set_src_cb(20..39, cb);
935 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
936 }
937 src => panic!("Invalid dfma src2: {src}"),
938 }
939
940 e.set_dst(self.dst);
941 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
942
943 e.set_bit(48, fneg_fmul);
944 e.set_bit(49, fneg_src2);
945
946 e.set_rnd_mode(50..52, self.rnd_mode);
947 }
948 }
949
950 impl SM50Op for OpDMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)951 fn legalize(&mut self, b: &mut LegalizeBuilder) {
952 use RegFile::GPR;
953 let [src0, src1] = &mut self.srcs;
954 swap_srcs_if_not_reg(src0, src1, GPR);
955 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
956 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
957 }
958
encode(&self, e: &mut SM50Encoder<'_>)959 fn encode(&self, e: &mut SM50Encoder<'_>) {
960 match &self.srcs[1].src_ref {
961 SrcRef::Zero | SrcRef::Reg(_) => {
962 e.set_opcode(0x5c50);
963 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
964 }
965 SrcRef::Imm32(imm32) => {
966 e.set_opcode(0x3850);
967 e.set_src_imm_f20(20..39, 56, *imm32);
968 assert!(self.srcs[1].src_mod.is_none());
969 }
970 SrcRef::CBuf(_) => {
971 e.set_opcode(0x4c50);
972 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
973 }
974 src => panic!("Invalid dmnmx src1: {src}"),
975 }
976
977 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
978 e.set_dst(self.dst);
979 e.set_pred_src(39..42, 42, self.min);
980 }
981 }
982
983 impl SM50Op for OpDMul {
legalize(&mut self, b: &mut LegalizeBuilder)984 fn legalize(&mut self, b: &mut LegalizeBuilder) {
985 use RegFile::GPR;
986 let [src0, src1] = &mut self.srcs;
987 b.copy_alu_src_if_fabs(src0, SrcType::F64);
988 b.copy_alu_src_if_fabs(src1, SrcType::F64);
989 swap_srcs_if_not_reg(src0, src1, GPR);
990 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
991 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
992 }
993
encode(&self, e: &mut SM50Encoder<'_>)994 fn encode(&self, e: &mut SM50Encoder<'_>) {
995 assert!(!self.srcs[0].src_mod.has_fabs());
996 assert!(!self.srcs[1].src_mod.has_fabs());
997
998 // There is one fneg bit shared by both sources
999 let fneg =
1000 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
1001
1002 match &self.srcs[1].src_ref {
1003 SrcRef::Zero | SrcRef::Reg(_) => {
1004 e.set_opcode(0x5c80);
1005 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1006 }
1007 SrcRef::Imm32(imm32) => {
1008 e.set_opcode(0x3880);
1009 e.set_src_imm_f20(20..39, 56, *imm32);
1010 }
1011 SrcRef::CBuf(cb) => {
1012 e.set_opcode(0x4c80);
1013 e.set_src_cb(20..39, cb);
1014 }
1015 src => panic!("Invalid dmul src1: {src}"),
1016 }
1017
1018 e.set_dst(self.dst);
1019 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
1020
1021 e.set_rnd_mode(39..41, self.rnd_mode);
1022 e.set_bit(48, fneg);
1023 }
1024 }
1025
1026 impl SM50Op for OpDSetP {
legalize(&mut self, b: &mut LegalizeBuilder)1027 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1028 use RegFile::GPR;
1029 let [src0, src1] = &mut self.srcs;
1030 if swap_srcs_if_not_reg(src0, src1, GPR) {
1031 self.cmp_op = self.cmp_op.flip();
1032 }
1033 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
1034 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
1035 }
1036
encode(&self, e: &mut SM50Encoder<'_>)1037 fn encode(&self, e: &mut SM50Encoder<'_>) {
1038 match &self.srcs[1].src_ref {
1039 SrcRef::Zero | SrcRef::Reg(_) => {
1040 e.set_opcode(0x5b80);
1041 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
1042 }
1043 SrcRef::Imm32(imm32) => {
1044 e.set_opcode(0x3680);
1045 e.set_src_imm_f20(20..39, 56, *imm32);
1046 assert!(self.srcs[1].src_mod.is_none());
1047 }
1048 SrcRef::CBuf(_) => {
1049 e.set_opcode(0x4b80);
1050 e.set_reg_fmod_src(20..39, 44, 6, self.srcs[1]);
1051 }
1052 src => panic!("Invalid dsetp src1: {src}"),
1053 }
1054
1055 e.set_pred_dst(3..6, self.dst);
1056 e.set_pred_dst(0..3, Dst::None); // dst1
1057 e.set_pred_src(39..42, 42, self.accum);
1058 e.set_pred_set_op(45..47, self.set_op);
1059 e.set_float_cmp_op(48..52, self.cmp_op);
1060 e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
1061 }
1062 }
1063
1064 impl SM50Op for OpBfe {
legalize(&mut self, b: &mut LegalizeBuilder)1065 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1066 use RegFile::GPR;
1067 b.copy_alu_src_if_not_reg(&mut self.base, GPR, SrcType::ALU);
1068 }
1069
encode(&self, e: &mut SM50Encoder<'_>)1070 fn encode(&self, e: &mut SM50Encoder<'_>) {
1071 match &self.range.src_ref {
1072 SrcRef::Zero | SrcRef::Reg(_) => {
1073 e.set_opcode(0x5c00);
1074 e.set_reg_src(20..28, self.range);
1075 }
1076 SrcRef::Imm32(imm32) => {
1077 e.set_opcode(0x3800);
1078 // Only the bottom 16 bits of the immediate matter
1079 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1080 }
1081 SrcRef::CBuf(cbuf) => {
1082 e.set_opcode(0x4c00);
1083 e.set_src_cb(20..39, cbuf);
1084 }
1085 src => panic!("Invalid bfe range: {src}"),
1086 }
1087
1088 if self.signed {
1089 e.set_bit(48, true);
1090 }
1091
1092 if self.reverse {
1093 e.set_bit(40, true);
1094 }
1095
1096 e.set_reg_src(8..16, self.base);
1097 e.set_dst(self.dst);
1098 }
1099 }
1100
1101 impl SM50Op for OpFlo {
legalize(&mut self, b: &mut LegalizeBuilder)1102 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1103 use RegFile::GPR;
1104 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1105 }
1106
encode(&self, e: &mut SM50Encoder<'_>)1107 fn encode(&self, e: &mut SM50Encoder<'_>) {
1108 match &self.src.src_ref {
1109 SrcRef::Zero | SrcRef::Reg(_) => {
1110 e.set_opcode(0x5c30);
1111 e.set_reg_src_ref(20..28, self.src.src_ref);
1112 }
1113 SrcRef::Imm32(imm32) => {
1114 e.set_opcode(0x3830);
1115 e.set_src_imm_i20(20..39, 56, *imm32);
1116 assert!(self.src.src_mod.is_none());
1117 }
1118 SrcRef::CBuf(cb) => {
1119 e.set_opcode(0x4c30);
1120 e.set_src_cb(20..39, cb);
1121 }
1122 src => panic!("Invalid flo src: {src}"),
1123 }
1124
1125 e.set_dst(self.dst);
1126 e.set_bit(40, self.src.src_mod.is_bnot());
1127 e.set_bit(48, self.signed);
1128 e.set_bit(41, self.return_shift_amount);
1129 e.set_bit(47, false); /* dst.CC */
1130 }
1131 }
1132
1133 impl SM50Op for OpIAdd2 {
legalize(&mut self, b: &mut LegalizeBuilder)1134 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1135 use RegFile::GPR;
1136 let [src0, src1] = &mut self.srcs;
1137 swap_srcs_if_not_reg(src0, src1, GPR);
1138 if src0.src_mod.is_ineg() && src1.src_mod.is_ineg() {
1139 assert!(self.carry_out.is_none());
1140 let val = b.alloc_ssa(GPR, 1);
1141 b.push_op(OpIAdd2 {
1142 dst: val.into(),
1143 carry_out: Dst::None,
1144 srcs: [Src::new_zero(), *src0],
1145 });
1146 *src0 = val.into();
1147 }
1148 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1149 if !self.carry_out.is_none() {
1150 b.copy_alu_src_if_ineg_imm(src1, GPR, SrcType::I32);
1151 }
1152 }
1153
encode(&self, e: &mut SM50Encoder<'_>)1154 fn encode(&self, e: &mut SM50Encoder<'_>) {
1155 // Hardware requires at least one of these be unmodified. Otherwise, it
1156 // encodes as iadd.po which isn't what we want.
1157 assert!(
1158 self.srcs[0].src_mod.is_none() || self.srcs[1].src_mod.is_none()
1159 );
1160
1161 let carry_out = match self.carry_out {
1162 Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1163 Dst::None => false,
1164 dst => panic!("Invalid iadd carry_out: {dst}"),
1165 };
1166
1167 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1168 e.set_opcode(0x1c00);
1169
1170 e.set_dst(self.dst);
1171 e.set_reg_ineg_src(8..16, 56, self.srcs[0]);
1172 e.set_src_imm32(20..52, imm32);
1173
1174 e.set_bit(52, carry_out);
1175 e.set_bit(53, false); // .X
1176 } else {
1177 match &self.srcs[1].src_ref {
1178 SrcRef::Zero | SrcRef::Reg(_) => {
1179 e.set_opcode(0x5c10);
1180 e.set_reg_ineg_src(20..28, 48, self.srcs[1]);
1181 }
1182 SrcRef::Imm32(imm32) => {
1183 e.set_opcode(0x3810);
1184 e.set_src_imm_i20(20..39, 56, *imm32);
1185 assert!(self.srcs[1].src_mod.is_none());
1186 }
1187 SrcRef::CBuf(_) => {
1188 e.set_opcode(0x4c10);
1189 e.set_cb_ineg_src(20..39, 48, self.srcs[1]);
1190 }
1191 src => panic!("Invalid iadd src1: {src}"),
1192 }
1193
1194 e.set_dst(self.dst);
1195 e.set_reg_ineg_src(8..16, 49, self.srcs[0]);
1196
1197 e.set_bit(43, false); // .X
1198 e.set_bit(47, carry_out);
1199 }
1200 }
1201 }
1202
1203 impl SM50Op for OpIAdd2X {
legalize(&mut self, b: &mut LegalizeBuilder)1204 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1205 use RegFile::GPR;
1206 let [src0, src1] = &mut self.srcs;
1207 swap_srcs_if_not_reg(src0, src1, GPR);
1208 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1209 }
1210
encode(&self, e: &mut SM50Encoder<'_>)1211 fn encode(&self, e: &mut SM50Encoder<'_>) {
1212 match self.carry_in.src_ref {
1213 SrcRef::Reg(reg) if reg.file() == RegFile::Carry => (),
1214 src => panic!("Invalid iadd.x carry_in: {src}"),
1215 }
1216
1217 let carry_out = match self.carry_out {
1218 Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1219 Dst::None => false,
1220 dst => panic!("Invalid iadd.x carry_out: {dst}"),
1221 };
1222
1223 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1224 e.set_opcode(0x1c00);
1225
1226 e.set_dst(self.dst);
1227 e.set_reg_bnot_src(8..16, 56, self.srcs[0]);
1228 e.set_src_imm32(20..52, imm32);
1229
1230 e.set_bit(52, carry_out);
1231 e.set_bit(53, true); // .X
1232 } else {
1233 match &self.srcs[1].src_ref {
1234 SrcRef::Zero | SrcRef::Reg(_) => {
1235 e.set_opcode(0x5c10);
1236 e.set_reg_bnot_src(20..28, 48, self.srcs[1]);
1237 }
1238 SrcRef::Imm32(imm32) => {
1239 e.set_opcode(0x3810);
1240 e.set_src_imm_i20(20..39, 56, *imm32);
1241 assert!(self.srcs[1].src_mod.is_none());
1242 }
1243 SrcRef::CBuf(_) => {
1244 e.set_opcode(0x4c10);
1245 e.set_cb_bnot_src(20..39, 48, self.srcs[1]);
1246 }
1247 src => panic!("Invalid iadd.x src1: {src}"),
1248 }
1249
1250 e.set_dst(self.dst);
1251 e.set_reg_bnot_src(8..16, 49, self.srcs[0]);
1252
1253 e.set_bit(43, true); // .X
1254 e.set_bit(47, carry_out);
1255 }
1256 }
1257 }
1258
1259 impl SM50Op for OpIMad {
legalize(&mut self, b: &mut LegalizeBuilder)1260 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1261 use RegFile::GPR;
1262 let [src0, src1, src2] = &mut self.srcs;
1263 swap_srcs_if_not_reg(src0, src1, GPR);
1264 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1265 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1266 if src_is_reg(src1, GPR) {
1267 b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU);
1268 } else {
1269 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU);
1270 }
1271 }
1272
encode(&self, e: &mut SM50Encoder<'_>)1273 fn encode(&self, e: &mut SM50Encoder<'_>) {
1274 // There is one ineg bit shared by the two imul sources
1275 let ineg_imul =
1276 self.srcs[0].src_mod.is_ineg() ^ self.srcs[1].src_mod.is_ineg();
1277 let ineg_src2 = self.srcs[2].src_mod.is_ineg();
1278
1279 match &self.srcs[2].src_ref {
1280 SrcRef::Zero | SrcRef::Reg(_) => {
1281 match &self.srcs[1].src_ref {
1282 SrcRef::Zero | SrcRef::Reg(_) => {
1283 e.set_opcode(0x5a00);
1284 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1285 }
1286 SrcRef::Imm32(imm32) => {
1287 e.set_opcode(0x3400);
1288 e.set_src_imm_i20(20..39, 56, *imm32);
1289 }
1290 SrcRef::CBuf(cb) => {
1291 e.set_opcode(0x4a00);
1292 e.set_src_cb(20..39, cb);
1293 }
1294 src => panic!("Invalid imad src1: {src}"),
1295 }
1296
1297 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
1298 }
1299 SrcRef::CBuf(cb) => {
1300 e.set_opcode(0x5200);
1301 e.set_src_cb(20..39, cb);
1302 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
1303 }
1304 src => panic!("Invalid imad src2: {src}"),
1305 }
1306
1307 e.set_dst(self.dst);
1308 e.set_reg_src(8..16, self.srcs[0]);
1309
1310 e.set_bit(48, self.signed); // src0 signed
1311 e.set_bit(51, ineg_imul);
1312 e.set_bit(52, ineg_src2);
1313 e.set_bit(53, self.signed); // src1 signed
1314 }
1315 }
1316
1317 impl SM50Op for OpIMul {
legalize(&mut self, b: &mut LegalizeBuilder)1318 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1319 use RegFile::GPR;
1320 let [src0, src1] = &mut self.srcs;
1321 if swap_srcs_if_not_reg(src0, src1, GPR) {
1322 self.signed.swap(0, 1);
1323 }
1324 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1325 }
1326
encode(&self, e: &mut SM50Encoder<'_>)1327 fn encode(&self, e: &mut SM50Encoder<'_>) {
1328 assert!(self.srcs[0].src_mod.is_none());
1329 assert!(self.srcs[1].src_mod.is_none());
1330
1331 if let Some(i) = self.srcs[1].as_imm_not_i20() {
1332 e.set_opcode(0x1fc0);
1333 e.set_src_imm32(20..52, i);
1334
1335 e.set_bit(53, self.high);
1336 e.set_bit(54, self.signed[0]);
1337 e.set_bit(55, self.signed[1]);
1338 } else {
1339 match &self.srcs[1].src_ref {
1340 SrcRef::Zero | SrcRef::Reg(_) => {
1341 e.set_opcode(0x5c38);
1342 e.set_reg_src(20..28, self.srcs[1]);
1343 }
1344 SrcRef::Imm32(imm32) => {
1345 e.set_opcode(0x3838);
1346 e.set_src_imm_i20(20..39, 56, *imm32);
1347 }
1348 SrcRef::CBuf(cb) => {
1349 e.set_opcode(0x4c38);
1350 e.set_src_cb(20..39, cb);
1351 }
1352 src => panic!("Invalid imul src1: {src}"),
1353 };
1354
1355 e.set_bit(39, self.high);
1356 e.set_bit(40, self.signed[0]);
1357 e.set_bit(41, self.signed[1]);
1358 }
1359
1360 e.set_dst(self.dst);
1361 e.set_reg_src(8..16, self.srcs[0]);
1362 }
1363 }
1364
1365 impl SM50Op for OpIMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)1366 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1367 use RegFile::GPR;
1368 let [src0, src1] = &mut self.srcs;
1369 swap_srcs_if_not_reg(src0, src1, GPR);
1370 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1371 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1372 }
1373
encode(&self, e: &mut SM50Encoder<'_>)1374 fn encode(&self, e: &mut SM50Encoder<'_>) {
1375 match &self.srcs[1].src_ref {
1376 SrcRef::Zero | SrcRef::Reg(_) => {
1377 e.set_opcode(0x5c20);
1378 e.set_reg_src(20..28, self.srcs[1]);
1379 }
1380 SrcRef::Imm32(imm32) => {
1381 e.set_opcode(0x3820);
1382 e.set_src_imm_i20(20..39, 56, *imm32);
1383 assert!(self.srcs[1].src_mod.is_none());
1384 }
1385 SrcRef::CBuf(cb) => {
1386 e.set_opcode(0x4c20);
1387 e.set_src_cb(20..39, cb);
1388 }
1389 src => panic!("Invalid imnmx src1: {src}"),
1390 }
1391
1392 e.set_dst(self.dst);
1393 e.set_reg_src(8..16, self.srcs[0]);
1394 e.set_pred_src(39..42, 42, self.min);
1395 e.set_bit(47, false); // .CC
1396 e.set_bit(
1397 48,
1398 match self.cmp_type {
1399 IntCmpType::U32 => false,
1400 IntCmpType::I32 => true,
1401 },
1402 );
1403 }
1404 }
1405
1406 impl SM50Op for OpISetP {
legalize(&mut self, b: &mut LegalizeBuilder)1407 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1408 use RegFile::GPR;
1409 let [src0, src1] = &mut self.srcs;
1410 if swap_srcs_if_not_reg(src0, src1, GPR) {
1411 self.cmp_op = self.cmp_op.flip();
1412 }
1413 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1414 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1415 }
1416
encode(&self, e: &mut SM50Encoder<'_>)1417 fn encode(&self, e: &mut SM50Encoder<'_>) {
1418 match &self.srcs[1].src_ref {
1419 SrcRef::Zero | SrcRef::Reg(_) => {
1420 e.set_opcode(0x5b60);
1421 e.set_reg_src(20..28, self.srcs[1]);
1422 }
1423 SrcRef::Imm32(imm32) => {
1424 e.set_opcode(0x3660);
1425 e.set_src_imm_i20(20..39, 56, *imm32);
1426 assert!(self.srcs[1].src_mod.is_none());
1427 }
1428 SrcRef::CBuf(cb) => {
1429 e.set_opcode(0x4b60);
1430 e.set_src_cb(20..39, cb);
1431 }
1432 src => panic!("Invalid isetp src1: {src}"),
1433 }
1434
1435 e.set_pred_dst(0..3, Dst::None); // dst1
1436 e.set_pred_dst(3..6, self.dst);
1437 e.set_reg_src(8..16, self.srcs[0]);
1438 e.set_pred_src(39..42, 42, self.accum);
1439
1440 // isetp.x seems to take the accumulator into account and we don't fully
1441 // understand how. Until we do, disallow it.
1442 assert!(!self.ex);
1443 e.set_bit(43, self.ex);
1444 e.set_pred_set_op(45..47, self.set_op);
1445
1446 e.set_field(
1447 48..49,
1448 match self.cmp_type {
1449 IntCmpType::U32 => 0_u32,
1450 IntCmpType::I32 => 1_u32,
1451 },
1452 );
1453 e.set_int_cmp_op(49..52, self.cmp_op);
1454 }
1455 }
1456
1457 impl SM50Op for OpLop2 {
legalize(&mut self, b: &mut LegalizeBuilder)1458 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1459 use RegFile::GPR;
1460 let [src0, src1] = &mut self.srcs;
1461 match self.op {
1462 LogicOp2::PassB => {
1463 *src0 = 0.into();
1464 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1465 }
1466 LogicOp2::And | LogicOp2::Or | LogicOp2::Xor => {
1467 swap_srcs_if_not_reg(src0, src1, GPR);
1468 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1469 }
1470 }
1471 }
1472
encode(&self, e: &mut SM50Encoder<'_>)1473 fn encode(&self, e: &mut SM50Encoder<'_>) {
1474 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1475 e.set_opcode(0x0400);
1476
1477 e.set_dst(self.dst);
1478 e.set_reg_bnot_src(8..16, 55, self.srcs[0]);
1479 e.set_src_imm32(20..52, imm32);
1480 e.set_field(
1481 53..55,
1482 match self.op {
1483 LogicOp2::And => 0_u8,
1484 LogicOp2::Or => 1_u8,
1485 LogicOp2::Xor => 2_u8,
1486 LogicOp2::PassB => {
1487 panic!("PASS_B is not supported for LOP32I");
1488 }
1489 },
1490 );
1491 e.set_bit(56, self.srcs[1].src_mod.is_bnot());
1492 } else {
1493 match &self.srcs[1].src_ref {
1494 SrcRef::Zero | SrcRef::Reg(_) => {
1495 e.set_opcode(0x5c40);
1496 e.set_reg_bnot_src(20..28, 40, self.srcs[1]);
1497 }
1498 SrcRef::Imm32(imm32) => {
1499 e.set_opcode(0x3840);
1500 e.set_src_imm_i20(20..39, 56, *imm32);
1501 assert!(self.srcs[1].src_mod.is_none());
1502 }
1503 SrcRef::CBuf(_) => {
1504 e.set_opcode(0x4c40);
1505 e.set_cb_bnot_src(20..39, 40, self.srcs[1]);
1506 }
1507 src => panic!("Invalid lop2 src1: {src}"),
1508 }
1509
1510 e.set_dst(self.dst);
1511 e.set_reg_bnot_src(8..16, 39, self.srcs[0]);
1512
1513 e.set_field(
1514 41..43,
1515 match self.op {
1516 LogicOp2::And => 0_u8,
1517 LogicOp2::Or => 1_u8,
1518 LogicOp2::Xor => 2_u8,
1519 LogicOp2::PassB => 3_u8,
1520 },
1521 );
1522
1523 e.set_pred_dst(48..51, Dst::None);
1524 }
1525 }
1526 }
1527
1528 impl SM50Op for OpPopC {
legalize(&mut self, b: &mut LegalizeBuilder)1529 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1530 use RegFile::GPR;
1531 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1532 }
1533
encode(&self, e: &mut SM50Encoder<'_>)1534 fn encode(&self, e: &mut SM50Encoder<'_>) {
1535 match &self.src.src_ref {
1536 SrcRef::Zero | SrcRef::Reg(_) => {
1537 e.set_opcode(0x5c08);
1538 e.set_reg_bnot_src(20..28, 40, self.src);
1539 }
1540 SrcRef::Imm32(imm32) => {
1541 e.set_opcode(0x3808);
1542 e.set_src_imm_i20(20..39, 56, *imm32);
1543 e.set_bit(40, self.src.src_mod.is_bnot());
1544 }
1545 SrcRef::CBuf(_) => {
1546 e.set_opcode(0x4c08);
1547 e.set_cb_bnot_src(20..39, 40, self.src);
1548 }
1549 src => panic!("Invalid popc src1: {src}"),
1550 }
1551
1552 e.set_dst(self.dst);
1553 }
1554 }
1555
1556 impl SM50Op for OpShf {
legalize(&mut self, b: &mut LegalizeBuilder)1557 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1558 use RegFile::GPR;
1559 b.copy_alu_src_if_not_reg(&mut self.high, GPR, SrcType::ALU);
1560 b.copy_alu_src_if_not_reg(&mut self.low, GPR, SrcType::GPR);
1561 b.copy_alu_src_if_not_reg_or_imm(&mut self.shift, GPR, SrcType::GPR);
1562 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::GPR);
1563 }
1564
encode(&self, e: &mut SM50Encoder<'_>)1565 fn encode(&self, e: &mut SM50Encoder<'_>) {
1566 match &self.shift.src_ref {
1567 SrcRef::Zero | SrcRef::Reg(_) => {
1568 e.set_opcode(if self.right { 0x5cf8 } else { 0x5bf8 });
1569 e.set_reg_src(20..28, self.shift);
1570 }
1571 SrcRef::Imm32(imm32) => {
1572 e.set_opcode(if self.right { 0x38f8 } else { 0x36f8 });
1573 e.set_src_imm_i20(20..39, 56, *imm32);
1574 assert!(self.shift.src_mod.is_none());
1575 }
1576 src => panic!("Invalid shf shift: {src}"),
1577 }
1578
1579 e.set_field(
1580 37..39,
1581 match self.data_type {
1582 IntType::I32 => 0_u8,
1583 IntType::U32 => 0_u8,
1584 IntType::U64 => 2_u8,
1585 IntType::I64 => 3_u8,
1586 _ => panic!("Invalid shift data type"),
1587 },
1588 );
1589
1590 e.set_dst(self.dst);
1591 e.set_reg_src(8..16, self.low);
1592 e.set_reg_src(39..47, self.high);
1593
1594 e.set_bit(47, false); // .CC
1595
1596 // If we're shifting left, the HW will throw an illegal instrucction
1597 // encoding error if we set .high and will give us the high part anyway
1598 // if we don't. This makes everything a bit more consistent.
1599 assert!(self.right || self.dst_high);
1600 e.set_bit(48, self.dst_high && self.right); // .high
1601
1602 e.set_bit(49, false); // .X
1603 e.set_bit(50, self.wrap);
1604 }
1605 }
1606
1607 impl SM50Op for OpShl {
legalize(&mut self, b: &mut LegalizeBuilder)1608 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1609 use RegFile::GPR;
1610 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1611 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1612 }
1613
encode(&self, e: &mut SM50Encoder<'_>)1614 fn encode(&self, e: &mut SM50Encoder<'_>) {
1615 e.set_dst(self.dst);
1616 e.set_reg_src(8..16, self.src);
1617 match &self.shift.src_ref {
1618 SrcRef::Zero | SrcRef::Reg(_) => {
1619 e.set_opcode(0x5c48);
1620 e.set_reg_src(20..28, self.shift);
1621 }
1622 SrcRef::Imm32(imm32) => {
1623 e.set_opcode(0x3848);
1624 e.set_src_imm_i20(20..39, 56, *imm32);
1625 }
1626 SrcRef::CBuf(cb) => {
1627 e.set_opcode(0x4c48);
1628 e.set_src_cb(20..39, cb);
1629 }
1630 src => panic!("Invalid shl shift: {src}"),
1631 }
1632
1633 e.set_bit(39, self.wrap);
1634 }
1635 }
1636
1637 impl SM50Op for OpShr {
legalize(&mut self, b: &mut LegalizeBuilder)1638 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1639 use RegFile::GPR;
1640 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1641 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1642 }
1643
encode(&self, e: &mut SM50Encoder<'_>)1644 fn encode(&self, e: &mut SM50Encoder<'_>) {
1645 e.set_dst(self.dst);
1646 e.set_reg_src(8..16, self.src);
1647 match &self.shift.src_ref {
1648 SrcRef::Zero | SrcRef::Reg(_) => {
1649 e.set_opcode(0x5c28);
1650 e.set_reg_src(20..28, self.shift);
1651 }
1652 SrcRef::Imm32(imm32) => {
1653 e.set_opcode(0x3828);
1654 e.set_src_imm_i20(20..39, 56, *imm32);
1655 }
1656 SrcRef::CBuf(cb) => {
1657 e.set_opcode(0x4c28);
1658 e.set_src_cb(20..39, cb);
1659 }
1660 src => panic!("Invalid shr shift: {src}"),
1661 }
1662
1663 e.set_bit(39, self.wrap);
1664 e.set_bit(48, self.signed);
1665 }
1666 }
1667
1668 impl SM50Op for OpF2F {
legalize(&mut self, b: &mut LegalizeBuilder)1669 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1670 use RegFile::GPR;
1671 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1672 }
1673
encode(&self, e: &mut SM50Encoder<'_>)1674 fn encode(&self, e: &mut SM50Encoder<'_>) {
1675 match &self.src.src_ref {
1676 SrcRef::Zero | SrcRef::Reg(_) => {
1677 e.set_opcode(0x5ca8);
1678 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1679 }
1680 SrcRef::Imm32(imm32) => {
1681 e.set_opcode(0x38a8);
1682 e.set_src_imm_i20(20..39, 56, *imm32);
1683 assert!(self.src.src_mod.is_none());
1684 }
1685 SrcRef::CBuf(_) => {
1686 e.set_opcode(0x4ca8);
1687 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1688 }
1689 src => panic!("Invalid f2f src: {src}"),
1690 }
1691
1692 // We can't span 32 bits
1693 assert!(
1694 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1695 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1696 );
1697 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1698 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1699
1700 e.set_rnd_mode(39..41, self.rnd_mode);
1701 e.set_bit(41, self.high);
1702 e.set_bit(42, self.integer_rnd);
1703 e.set_bit(44, self.ftz);
1704 e.set_bit(50, false); // saturate
1705
1706 e.set_dst(self.dst);
1707 }
1708 }
1709
1710 impl SM50Op for OpF2I {
legalize(&mut self, b: &mut LegalizeBuilder)1711 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1712 use RegFile::GPR;
1713 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1714 }
1715
encode(&self, e: &mut SM50Encoder<'_>)1716 fn encode(&self, e: &mut SM50Encoder<'_>) {
1717 match &self.src.src_ref {
1718 SrcRef::Zero | SrcRef::Reg(_) => {
1719 e.set_opcode(0x5cb0);
1720 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1721 }
1722 SrcRef::Imm32(imm32) => {
1723 e.set_opcode(0x38b0);
1724 e.set_src_imm_f20(20..39, 56, *imm32);
1725 assert!(self.src.src_mod.is_none());
1726 }
1727 SrcRef::CBuf(_) => {
1728 e.set_opcode(0x4cb0);
1729 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1730 }
1731 src => panic!("Invalid f2i src: {src}"),
1732 }
1733
1734 e.set_dst(self.dst);
1735
1736 // We can't span 32 bits
1737 assert!(
1738 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1739 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1740 );
1741 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1742 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1743 e.set_bit(12, self.dst_type.is_signed());
1744
1745 e.set_rnd_mode(39..41, self.rnd_mode);
1746 e.set_bit(44, self.ftz);
1747 e.set_bit(47, false); // .CC
1748 }
1749 }
1750
1751 impl SM50Op for OpI2F {
legalize(&mut self, b: &mut LegalizeBuilder)1752 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1753 use RegFile::GPR;
1754 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1755 }
1756
encode(&self, e: &mut SM50Encoder<'_>)1757 fn encode(&self, e: &mut SM50Encoder<'_>) {
1758 match &self.src.src_ref {
1759 SrcRef::Zero | SrcRef::Reg(_) => {
1760 e.set_opcode(0x5cb8);
1761 e.set_reg_ineg_src(20..28, 45, self.src);
1762 }
1763 SrcRef::Imm32(imm32) => {
1764 e.set_opcode(0x38b8);
1765 e.set_src_imm_i20(20..39, 56, *imm32);
1766 assert!(self.src.src_mod.is_none());
1767 }
1768 SrcRef::CBuf(_) => {
1769 e.set_opcode(0x4cb8);
1770 e.set_cb_ineg_src(20..39, 45, self.src);
1771 }
1772 src => panic!("Invalid i2f src: {src}"),
1773 }
1774
1775 e.set_dst(self.dst);
1776
1777 // We can't span 32 bits
1778 assert!(
1779 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1780 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1781 );
1782 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1783 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1784 e.set_bit(13, self.src_type.is_signed());
1785
1786 e.set_rnd_mode(39..41, self.rnd_mode);
1787 e.set_field(41..43, 0_u8); // TODO: subop
1788 e.set_bit(49, false); // iabs
1789 }
1790 }
1791
1792 impl SM50Op for OpI2I {
legalize(&mut self, b: &mut LegalizeBuilder)1793 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1794 use RegFile::GPR;
1795 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1796 }
1797
encode(&self, e: &mut SM50Encoder<'_>)1798 fn encode(&self, e: &mut SM50Encoder<'_>) {
1799 match &self.src.src_ref {
1800 SrcRef::Zero | SrcRef::Reg(_) => {
1801 e.set_opcode(0x5ce0);
1802 e.set_reg_src(20..28, self.src);
1803 }
1804 SrcRef::Imm32(imm32) => {
1805 e.set_opcode(0x38e0);
1806 e.set_src_imm_i20(20..39, 56, *imm32);
1807 }
1808 SrcRef::CBuf(cbuf) => {
1809 e.set_opcode(0x4ce0);
1810 e.set_src_cb(20..39, cbuf);
1811 }
1812 src => panic!("Invalid i2i src: {src}"),
1813 }
1814
1815 e.set_dst(self.dst);
1816
1817 // We can't span 32 bits
1818 assert!(
1819 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1820 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1821 );
1822 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1823 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1824 e.set_bit(12, self.dst_type.is_signed());
1825 e.set_bit(13, self.src_type.is_signed());
1826
1827 e.set_field(41..43, 0u8); // src.B1-3
1828 e.set_bit(45, self.neg);
1829 e.set_bit(47, false); // dst.CC
1830 e.set_bit(49, self.abs);
1831 e.set_bit(50, self.saturate);
1832 }
1833 }
1834
1835 impl SM50Op for OpMov {
legalize(&mut self, _b: &mut LegalizeBuilder)1836 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1837 // Nothing to do
1838 }
1839
encode(&self, e: &mut SM50Encoder<'_>)1840 fn encode(&self, e: &mut SM50Encoder<'_>) {
1841 match &self.src.src_ref {
1842 SrcRef::Zero | SrcRef::Reg(_) => {
1843 e.set_opcode(0x5c98);
1844 e.set_reg_src(20..28, self.src);
1845 e.set_field(39..43, self.quad_lanes);
1846 }
1847 SrcRef::Imm32(imm32) => {
1848 e.set_opcode(0x0100);
1849 e.set_src_imm32(20..52, *imm32);
1850 e.set_field(12..16, self.quad_lanes);
1851 }
1852 SrcRef::CBuf(cb) => {
1853 e.set_opcode(0x4c98);
1854 e.set_src_cb(20..39, cb);
1855 e.set_field(39..43, self.quad_lanes);
1856 }
1857 src => panic!("Invalid mov src: {src}"),
1858 }
1859
1860 e.set_dst(self.dst);
1861 }
1862 }
1863
1864 impl SM50Op for OpPrmt {
legalize(&mut self, b: &mut LegalizeBuilder)1865 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1866 use RegFile::GPR;
1867 b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
1868 b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
1869 }
1870
encode(&self, e: &mut SM50Encoder<'_>)1871 fn encode(&self, e: &mut SM50Encoder<'_>) {
1872 match &self.sel.src_ref {
1873 SrcRef::Zero | SrcRef::Reg(_) => {
1874 e.set_opcode(0x5bc0);
1875 e.set_reg_src(20..28, self.sel);
1876 }
1877 SrcRef::Imm32(imm32) => {
1878 e.set_opcode(0x36c0);
1879 // Only the bottom 16 bits matter
1880 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1881 }
1882 SrcRef::CBuf(cb) => {
1883 e.set_opcode(0x4bc0);
1884 e.set_src_cb(20..39, cb);
1885 }
1886 src => panic!("Invalid prmt selector: {src}"),
1887 }
1888
1889 e.set_dst(self.dst);
1890 e.set_reg_src(8..16, self.srcs[0]);
1891 e.set_reg_src(39..47, self.srcs[1]);
1892 e.set_field(
1893 48..51,
1894 match self.mode {
1895 PrmtMode::Index => 0_u8,
1896 PrmtMode::Forward4Extract => 1_u8,
1897 PrmtMode::Backward4Extract => 2_u8,
1898 PrmtMode::Replicate8 => 3_u8,
1899 PrmtMode::EdgeClampLeft => 4_u8,
1900 PrmtMode::EdgeClampRight => 5_u8,
1901 PrmtMode::Replicate16 => 6_u8,
1902 },
1903 );
1904 }
1905 }
1906
1907 impl SM50Op for OpSel {
legalize(&mut self, b: &mut LegalizeBuilder)1908 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1909 use RegFile::GPR;
1910 let [src0, src1] = &mut self.srcs;
1911 if swap_srcs_if_not_reg(src0, src1, GPR) {
1912 self.cond = self.cond.bnot();
1913 }
1914 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1915 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1916 }
1917
encode(&self, e: &mut SM50Encoder<'_>)1918 fn encode(&self, e: &mut SM50Encoder<'_>) {
1919 match &self.srcs[1].src_ref {
1920 SrcRef::Zero | SrcRef::Reg(_) => {
1921 e.set_opcode(0x5ca0);
1922 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1923 }
1924 SrcRef::Imm32(imm32) => {
1925 e.set_opcode(0x38a0);
1926 e.set_src_imm_i20(20..39, 56, *imm32);
1927 }
1928 SrcRef::CBuf(cbuf) => {
1929 e.set_opcode(0x4ca0);
1930 e.set_src_cb(20..39, cbuf);
1931 }
1932 src => panic!("Invalid sel src1: {src}"),
1933 }
1934
1935 e.set_dst(self.dst);
1936 e.set_reg_src(8..16, self.srcs[0]);
1937 e.set_pred_src(39..42, 42, self.cond);
1938 }
1939 }
1940
1941 impl SM50Op for OpShfl {
legalize(&mut self, b: &mut LegalizeBuilder)1942 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1943 use RegFile::GPR;
1944 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1945 b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU);
1946 b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU);
1947 }
1948
encode(&self, e: &mut SM50Encoder<'_>)1949 fn encode(&self, e: &mut SM50Encoder<'_>) {
1950 e.set_opcode(0xef10);
1951
1952 e.set_dst(self.dst);
1953 e.set_pred_dst(48..51, self.in_bounds);
1954 e.set_reg_src(8..16, self.src);
1955
1956 match &self.lane.src_ref {
1957 SrcRef::Zero | SrcRef::Reg(_) => {
1958 e.set_bit(28, false);
1959 e.set_reg_src(20..28, self.lane);
1960 }
1961 SrcRef::Imm32(imm32) => {
1962 e.set_bit(28, true);
1963 e.set_field(20..25, *imm32 & 0x1f);
1964 }
1965 src => panic!("Invalid shfl lane: {src}"),
1966 }
1967 match &self.c.src_ref {
1968 SrcRef::Zero | SrcRef::Reg(_) => {
1969 e.set_bit(29, false);
1970 e.set_reg_src(39..47, self.c);
1971 }
1972 SrcRef::Imm32(imm32) => {
1973 e.set_bit(29, true);
1974 e.set_field(34..47, *imm32 & 0x1f1f);
1975 }
1976 src => panic!("Invalid shfl c: {src}"),
1977 }
1978
1979 e.set_field(
1980 30..32,
1981 match self.op {
1982 ShflOp::Idx => 0u8,
1983 ShflOp::Up => 1u8,
1984 ShflOp::Down => 2u8,
1985 ShflOp::Bfly => 3u8,
1986 },
1987 );
1988 }
1989 }
1990
1991 impl SM50Op for OpPSetP {
legalize(&mut self, _b: &mut LegalizeBuilder)1992 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1993 // Nothing to do
1994 }
1995
encode(&self, e: &mut SM50Encoder<'_>)1996 fn encode(&self, e: &mut SM50Encoder<'_>) {
1997 e.set_opcode(0x5090);
1998
1999 e.set_pred_dst(3..6, self.dsts[0]);
2000 e.set_pred_dst(0..3, self.dsts[1]);
2001
2002 e.set_pred_src(12..15, 15, self.srcs[0]);
2003 e.set_pred_src(29..32, 32, self.srcs[1]);
2004 e.set_pred_src(39..42, 42, self.srcs[2]);
2005
2006 e.set_pred_set_op(24..26, self.ops[0]);
2007 e.set_pred_set_op(45..47, self.ops[1]);
2008 }
2009 }
2010
2011 impl SM50Encoder<'_> {
set_tex_dim(&mut self, range: Range<usize>, dim: TexDim)2012 fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) {
2013 assert!(range.len() == 3);
2014 self.set_field(
2015 range,
2016 match dim {
2017 TexDim::_1D => 0_u8,
2018 TexDim::Array1D => 1_u8,
2019 TexDim::_2D => 2_u8,
2020 TexDim::Array2D => 3_u8,
2021 TexDim::_3D => 4_u8,
2022 TexDim::Cube => 6_u8,
2023 TexDim::ArrayCube => 7_u8,
2024 },
2025 );
2026 }
2027
set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode)2028 fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) {
2029 assert!(range.len() == 2);
2030 self.set_field(
2031 range,
2032 match lod_mode {
2033 TexLodMode::Auto => 0_u8,
2034 TexLodMode::Zero => 1_u8,
2035 TexLodMode::Bias => 2_u8,
2036 TexLodMode::Lod => 3_u8,
2037 _ => panic!("Unknown LOD mode"),
2038 },
2039 );
2040 }
2041 }
2042
2043 impl SM50Op for OpTex {
legalize(&mut self, b: &mut LegalizeBuilder)2044 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2045 legalize_ext_instr(self, b);
2046 }
2047
encode(&self, e: &mut SM50Encoder<'_>)2048 fn encode(&self, e: &mut SM50Encoder<'_>) {
2049 e.set_opcode(0xdeb8);
2050
2051 e.set_dst(self.dsts[0]);
2052 assert!(self.dsts[1].is_none());
2053 assert!(self.fault.is_none());
2054 e.set_reg_src(8..16, self.srcs[0]);
2055 e.set_reg_src(20..28, self.srcs[1]);
2056
2057 e.set_tex_dim(28..31, self.dim);
2058 e.set_field(31..35, self.mask);
2059 e.set_bit(35, false); // ToDo: NDV
2060 e.set_bit(36, self.offset);
2061 e.set_tex_lod_mode(37..39, self.lod_mode);
2062 e.set_bit(49, false); // TODO: .NODEP
2063 e.set_bit(50, self.z_cmpr);
2064 }
2065 }
2066
2067 impl SM50Op for OpTld {
legalize(&mut self, b: &mut LegalizeBuilder)2068 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2069 legalize_ext_instr(self, b);
2070 }
2071
encode(&self, e: &mut SM50Encoder<'_>)2072 fn encode(&self, e: &mut SM50Encoder<'_>) {
2073 e.set_opcode(0xdd38);
2074
2075 e.set_dst(self.dsts[0]);
2076 assert!(self.dsts[1].is_none());
2077 assert!(self.fault.is_none());
2078 e.set_reg_src(8..16, self.srcs[0]);
2079 e.set_reg_src(20..28, self.srcs[1]);
2080
2081 e.set_tex_dim(28..31, self.dim);
2082 e.set_field(31..35, self.mask);
2083 e.set_bit(35, self.offset);
2084 e.set_bit(49, false); // TODO: .NODEP
2085 e.set_bit(50, self.is_ms);
2086
2087 assert!(
2088 self.lod_mode == TexLodMode::Zero
2089 || self.lod_mode == TexLodMode::Lod
2090 );
2091 e.set_bit(55, self.lod_mode == TexLodMode::Lod);
2092 }
2093 }
2094
2095 impl SM50Op for OpTld4 {
legalize(&mut self, b: &mut LegalizeBuilder)2096 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2097 legalize_ext_instr(self, b);
2098 }
2099
encode(&self, e: &mut SM50Encoder<'_>)2100 fn encode(&self, e: &mut SM50Encoder<'_>) {
2101 e.set_opcode(0xdef8);
2102
2103 e.set_dst(self.dsts[0]);
2104 assert!(self.dsts[1].is_none());
2105 assert!(self.fault.is_none());
2106 e.set_reg_src(8..16, self.srcs[0]);
2107 e.set_reg_src(20..28, self.srcs[1]);
2108
2109 e.set_tex_dim(28..31, self.dim);
2110 e.set_field(31..35, self.mask);
2111 e.set_bit(35, false); // ToDo: NDV
2112 e.set_field(
2113 36..38,
2114 match self.offset_mode {
2115 Tld4OffsetMode::None => 0_u8,
2116 Tld4OffsetMode::AddOffI => 1_u8,
2117 Tld4OffsetMode::PerPx => 2_u8,
2118 },
2119 );
2120 e.set_field(38..40, self.comp);
2121 e.set_bit(49, false); // TODO: .NODEP
2122 e.set_bit(50, self.z_cmpr);
2123 }
2124 }
2125
2126 impl SM50Op for OpTmml {
legalize(&mut self, b: &mut LegalizeBuilder)2127 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2128 legalize_ext_instr(self, b);
2129 }
2130
encode(&self, e: &mut SM50Encoder<'_>)2131 fn encode(&self, e: &mut SM50Encoder<'_>) {
2132 e.set_opcode(0xdf60);
2133
2134 e.set_dst(self.dsts[0]);
2135 assert!(self.dsts[1].is_none());
2136 e.set_reg_src(8..16, self.srcs[0]);
2137 e.set_reg_src(20..28, self.srcs[1]);
2138
2139 e.set_tex_dim(28..31, self.dim);
2140 e.set_field(31..35, self.mask);
2141 e.set_bit(35, false); // ToDo: NDV
2142 e.set_bit(49, false); // TODO: .NODEP
2143 }
2144 }
2145
2146 impl SM50Op for OpTxd {
legalize(&mut self, b: &mut LegalizeBuilder)2147 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2148 legalize_ext_instr(self, b);
2149 }
2150
encode(&self, e: &mut SM50Encoder<'_>)2151 fn encode(&self, e: &mut SM50Encoder<'_>) {
2152 e.set_opcode(0xde78);
2153
2154 e.set_dst(self.dsts[0]);
2155 assert!(self.dsts[1].is_none());
2156 assert!(self.fault.is_none());
2157 e.set_reg_src(8..16, self.srcs[0]);
2158 e.set_reg_src(20..28, self.srcs[1]);
2159
2160 e.set_tex_dim(28..31, self.dim);
2161 e.set_field(31..35, self.mask);
2162 e.set_bit(35, self.offset);
2163 e.set_bit(49, false); // TODO: .NODEP
2164 }
2165 }
2166
2167 impl SM50Op for OpTxq {
legalize(&mut self, b: &mut LegalizeBuilder)2168 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2169 legalize_ext_instr(self, b);
2170 }
2171
encode(&self, e: &mut SM50Encoder<'_>)2172 fn encode(&self, e: &mut SM50Encoder<'_>) {
2173 e.set_opcode(0xdf50);
2174
2175 e.set_dst(self.dsts[0]);
2176 assert!(self.dsts[1].is_none());
2177 e.set_reg_src(8..16, self.src);
2178
2179 e.set_field(
2180 22..28,
2181 match self.query {
2182 TexQuery::Dimension => 1_u8,
2183 TexQuery::TextureType => 2_u8,
2184 TexQuery::SamplerPos => 5_u8,
2185 // TexQuery::Filter => 0x10_u8,
2186 // TexQuery::Lod => 0x12_u8,
2187 // TexQuery::Wrap => 0x14_u8,
2188 // TexQuery::BorderColour => 0x16,
2189 },
2190 );
2191 e.set_field(31..35, self.mask);
2192 e.set_bit(49, false); // TODO: .NODEP
2193 }
2194 }
2195
2196 impl SM50Encoder<'_> {
set_mem_type(&mut self, range: Range<usize>, mem_type: MemType)2197 fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
2198 assert!(range.len() == 3);
2199 self.set_field(
2200 range,
2201 match mem_type {
2202 MemType::U8 => 0_u8,
2203 MemType::I8 => 1_u8,
2204 MemType::U16 => 2_u8,
2205 MemType::I16 => 3_u8,
2206 MemType::B32 => 4_u8,
2207 MemType::B64 => 5_u8,
2208 MemType::B128 => 6_u8,
2209 },
2210 );
2211 }
2212
set_mem_order(&mut self, _order: &MemOrder)2213 fn set_mem_order(&mut self, _order: &MemOrder) {
2214 // TODO: order and scope aren't present before SM70, what should we do?
2215 }
2216
set_mem_access(&mut self, access: &MemAccess)2217 fn set_mem_access(&mut self, access: &MemAccess) {
2218 self.set_field(
2219 45..46,
2220 match access.space.addr_type() {
2221 MemAddrType::A32 => 0_u8,
2222 MemAddrType::A64 => 1_u8,
2223 },
2224 );
2225 self.set_mem_type(48..51, access.mem_type);
2226 self.set_mem_order(&access.order);
2227 }
2228
set_image_dim(&mut self, range: Range<usize>, dim: ImageDim)2229 fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
2230 assert!(range.len() == 3);
2231 self.set_field(
2232 range,
2233 match dim {
2234 ImageDim::_1D => 0_u8,
2235 ImageDim::_1DBuffer => 1_u8,
2236 ImageDim::_1DArray => 2_u8,
2237 ImageDim::_2D => 3_u8,
2238 ImageDim::_2DArray => 4_u8,
2239 ImageDim::_3D => 5_u8,
2240 },
2241 );
2242 }
2243 }
2244
2245 impl SM50Op for OpSuLd {
legalize(&mut self, b: &mut LegalizeBuilder)2246 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2247 legalize_ext_instr(self, b);
2248 }
2249
encode(&self, e: &mut SM50Encoder<'_>)2250 fn encode(&self, e: &mut SM50Encoder<'_>) {
2251 e.set_opcode(0xeb00);
2252
2253 assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
2254 e.set_field(20..24, self.mask);
2255 e.set_image_dim(33..36, self.image_dim);
2256
2257 // mem_eviction_policy not a thing for sm < 70
2258
2259 let scope = match self.mem_order {
2260 MemOrder::Constant => MemScope::System,
2261 MemOrder::Weak => MemScope::CTA,
2262 MemOrder::Strong(s) => s,
2263 };
2264
2265 e.set_field(
2266 24..26,
2267 match scope {
2268 MemScope::CTA => 0_u8,
2269 /* SM => 1_u8, */
2270 MemScope::GPU => 2_u8,
2271 MemScope::System => 3_u8,
2272 },
2273 );
2274
2275 e.set_dst(self.dst);
2276
2277 e.set_reg_src(8..16, self.coord);
2278 e.set_reg_src(39..47, self.handle);
2279 }
2280 }
2281
2282 impl SM50Op for OpSuSt {
legalize(&mut self, b: &mut LegalizeBuilder)2283 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2284 legalize_ext_instr(self, b);
2285 }
2286
encode(&self, e: &mut SM50Encoder<'_>)2287 fn encode(&self, e: &mut SM50Encoder<'_>) {
2288 e.set_opcode(0xeb20);
2289
2290 e.set_reg_src(8..16, self.coord);
2291 e.set_reg_src(0..8, self.data);
2292 e.set_reg_src(39..47, self.handle);
2293
2294 e.set_image_dim(33..36, self.image_dim);
2295 e.set_mem_order(&self.mem_order);
2296
2297 assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
2298 e.set_field(20..24, self.mask);
2299 }
2300 }
2301
2302 impl SM50Encoder<'_> {
set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp)2303 fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
2304 self.set_field(
2305 range,
2306 match atom_op {
2307 AtomOp::Add => 0_u8,
2308 AtomOp::Min => 1_u8,
2309 AtomOp::Max => 2_u8,
2310 AtomOp::Inc => 3_u8,
2311 AtomOp::Dec => 4_u8,
2312 AtomOp::And => 5_u8,
2313 AtomOp::Or => 6_u8,
2314 AtomOp::Xor => 7_u8,
2315 AtomOp::Exch => 8_u8,
2316 AtomOp::CmpExch(_) => panic!("CmpExch is a separate opcode"),
2317 },
2318 );
2319 }
2320 }
2321
2322 impl SM50Op for OpSuAtom {
legalize(&mut self, b: &mut LegalizeBuilder)2323 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2324 legalize_ext_instr(self, b);
2325 }
2326
encode(&self, e: &mut SM50Encoder<'_>)2327 fn encode(&self, e: &mut SM50Encoder<'_>) {
2328 if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2329 e.set_opcode(0xeac0);
2330 assert!(cmp_src == AtomCmpSrc::Packed);
2331 } else {
2332 e.set_opcode(0xea60);
2333 e.set_atom_op(29..33, self.atom_op);
2334 }
2335
2336 let atom_type: u8 = match self.atom_type {
2337 AtomType::U32 => 0,
2338 AtomType::I32 => 1,
2339 AtomType::F32 => 3,
2340 AtomType::U64 => 2,
2341 AtomType::I64 => 5,
2342 _ => panic!("Unsupported atom type {}", self.atom_type),
2343 };
2344
2345 e.set_image_dim(33..36, self.image_dim);
2346 e.set_field(36..39, atom_type);
2347
2348 // The hardware requires that we set .D on atomics. This is safe to do
2349 // in in the emit code because it only affects format conversion, not
2350 // surface coordinates and atomics are required to be performed with
2351 // image formats that that exactly match the shader data type. So, for
2352 // instance, a uint32_t atomic has to happen on an R32_UINT or R32_SINT
2353 // image.
2354 e.set_bit(52, true); // .D
2355
2356 e.set_dst(self.dst);
2357
2358 e.set_reg_src(20..28, self.data);
2359 e.set_reg_src(8..16, self.coord);
2360 e.set_reg_src(39..47, self.handle);
2361 }
2362 }
2363
2364 impl SM50Op for OpLd {
legalize(&mut self, b: &mut LegalizeBuilder)2365 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2366 legalize_ext_instr(self, b);
2367 }
2368
encode(&self, e: &mut SM50Encoder<'_>)2369 fn encode(&self, e: &mut SM50Encoder<'_>) {
2370 e.set_opcode(match self.access.space {
2371 MemSpace::Global(_) => 0xeed0,
2372 MemSpace::Local => 0xef40,
2373 MemSpace::Shared => 0xef48,
2374 });
2375
2376 e.set_dst(self.dst);
2377 e.set_reg_src(8..16, self.addr);
2378 e.set_field(20..44, self.offset);
2379
2380 e.set_mem_access(&self.access);
2381 }
2382 }
2383
2384 impl SM50Op for OpLdc {
legalize(&mut self, b: &mut LegalizeBuilder)2385 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2386 use RegFile::GPR;
2387 b.copy_alu_src_if_not_reg(&mut self.offset, GPR, SrcType::GPR);
2388 }
2389
encode(&self, e: &mut SM50Encoder<'_>)2390 fn encode(&self, e: &mut SM50Encoder<'_>) {
2391 assert!(self.cb.src_mod.is_none());
2392 let SrcRef::CBuf(cb) = &self.cb.src_ref else {
2393 panic!("Not a CBuf source");
2394 };
2395 let CBuf::Binding(cb_idx) = cb.buf else {
2396 panic!("Must be a bound constant buffer");
2397 };
2398
2399 e.set_opcode(0xef90);
2400
2401 e.set_dst(self.dst);
2402 e.set_reg_src(8..16, self.offset);
2403 e.set_field(20..36, cb.offset);
2404 e.set_field(36..41, cb_idx);
2405 e.set_field(
2406 44..46,
2407 match self.mode {
2408 LdcMode::Indexed => 0_u8,
2409 LdcMode::IndexedLinear => 1_u8,
2410 LdcMode::IndexedSegmented => 2_u8,
2411 LdcMode::IndexedSegmentedLinear => 3_u8,
2412 },
2413 );
2414 e.set_mem_type(48..51, self.mem_type);
2415 }
2416 }
2417
2418 impl SM50Op for OpSt {
legalize(&mut self, b: &mut LegalizeBuilder)2419 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2420 legalize_ext_instr(self, b);
2421 }
2422
encode(&self, e: &mut SM50Encoder<'_>)2423 fn encode(&self, e: &mut SM50Encoder<'_>) {
2424 e.set_opcode(match self.access.space {
2425 MemSpace::Global(_) => 0xeed8,
2426 MemSpace::Local => 0xef50,
2427 MemSpace::Shared => 0xef58,
2428 });
2429
2430 e.set_reg_src(0..8, self.data);
2431 e.set_reg_src(8..16, self.addr);
2432 e.set_field(20..44, self.offset);
2433 e.set_mem_access(&self.access);
2434 }
2435 }
2436
atom_src_as_ssa( b: &mut LegalizeBuilder, src: Src, atom_type: AtomType, ) -> SSARef2437 fn atom_src_as_ssa(
2438 b: &mut LegalizeBuilder,
2439 src: Src,
2440 atom_type: AtomType,
2441 ) -> SSARef {
2442 if let Some(ssa) = src.as_ssa() {
2443 return *ssa;
2444 }
2445
2446 let tmp;
2447 if atom_type.bits() == 32 {
2448 tmp = b.alloc_ssa(RegFile::GPR, 1);
2449 b.copy_to(tmp.into(), 0.into());
2450 } else {
2451 debug_assert!(atom_type.bits() == 64);
2452 tmp = b.alloc_ssa(RegFile::GPR, 2);
2453 b.copy_to(tmp[0].into(), 0.into());
2454 b.copy_to(tmp[1].into(), 0.into());
2455 }
2456 tmp
2457 }
2458
2459 impl SM50Op for OpAtom {
legalize(&mut self, b: &mut LegalizeBuilder)2460 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2461 if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
2462 let cmpr = atom_src_as_ssa(b, self.cmpr, self.atom_type);
2463 let data = atom_src_as_ssa(b, self.data, self.atom_type);
2464
2465 let mut cmpr_data = Vec::new();
2466 cmpr_data.extend_from_slice(&cmpr);
2467 cmpr_data.extend_from_slice(&data);
2468 let cmpr_data = SSARef::try_from(cmpr_data).unwrap();
2469
2470 self.cmpr = 0.into();
2471 self.data = cmpr_data.into();
2472 self.atom_op = AtomOp::CmpExch(AtomCmpSrc::Packed);
2473 }
2474 legalize_ext_instr(self, b);
2475 }
2476
encode(&self, e: &mut SM50Encoder<'_>)2477 fn encode(&self, e: &mut SM50Encoder<'_>) {
2478 match self.mem_space {
2479 MemSpace::Global(addr_type) => {
2480 if self.dst.is_none() {
2481 e.set_opcode(0xebf8);
2482
2483 e.set_reg_src(0..8, self.data);
2484
2485 let data_type = match self.atom_type {
2486 AtomType::U32 => 0_u8,
2487 AtomType::I32 => 1_u8,
2488 AtomType::U64 => 2_u8,
2489 AtomType::F32 => 3_u8,
2490 // NOTE: U128 => 4_u8,
2491 AtomType::I64 => 5_u8,
2492 _ => panic!("Unsupported data type"),
2493 };
2494 e.set_field(20..23, data_type);
2495 e.set_atom_op(23..26, self.atom_op);
2496 } else if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2497 e.set_opcode(0xee00);
2498
2499 e.set_dst(self.dst);
2500
2501 // TODO: These are all supported by the disassembler but
2502 // only the packed layout appears to be supported by real
2503 // hardware
2504 let (data_src, data_layout) = match cmp_src {
2505 AtomCmpSrc::Separate => {
2506 if self.data.is_zero() {
2507 (self.cmpr, 1_u8)
2508 } else {
2509 assert!(self.cmpr.is_zero());
2510 (self.data, 2_u8)
2511 }
2512 }
2513 AtomCmpSrc::Packed => (self.data, 0_u8),
2514 };
2515 e.set_reg_src(20..28, data_src);
2516
2517 let data_type = match self.atom_type {
2518 AtomType::U32 => 0_u8,
2519 AtomType::U64 => 1_u8,
2520 _ => panic!("Unsupported data type"),
2521 };
2522 e.set_field(49..50, data_type);
2523 e.set_field(50..52, data_layout);
2524 e.set_field(52..56, 15_u8); // subOp
2525 } else {
2526 e.set_opcode(0xed00);
2527
2528 e.set_dst(self.dst);
2529 e.set_reg_src(20..28, self.data);
2530
2531 let data_type = match self.atom_type {
2532 AtomType::U32 => 0_u8,
2533 AtomType::I32 => 1_u8,
2534 AtomType::U64 => 2_u8,
2535 AtomType::F32 => 3_u8,
2536 // NOTE: U128 => 4_u8,
2537 AtomType::I64 => 5_u8,
2538 _ => panic!("Unsupported data type"),
2539 };
2540 e.set_field(49..52, data_type);
2541 e.set_atom_op(52..56, self.atom_op);
2542 }
2543
2544 e.set_mem_order(&self.mem_order);
2545
2546 e.set_reg_src(8..16, self.addr);
2547 e.set_field(28..48, self.addr_offset);
2548 e.set_field(
2549 48..49,
2550 match addr_type {
2551 MemAddrType::A32 => 0_u8,
2552 MemAddrType::A64 => 1_u8,
2553 },
2554 );
2555 }
2556 MemSpace::Local => panic!("Atomics do not support local"),
2557 MemSpace::Shared => {
2558 if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2559 e.set_opcode(0xee00);
2560
2561 assert!(cmp_src == AtomCmpSrc::Packed);
2562 assert!(self.cmpr.is_zero());
2563 e.set_reg_src(20..28, self.data);
2564
2565 let subop = match self.atom_type {
2566 AtomType::U32 => 4_u8,
2567 AtomType::U64 => 5_u8,
2568 _ => panic!("Unsupported data type"),
2569 };
2570 e.set_field(52..56, subop);
2571 } else {
2572 e.set_opcode(0xec00);
2573
2574 e.set_reg_src(20..28, self.data);
2575
2576 let data_type = match self.atom_type {
2577 AtomType::U32 => 0_u8,
2578 AtomType::I32 => 1_u8,
2579 AtomType::U64 => 2_u8,
2580 AtomType::I64 => 3_u8,
2581 _ => panic!("Unsupported data type"),
2582 };
2583 e.set_field(28..30, data_type);
2584 e.set_atom_op(52..56, self.atom_op);
2585 }
2586
2587 e.set_dst(self.dst);
2588 e.set_reg_src(8..16, self.addr);
2589 assert_eq!(self.addr_offset % 4, 0);
2590 e.set_field(30..52, self.addr_offset / 4);
2591 }
2592 }
2593 }
2594 }
2595
2596 impl SM50Op for OpAL2P {
legalize(&mut self, b: &mut LegalizeBuilder)2597 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2598 legalize_ext_instr(self, b);
2599 }
2600
encode(&self, e: &mut SM50Encoder<'_>)2601 fn encode(&self, e: &mut SM50Encoder<'_>) {
2602 e.set_opcode(0xefa0);
2603
2604 e.set_dst(self.dst);
2605 e.set_reg_src(8..16, self.offset);
2606
2607 e.set_field(20..31, self.access.addr);
2608 assert!(!self.access.patch);
2609 e.set_bit(32, self.access.output);
2610
2611 e.set_field(47..49, 0_u8); // comps
2612 e.set_pred_dst(44..47, Dst::None);
2613 }
2614 }
2615
2616 impl SM50Op for OpALd {
legalize(&mut self, b: &mut LegalizeBuilder)2617 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2618 legalize_ext_instr(self, b);
2619 }
2620
encode(&self, e: &mut SM50Encoder<'_>)2621 fn encode(&self, e: &mut SM50Encoder<'_>) {
2622 e.set_opcode(0xefd8);
2623
2624 e.set_dst(self.dst);
2625 if self.access.phys {
2626 assert!(!self.access.patch);
2627 assert!(self.offset.src_ref.as_reg().is_some());
2628 } else if !self.access.patch {
2629 assert!(self.offset.is_zero());
2630 }
2631 e.set_reg_src(8..16, self.offset);
2632 e.set_reg_src(39..47, self.vtx);
2633
2634 e.set_field(20..30, self.access.addr);
2635 e.set_bit(31, self.access.patch);
2636 e.set_bit(32, self.access.output);
2637 e.set_field(47..49, self.access.comps - 1);
2638 }
2639 }
2640
2641 impl SM50Op for OpASt {
legalize(&mut self, b: &mut LegalizeBuilder)2642 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2643 legalize_ext_instr(self, b);
2644 }
2645
encode(&self, e: &mut SM50Encoder<'_>)2646 fn encode(&self, e: &mut SM50Encoder<'_>) {
2647 e.set_opcode(0xeff0);
2648
2649 e.set_reg_src(0..8, self.data);
2650 e.set_reg_src(8..16, self.offset);
2651 e.set_reg_src(39..47, self.vtx);
2652
2653 assert!(!self.access.phys);
2654 assert!(self.access.output);
2655 e.set_field(20..30, self.access.addr);
2656 e.set_bit(31, self.access.patch);
2657 e.set_bit(32, self.access.output);
2658 e.set_field(47..49, self.access.comps - 1);
2659 }
2660 }
2661
2662 impl SM50Op for OpIpa {
legalize(&mut self, b: &mut LegalizeBuilder)2663 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2664 legalize_ext_instr(self, b);
2665 }
2666
encode(&self, e: &mut SM50Encoder<'_>)2667 fn encode(&self, e: &mut SM50Encoder<'_>) {
2668 e.set_opcode(0xe000);
2669
2670 e.set_dst(self.dst);
2671 e.set_reg_src(8..16, 0.into()); // addr
2672 e.set_reg_src(20..28, self.inv_w);
2673 e.set_reg_src(39..47, self.offset);
2674
2675 assert!(self.addr % 4 == 0);
2676 e.set_field(28..38, self.addr);
2677 e.set_bit(38, false); // .IDX
2678 e.set_pred_dst(47..50, Dst::None); // TODO: What is this for?
2679 e.set_bit(51, false); // .SAT
2680 e.set_field(
2681 52..54,
2682 match self.loc {
2683 InterpLoc::Default => 0_u8,
2684 InterpLoc::Centroid => 1_u8,
2685 InterpLoc::Offset => 2_u8,
2686 },
2687 );
2688 e.set_field(
2689 54..56,
2690 match self.freq {
2691 InterpFreq::Pass => 0_u8,
2692 InterpFreq::PassMulW => 1_u8,
2693 InterpFreq::Constant => 2_u8,
2694 InterpFreq::State => 3_u8,
2695 },
2696 );
2697 }
2698 }
2699
2700 impl SM50Op for OpCCtl {
legalize(&mut self, b: &mut LegalizeBuilder)2701 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2702 legalize_ext_instr(self, b);
2703 }
2704
encode(&self, e: &mut SM50Encoder<'_>)2705 fn encode(&self, e: &mut SM50Encoder<'_>) {
2706 match self.mem_space {
2707 MemSpace::Global(addr_type) => {
2708 e.set_opcode(0xef60);
2709
2710 assert!(self.addr_offset % 4 == 0);
2711 e.set_field(22..52, self.addr_offset / 4);
2712 e.set_field(
2713 52..53,
2714 match addr_type {
2715 MemAddrType::A32 => 0_u8,
2716 MemAddrType::A64 => 1_u8,
2717 },
2718 );
2719 }
2720 MemSpace::Local => panic!("cctl does not support local"),
2721 MemSpace::Shared => {
2722 e.set_opcode(0xef80);
2723
2724 assert!(self.addr_offset % 4 == 0);
2725 e.set_field(22..44, self.addr_offset / 4);
2726 }
2727 }
2728
2729 e.set_field(
2730 0..4,
2731 match self.op {
2732 CCtlOp::Qry1 => 0_u8,
2733 CCtlOp::PF1 => 1_u8,
2734 CCtlOp::PF1_5 => 2_u8,
2735 CCtlOp::PF2 => 3_u8,
2736 CCtlOp::WB => 4_u8,
2737 CCtlOp::IV => 5_u8,
2738 CCtlOp::IVAll => 6_u8,
2739 CCtlOp::RS => 7_u8,
2740 CCtlOp::RSLB => 7_u8,
2741 op => panic!("Unsupported cache control {op:?}"),
2742 },
2743 );
2744 e.set_reg_src(8..16, self.addr);
2745 }
2746 }
2747
2748 impl SM50Op for OpMemBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2749 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2750 // Nothing to do
2751 }
2752
encode(&self, e: &mut SM50Encoder<'_>)2753 fn encode(&self, e: &mut SM50Encoder<'_>) {
2754 e.set_opcode(0xef98);
2755
2756 e.set_field(
2757 8..10,
2758 match self.scope {
2759 MemScope::CTA => 0_u8,
2760 MemScope::GPU => 1_u8,
2761 MemScope::System => 2_u8,
2762 },
2763 );
2764 }
2765 }
2766
2767 impl SM50Encoder<'_> {
set_rel_offset(&mut self, range: Range<usize>, label: &Label)2768 fn set_rel_offset(&mut self, range: Range<usize>, label: &Label) {
2769 let ip = u32::try_from(self.ip).unwrap();
2770 let ip = i32::try_from(ip).unwrap();
2771
2772 let target_ip = *self.labels.get(label).unwrap();
2773 let target_ip = u32::try_from(target_ip).unwrap();
2774 let target_ip = i32::try_from(target_ip).unwrap();
2775
2776 let rel_offset = target_ip - ip - 8;
2777
2778 self.set_field(range, rel_offset);
2779 }
2780 }
2781
2782 impl SM50Op for OpBra {
legalize(&mut self, _b: &mut LegalizeBuilder)2783 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2784 // Nothing to do
2785 }
2786
encode(&self, e: &mut SM50Encoder<'_>)2787 fn encode(&self, e: &mut SM50Encoder<'_>) {
2788 e.set_opcode(0xe240);
2789 e.set_rel_offset(20..44, &self.target);
2790 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2791 }
2792 }
2793
2794 impl SM50Op for OpSSy {
legalize(&mut self, _b: &mut LegalizeBuilder)2795 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2796 // Nothing to do
2797 }
2798
encode(&self, e: &mut SM50Encoder<'_>)2799 fn encode(&self, e: &mut SM50Encoder<'_>) {
2800 e.set_opcode(0xe290);
2801 e.set_rel_offset(20..44, &self.target);
2802 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2803 }
2804 }
2805
2806 impl SM50Op for OpSync {
legalize(&mut self, _b: &mut LegalizeBuilder)2807 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2808 // Nothing to do
2809 }
2810
encode(&self, e: &mut SM50Encoder<'_>)2811 fn encode(&self, e: &mut SM50Encoder<'_>) {
2812 e.set_opcode(0xf0f8);
2813 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2814 }
2815 }
2816
2817 impl SM50Op for OpBrk {
legalize(&mut self, _b: &mut LegalizeBuilder)2818 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2819 // Nothing to do
2820 }
2821
encode(&self, e: &mut SM50Encoder<'_>)2822 fn encode(&self, e: &mut SM50Encoder<'_>) {
2823 e.set_opcode(0xe340);
2824 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2825 }
2826 }
2827
2828 impl SM50Op for OpPBk {
legalize(&mut self, _b: &mut LegalizeBuilder)2829 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2830 // Nothing to do
2831 }
2832
encode(&self, e: &mut SM50Encoder<'_>)2833 fn encode(&self, e: &mut SM50Encoder<'_>) {
2834 e.set_opcode(0xe2a0);
2835 e.set_rel_offset(20..44, &self.target);
2836 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2837 }
2838 }
2839
2840 impl SM50Op for OpCont {
legalize(&mut self, _b: &mut LegalizeBuilder)2841 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2842 // Nothing to do
2843 }
2844
encode(&self, e: &mut SM50Encoder<'_>)2845 fn encode(&self, e: &mut SM50Encoder<'_>) {
2846 e.set_opcode(0xe350);
2847 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2848 }
2849 }
2850
2851 impl SM50Op for OpPCnt {
legalize(&mut self, _b: &mut LegalizeBuilder)2852 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2853 // Nothing to do
2854 }
2855
encode(&self, e: &mut SM50Encoder<'_>)2856 fn encode(&self, e: &mut SM50Encoder<'_>) {
2857 e.set_opcode(0xe2b0);
2858 e.set_rel_offset(20..44, &self.target);
2859 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2860 }
2861 }
2862
2863 impl SM50Op for OpExit {
legalize(&mut self, _b: &mut LegalizeBuilder)2864 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2865 // Nothing to do
2866 }
2867
encode(&self, e: &mut SM50Encoder<'_>)2868 fn encode(&self, e: &mut SM50Encoder<'_>) {
2869 e.set_opcode(0xe300);
2870
2871 // TODO: CC flags
2872 e.set_field(0..4, 0xf_u8); // CC.T
2873 }
2874 }
2875
2876 impl SM50Op for OpBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2877 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2878 // Nothing to do
2879 }
2880
encode(&self, e: &mut SM50Encoder<'_>)2881 fn encode(&self, e: &mut SM50Encoder<'_>) {
2882 e.set_opcode(0xf0a8);
2883
2884 e.set_reg_src(8..16, SrcRef::Zero.into());
2885
2886 // 00: RED.POPC
2887 // 01: RED.AND
2888 // 02: RED.OR
2889 e.set_field(35..37, 0_u8);
2890
2891 // 00: SYNC
2892 // 01: ARV
2893 // 02: RED
2894 // 03: SCAN
2895 e.set_field(32..35, 0_u8);
2896
2897 e.set_pred_src(39..42, 42, SrcRef::True.into());
2898 }
2899 }
2900
2901 impl SM50Op for OpCS2R {
legalize(&mut self, _b: &mut LegalizeBuilder)2902 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2903 // Nothing to do
2904 }
2905
encode(&self, e: &mut SM50Encoder<'_>)2906 fn encode(&self, e: &mut SM50Encoder<'_>) {
2907 e.set_opcode(0x50c8);
2908 e.set_dst(self.dst);
2909 e.set_field(20..28, self.idx);
2910 }
2911 }
2912
2913 impl SM50Op for OpIsberd {
legalize(&mut self, _b: &mut LegalizeBuilder)2914 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2915 // Nothing to do
2916 }
2917
encode(&self, e: &mut SM50Encoder<'_>)2918 fn encode(&self, e: &mut SM50Encoder<'_>) {
2919 e.set_opcode(0xefd0);
2920 e.set_dst(self.dst);
2921 e.set_reg_src(8..16, self.idx);
2922 }
2923 }
2924
2925 impl SM50Op for OpKill {
legalize(&mut self, _b: &mut LegalizeBuilder)2926 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2927 // Nothing to do
2928 }
2929
encode(&self, e: &mut SM50Encoder<'_>)2930 fn encode(&self, e: &mut SM50Encoder<'_>) {
2931 e.set_opcode(0xe330);
2932 e.set_field(0..5, 0x0f_u8);
2933 }
2934 }
2935
2936 impl SM50Op for OpNop {
legalize(&mut self, _b: &mut LegalizeBuilder)2937 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2938 // Nothing to do
2939 }
2940
encode(&self, e: &mut SM50Encoder<'_>)2941 fn encode(&self, e: &mut SM50Encoder<'_>) {
2942 e.set_opcode(0x50b0);
2943
2944 // TODO: CC flags
2945 e.set_field(8..12, 0xf_u8); // CC.T
2946 }
2947 }
2948
2949 impl SM50Op for OpPixLd {
legalize(&mut self, _b: &mut LegalizeBuilder)2950 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2951 // Nothing to do
2952 }
2953
encode(&self, e: &mut SM50Encoder<'_>)2954 fn encode(&self, e: &mut SM50Encoder<'_>) {
2955 e.set_opcode(0xefe8);
2956 e.set_dst(self.dst);
2957 e.set_reg_src(8..16, 0.into());
2958 e.set_field(
2959 31..34,
2960 match &self.val {
2961 PixVal::CovMask => 1_u8,
2962 PixVal::Covered => 2_u8,
2963 PixVal::Offset => 3_u8,
2964 PixVal::CentroidOffset => 4_u8,
2965 PixVal::MyIndex => 5_u8,
2966 other => panic!("Unsupported PixVal: {other}"),
2967 },
2968 );
2969 e.set_pred_dst(45..48, Dst::None);
2970 }
2971 }
2972
2973 impl SM50Op for OpS2R {
legalize(&mut self, _b: &mut LegalizeBuilder)2974 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2975 // Nothing to do
2976 }
2977
encode(&self, e: &mut SM50Encoder<'_>)2978 fn encode(&self, e: &mut SM50Encoder<'_>) {
2979 e.set_opcode(0xf0c8);
2980 e.set_dst(self.dst);
2981 e.set_field(20..28, self.idx);
2982 }
2983 }
2984
2985 impl SM50Op for OpVote {
legalize(&mut self, _b: &mut LegalizeBuilder)2986 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2987 // Nothing to do
2988 }
2989
encode(&self, e: &mut SM50Encoder<'_>)2990 fn encode(&self, e: &mut SM50Encoder<'_>) {
2991 e.set_opcode(0x50d8);
2992
2993 e.set_dst(self.ballot);
2994 e.set_pred_dst(45..48, self.vote);
2995 e.set_pred_src(39..42, 42, self.pred);
2996
2997 e.set_field(
2998 48..50,
2999 match self.op {
3000 VoteOp::All => 0u8,
3001 VoteOp::Any => 1u8,
3002 VoteOp::Eq => 2u8,
3003 },
3004 );
3005 }
3006 }
3007
3008 impl SM50Op for OpOut {
legalize(&mut self, b: &mut LegalizeBuilder)3009 fn legalize(&mut self, b: &mut LegalizeBuilder) {
3010 use RegFile::GPR;
3011 b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR);
3012 b.copy_alu_src_if_i20_overflow(&mut self.stream, GPR, SrcType::ALU);
3013 }
3014
encode(&self, e: &mut SM50Encoder<'_>)3015 fn encode(&self, e: &mut SM50Encoder<'_>) {
3016 match &self.stream.src_ref {
3017 SrcRef::Zero | SrcRef::Reg(_) => {
3018 e.set_opcode(0xfbe0);
3019 e.set_reg_src(20..28, self.stream);
3020 }
3021 SrcRef::Imm32(imm32) => {
3022 e.set_opcode(0xf6e0);
3023 e.set_src_imm_i20(20..39, 56, *imm32);
3024 }
3025 SrcRef::CBuf(cbuf) => {
3026 e.set_opcode(0xebe0);
3027 e.set_src_cb(20..39, cbuf);
3028 }
3029 src => panic!("Invalid out stream: {src}"),
3030 }
3031
3032 e.set_field(
3033 39..41,
3034 match self.out_type {
3035 OutType::Emit => 1_u8,
3036 OutType::Cut => 2_u8,
3037 OutType::EmitThenCut => 3_u8,
3038 },
3039 );
3040
3041 e.set_reg_src(8..16, self.handle);
3042 e.set_dst(self.dst);
3043 }
3044 }
3045
// Maps an `Op` (by shared or mutable reference) to the payload of whichever
// variant it holds, for every variant listed below. The result is meant to
// be used where a `dyn SM50Op` reference is expected. Any variant not in the
// list panics with "Unhandled instruction".
macro_rules! as_sm50_op_match {
    // Internal rule: expands the variant list into one match arm each.
    (@dispatch $op:expr; $($variant:ident),+ $(,)?) => {
        match $op {
            $(Op::$variant(inner) => inner,)+
            _ => panic!("Unhandled instruction {}", $op),
        }
    };
    // Entry point: supplies the list of supported variants.
    ($op:expr) => {
        as_sm50_op_match!(
            @dispatch $op;
            FAdd, FMnMx, FMul, FFma, FSet, FSetP, FSwzAdd, Rro, MuFu, Flo,
            DAdd, DFma, DMnMx, DMul, DSetP, IAdd2, IAdd2X, Mov, Sel, Shfl,
            Vote, PSetP, SuSt, S2R, PopC, Prmt, Ld, Ldc, St, Lop2, Shf, Shl,
            Shr, F2F, F2I, I2F, I2I, IMad, IMul, IMnMx, ISetP, Tex, Tld,
            Tld4, Tmml, Txd, Txq, Ipa, AL2P, ALd, ASt, CCtl, MemBar, Atom,
            Bra, SSy, Sync, Brk, PBk, Cont, PCnt, Exit, Bar, SuLd, SuAtom,
            Kill, CS2R, Nop, PixLd, Isberd, Out, Bfe,
        )
    };
}
3125
as_sm50_op(op: &Op) -> &dyn SM50Op3126 fn as_sm50_op(op: &Op) -> &dyn SM50Op {
3127 as_sm50_op_match!(op)
3128 }
3129
as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op3130 fn as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op {
3131 as_sm50_op_match!(op)
3132 }
3133
encode_instr( instr_index: usize, instr: Option<&Box<Instr>>, sm: &ShaderModel50, labels: &HashMap<Label, usize>, ip: &mut usize, sched_instr: &mut [u32; 2], ) -> [u32; 2]3134 fn encode_instr(
3135 instr_index: usize,
3136 instr: Option<&Box<Instr>>,
3137 sm: &ShaderModel50,
3138 labels: &HashMap<Label, usize>,
3139 ip: &mut usize,
3140 sched_instr: &mut [u32; 2],
3141 ) -> [u32; 2] {
3142 let mut e = SM50Encoder {
3143 sm,
3144 ip: *ip,
3145 labels,
3146 inst: [0_u32; 2],
3147 sched: 0,
3148 };
3149
3150 if let Some(instr) = instr {
3151 as_sm50_op(&instr.op).encode(&mut e);
3152 e.set_pred(&instr.pred);
3153 e.set_instr_deps(&instr.deps);
3154 } else {
3155 let nop = OpNop { label: None };
3156 nop.encode(&mut e);
3157 e.set_pred(&true.into());
3158 e.set_instr_deps(&InstrDeps::new());
3159 }
3160
3161 *ip += 8;
3162
3163 BitMutView::new(sched_instr)
3164 .set_field(21 * instr_index..21 * (instr_index + 1), e.sched);
3165
3166 e.inst
3167 }
3168
encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32>3169 fn encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32> {
3170 assert!(s.functions.len() == 1);
3171 let func = &s.functions[0];
3172
3173 let mut num_instrs = 0_usize;
3174 let mut labels = HashMap::new();
3175 for b in &func.blocks {
3176 // We ensure blocks will have groups of 3 instructions with a
3177 // schedule instruction before each groups. As we should never jump
3178 // to a schedule instruction, we account for that here.
3179 labels.insert(b.label, num_instrs + 8);
3180
3181 let block_num_instrs = b.instrs.len().next_multiple_of(3);
3182
3183 // Every 3 instructions, we have a new schedule instruction so we
3184 // need to account for that.
3185 num_instrs += (block_num_instrs + (block_num_instrs / 3)) * 8;
3186 }
3187
3188 let mut encoded = Vec::new();
3189 for b in &func.blocks {
3190 // A block is composed of groups of 3 instructions.
3191 let block_num_instrs = b.instrs.len().next_multiple_of(3);
3192
3193 let mut instrs_iter = b.instrs.iter();
3194
3195 for _ in 0..(block_num_instrs / 3) {
3196 let mut ip = ((encoded.len() / 2) + 1) * 8;
3197
3198 let mut sched_instr = [0x0; 2];
3199
3200 let instr0 = encode_instr(
3201 0,
3202 instrs_iter.next(),
3203 sm,
3204 &labels,
3205 &mut ip,
3206 &mut sched_instr,
3207 );
3208 let instr1 = encode_instr(
3209 1,
3210 instrs_iter.next(),
3211 sm,
3212 &labels,
3213 &mut ip,
3214 &mut sched_instr,
3215 );
3216 let instr2 = encode_instr(
3217 2,
3218 instrs_iter.next(),
3219 sm,
3220 &labels,
3221 &mut ip,
3222 &mut sched_instr,
3223 );
3224
3225 encoded.extend_from_slice(&sched_instr[..]);
3226 encoded.extend_from_slice(&instr0[..]);
3227 encoded.extend_from_slice(&instr1[..]);
3228 encoded.extend_from_slice(&instr2[..]);
3229 }
3230 }
3231
3232 encoded
3233 }
3234