1 // Copyright © 2022 Collabora, Ltd. 2 // SPDX-License-Identifier: MIT 3 4 use crate::ir::*; 5 6 use std::collections::HashMap; 7 8 enum CBufRule { 9 Yes, 10 No, 11 BindlessRequiresBlock(usize), 12 } 13 14 impl CBufRule { allows_src(&self, src_bi: usize, src: &Src) -> bool15 fn allows_src(&self, src_bi: usize, src: &Src) -> bool { 16 let SrcRef::CBuf(cb) = &src.src_ref else { 17 return true; 18 }; 19 20 match self { 21 CBufRule::Yes => true, 22 CBufRule::No => false, 23 CBufRule::BindlessRequiresBlock(bi) => match cb.buf { 24 CBuf::Binding(_) => true, 25 CBuf::BindlessSSA(_) => src_bi == *bi, 26 CBuf::BindlessUGPR(_) => panic!("Not in SSA form"), 27 }, 28 } 29 } 30 } 31 32 struct CopyEntry { 33 bi: usize, 34 src_type: SrcType, 35 src: Src, 36 } 37 38 struct PrmtEntry { 39 bi: usize, 40 sel: PrmtSel, 41 srcs: [Src; 2], 42 } 43 44 enum CopyPropEntry { 45 Copy(CopyEntry), 46 Prmt(PrmtEntry), 47 } 48 49 struct CopyPropPass { 50 ssa_map: HashMap<SSAValue, CopyPropEntry>, 51 } 52 53 impl CopyPropPass { new() -> CopyPropPass54 pub fn new() -> CopyPropPass { 55 CopyPropPass { 56 ssa_map: HashMap::new(), 57 } 58 } 59 add_copy( &mut self, bi: usize, dst: SSAValue, src_type: SrcType, src: Src, )60 fn add_copy( 61 &mut self, 62 bi: usize, 63 dst: SSAValue, 64 src_type: SrcType, 65 src: Src, 66 ) { 67 assert!(src.src_ref.get_reg().is_none()); 68 self.ssa_map 69 .insert(dst, CopyPropEntry::Copy(CopyEntry { bi, src_type, src })); 70 } 71 add_prmt( &mut self, bi: usize, dst: SSAValue, sel: PrmtSel, srcs: [Src; 2], )72 fn add_prmt( 73 &mut self, 74 bi: usize, 75 dst: SSAValue, 76 sel: PrmtSel, 77 srcs: [Src; 2], 78 ) { 79 assert!( 80 srcs[0].src_ref.get_reg().is_none() 81 && srcs[1].src_ref.get_reg().is_none() 82 ); 83 self.ssa_map 84 .insert(dst, CopyPropEntry::Prmt(PrmtEntry { bi, sel, srcs })); 85 } 86 add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src)87 fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) { 88 assert!(dst.comps() == 2); 89 match src.src_ref { 90 SrcRef::Zero | SrcRef::Imm32(_) => { 91 self.add_copy(bi, dst[0], SrcType::ALU, Src::new_zero()); 92 self.add_copy(bi, dst[1], SrcType::F64, src); 93 } 94 SrcRef::CBuf(cb) => { 95 let lo32 = Src::from(SrcRef::CBuf(cb)); 96 let hi32 = Src { 97 src_ref: SrcRef::CBuf(cb.offset(4)), 98 src_mod: src.src_mod, 99 src_swizzle: src.src_swizzle, 100 }; 101 self.add_copy(bi, dst[0], SrcType::ALU, lo32); 102 self.add_copy(bi, dst[1], SrcType::F64, hi32); 103 } 104 SrcRef::SSA(ssa) => { 105 assert!(ssa.comps() == 2); 106 let lo32 = Src::from(ssa[0]); 107 let hi32 = Src { 108 src_ref: ssa[1].into(), 109 src_mod: src.src_mod, 110 src_swizzle: src.src_swizzle, 111 }; 112 self.add_copy(bi, dst[0], SrcType::ALU, lo32); 113 self.add_copy(bi, dst[1], SrcType::F64, hi32); 114 } 115 _ => (), 116 } 117 } 118 get_copy(&self, dst: &SSAValue) -> Option<&CopyPropEntry>119 fn get_copy(&self, dst: &SSAValue) -> Option<&CopyPropEntry> { 120 self.ssa_map.get(dst) 121 } 122 prop_to_pred(&self, pred: &mut Pred)123 fn prop_to_pred(&self, pred: &mut Pred) { 124 loop { 125 let src_ssa = match &pred.pred_ref { 126 PredRef::SSA(ssa) => ssa, 127 _ => return, 128 }; 129 130 let Some(CopyPropEntry::Copy(entry)) = self.get_copy(src_ssa) 131 else { 132 return; 133 }; 134 135 match entry.src.src_ref { 136 SrcRef::True => { 137 pred.pred_ref = PredRef::None; 138 } 139 SrcRef::False => { 140 pred.pred_ref = PredRef::None; 141 pred.pred_inv = !pred.pred_inv; 142 } 143 SrcRef::SSA(ssa) => { 144 assert!(ssa.comps() == 1); 145 pred.pred_ref = PredRef::SSA(ssa[0]); 146 } 147 _ => return, 148 } 149 150 match entry.src.src_mod { 151 SrcMod::None => (), 152 SrcMod::BNot => { 153 pred.pred_inv = !pred.pred_inv; 154 } 155 _ => panic!("Invalid predicate modifier"), 156 } 157 } 158 } 159 prop_to_ssa_ref(&self, src_ssa: &mut SSARef) -> bool160 fn prop_to_ssa_ref(&self, src_ssa: &mut SSARef) -> bool { 161 let mut progress = false; 162 163 for c in 0..src_ssa.comps() { 164 let c_ssa = &mut src_ssa[usize::from(c)]; 165 let Some(CopyPropEntry::Copy(entry)) = self.get_copy(c_ssa) else { 166 continue; 167 }; 168 169 if entry.src.src_mod.is_none() { 170 if let SrcRef::SSA(entry_ssa) = entry.src.src_ref { 171 assert!(entry_ssa.comps() == 1); 172 *c_ssa = entry_ssa[0]; 173 progress = true; 174 } 175 } 176 } 177 178 progress 179 } 180 prop_to_ssa_src(&self, src: &mut Src)181 fn prop_to_ssa_src(&self, src: &mut Src) { 182 assert!(src.src_mod.is_none()); 183 if let SrcRef::SSA(src_ssa) = &mut src.src_ref { 184 loop { 185 if !self.prop_to_ssa_ref(src_ssa) { 186 break; 187 } 188 } 189 } 190 } 191 prop_to_gpr_src(&self, src: &mut Src)192 fn prop_to_gpr_src(&self, src: &mut Src) { 193 loop { 194 let src_ssa = match &mut src.src_ref { 195 SrcRef::SSA(ssa) => { 196 // First, try to propagate SSA components 197 if self.prop_to_ssa_ref(ssa) { 198 continue; 199 } 200 ssa 201 } 202 _ => return, 203 }; 204 205 for c in 0..usize::from(src_ssa.comps()) { 206 let Some(CopyPropEntry::Copy(entry)) = 207 self.get_copy(&src_ssa[c]) 208 else { 209 return; 210 }; 211 212 match entry.src.src_ref { 213 SrcRef::Zero | SrcRef::Imm32(0) => (), 214 _ => return, 215 } 216 } 217 218 // If we got here, all the components are zero 219 src.src_ref = SrcRef::Zero; 220 } 221 } 222 prop_to_scalar_src( &self, src_type: SrcType, cbuf_rule: &CBufRule, src: &mut Src, )223 fn prop_to_scalar_src( 224 &self, 225 src_type: SrcType, 226 cbuf_rule: &CBufRule, 227 src: &mut Src, 228 ) { 229 loop { 230 let src_ssa = match &src.src_ref { 231 SrcRef::SSA(ssa) => ssa, 232 _ => return, 233 }; 234 235 assert!(src_ssa.comps() == 1); 236 let entry = match self.get_copy(&src_ssa[0]) { 237 Some(e) => e, 238 None => return, 239 }; 240 241 match entry { 242 CopyPropEntry::Copy(entry) => { 243 if !cbuf_rule.allows_src(entry.bi, &entry.src) { 244 return; 245 } 246 247 // If there are modifiers, the source types have to match 248 if !entry.src.src_mod.is_none() 249 && entry.src_type != src_type 250 { 251 return; 252 } 253 254 src.src_ref = entry.src.src_ref; 255 src.src_mod = entry.src.src_mod.modify(src.src_mod); 256 } 257 CopyPropEntry::Prmt(entry) => { 258 // Turn the swizzle into a permute. For F16, we use Xx to 259 // indicate that it only takes the bottom 16 bits. 260 let swizzle_prmt: [u8; 4] = match src_type { 261 SrcType::F16 => [0, 1, 0, 1], 262 SrcType::F16v2 => match src.src_swizzle { 263 SrcSwizzle::None => [0, 1, 2, 3], 264 SrcSwizzle::Xx => [0, 1, 0, 1], 265 SrcSwizzle::Yy => [2, 3, 2, 3], 266 }, 267 _ => [0, 1, 2, 3], 268 }; 269 270 let mut entry_src_idx = None; 271 let mut combined = [0_u8; 4]; 272 273 for i in 0..4 { 274 let prmt_byte = entry.sel.get(swizzle_prmt[i].into()); 275 276 // If we have a sign extension, we cannot simplify it. 277 if prmt_byte.msb() { 278 return; 279 } 280 281 // Ensure we are using the same source, we cannot 282 // combine multiple sources. 283 if entry_src_idx.is_none() { 284 entry_src_idx = Some(prmt_byte.src()); 285 } else if entry_src_idx != Some(prmt_byte.src()) { 286 return; 287 } 288 289 combined[i] = prmt_byte.byte().try_into().unwrap(); 290 } 291 292 let entry_src_idx = usize::from(entry_src_idx.unwrap()); 293 let entry_src = entry.srcs[entry_src_idx]; 294 295 if !cbuf_rule.allows_src(entry.bi, &entry_src) { 296 return; 297 } 298 299 // See if that permute is a valid swizzle 300 let new_swizzle = match src_type { 301 SrcType::F16 => { 302 if combined != [0, 1, 0, 1] { 303 return; 304 } 305 SrcSwizzle::None 306 } 307 SrcType::F16v2 => match combined { 308 [0, 1, 2, 3] => SrcSwizzle::None, 309 [0, 1, 0, 1] => SrcSwizzle::Xx, 310 [2, 3, 2, 3] => SrcSwizzle::Yy, 311 _ => return, 312 }, 313 _ => { 314 if combined != [0, 1, 2, 3] { 315 return; 316 } 317 SrcSwizzle::None 318 } 319 }; 320 321 src.src_ref = entry_src.src_ref; 322 src.src_mod = entry_src.src_mod.modify(src.src_mod); 323 src.src_swizzle = new_swizzle; 324 } 325 } 326 } 327 } 328 prop_to_f64_src(&self, cbuf_rule: &CBufRule, src: &mut Src)329 fn prop_to_f64_src(&self, cbuf_rule: &CBufRule, src: &mut Src) { 330 loop { 331 let src_ssa = match &mut src.src_ref { 332 SrcRef::SSA(ssa) => ssa, 333 _ => return, 334 }; 335 336 assert!(src_ssa.comps() == 2); 337 338 // First, try to propagate the two halves individually. Source 339 // modifiers only apply to the high 32 bits so we have to reject 340 // any copies with source modifiers in the low bits and apply 341 // source modifiers as needed when propagating the high bits. 342 let lo_entry_or_none = self.get_copy(&src_ssa[0]); 343 if let Some(CopyPropEntry::Copy(lo_entry)) = lo_entry_or_none { 344 if lo_entry.src.src_mod.is_none() { 345 if let SrcRef::SSA(lo_entry_ssa) = lo_entry.src.src_ref { 346 src_ssa[0] = lo_entry_ssa[0]; 347 continue; 348 } 349 } 350 } 351 352 let hi_entry_or_none = self.get_copy(&src_ssa[1]); 353 if let Some(CopyPropEntry::Copy(hi_entry)) = hi_entry_or_none { 354 if hi_entry.src.src_mod.is_none() 355 || hi_entry.src_type == SrcType::F64 356 { 357 if let SrcRef::SSA(hi_entry_ssa) = hi_entry.src.src_ref { 358 src_ssa[1] = hi_entry_ssa[0]; 359 src.src_mod = hi_entry.src.src_mod.modify(src.src_mod); 360 continue; 361 } 362 } 363 } 364 365 let Some(CopyPropEntry::Copy(lo_entry)) = lo_entry_or_none else { 366 return; 367 }; 368 369 let Some(CopyPropEntry::Copy(hi_entry)) = hi_entry_or_none else { 370 return; 371 }; 372 373 if !lo_entry.src.src_mod.is_none() { 374 return; 375 } 376 377 if !hi_entry.src.src_mod.is_none() 378 && hi_entry.src_type != SrcType::F64 379 { 380 return; 381 } 382 383 if !cbuf_rule.allows_src(hi_entry.bi, &hi_entry.src) 384 || !cbuf_rule.allows_src(lo_entry.bi, &lo_entry.src) 385 { 386 return; 387 } 388 389 let new_src_ref = match hi_entry.src.src_ref { 390 SrcRef::Zero => match lo_entry.src.src_ref { 391 SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero, 392 _ => return, 393 }, 394 SrcRef::Imm32(i) => { 395 // 32-bit immediates for f64 srouces are the top 32 bits 396 // with zero in the lower 32. 397 match lo_entry.src.src_ref { 398 SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(i), 399 _ => return, 400 } 401 } 402 SrcRef::CBuf(hi_cb) => match lo_entry.src.src_ref { 403 SrcRef::CBuf(lo_cb) => { 404 if hi_cb.buf != lo_cb.buf { 405 return; 406 } 407 if lo_cb.offset % 8 != 0 { 408 return; 409 } 410 if hi_cb.offset != lo_cb.offset + 4 { 411 return; 412 } 413 SrcRef::CBuf(lo_cb) 414 } 415 _ => return, 416 }, 417 // SrcRef::SSA is already handled above 418 _ => return, 419 }; 420 421 src.src_ref = new_src_ref; 422 src.src_mod = hi_entry.src.src_mod.modify(src.src_mod); 423 } 424 } 425 prop_to_src( &self, src_type: SrcType, cbuf_rule: &CBufRule, src: &mut Src, )426 fn prop_to_src( 427 &self, 428 src_type: SrcType, 429 cbuf_rule: &CBufRule, 430 src: &mut Src, 431 ) { 432 match src_type { 433 SrcType::SSA => { 434 self.prop_to_ssa_src(src); 435 } 436 SrcType::GPR => { 437 self.prop_to_gpr_src(src); 438 } 439 SrcType::ALU 440 | SrcType::F16 441 | SrcType::F16v2 442 | SrcType::F32 443 | SrcType::I32 444 | SrcType::B32 445 | SrcType::Pred => { 446 self.prop_to_scalar_src(src_type, cbuf_rule, src); 447 } 448 SrcType::F64 => { 449 self.prop_to_f64_src(cbuf_rule, src); 450 } 451 SrcType::Carry | SrcType::Bar => (), 452 } 453 } 454 try_add_instr(&mut self, bi: usize, instr: &Instr)455 fn try_add_instr(&mut self, bi: usize, instr: &Instr) { 456 match &instr.op { 457 Op::HAdd2(add) => { 458 let dst = add.dst.as_ssa().unwrap(); 459 assert!(dst.comps() == 1); 460 let dst = dst[0]; 461 462 if !add.saturate { 463 if add.srcs[0].is_fneg_zero(SrcType::F16v2) { 464 self.add_copy(bi, dst, SrcType::F16v2, add.srcs[1]); 465 } else if add.srcs[1].is_fneg_zero(SrcType::F16v2) { 466 self.add_copy(bi, dst, SrcType::F16v2, add.srcs[0]); 467 } 468 } 469 } 470 Op::FAdd(add) => { 471 let dst = add.dst.as_ssa().unwrap(); 472 assert!(dst.comps() == 1); 473 let dst = dst[0]; 474 475 if !add.saturate { 476 if add.srcs[0].is_fneg_zero(SrcType::F32) { 477 self.add_copy(bi, dst, SrcType::F32, add.srcs[1]); 478 } else if add.srcs[1].is_fneg_zero(SrcType::F32) { 479 self.add_copy(bi, dst, SrcType::F32, add.srcs[0]); 480 } 481 } 482 } 483 Op::DAdd(add) => { 484 let dst = add.dst.as_ssa().unwrap(); 485 if add.srcs[0].is_fneg_zero(SrcType::F64) { 486 self.add_fp64_copy(bi, dst, add.srcs[1]); 487 } else if add.srcs[1].is_fneg_zero(SrcType::F64) { 488 self.add_fp64_copy(bi, dst, add.srcs[0]); 489 } 490 } 491 Op::Lop3(lop) => { 492 let dst = lop.dst.as_ssa().unwrap(); 493 assert!(dst.comps() == 1); 494 let dst = dst[0]; 495 496 let op = lop.op; 497 if op.lut == 0 { 498 self.add_copy(bi, dst, SrcType::ALU, SrcRef::Zero.into()); 499 } else if op.lut == !0 { 500 self.add_copy( 501 bi, 502 dst, 503 SrcType::ALU, 504 SrcRef::Imm32(u32::MAX).into(), 505 ); 506 } else { 507 for s in 0..3 { 508 if op.lut == LogicOp3::SRC_MASKS[s] { 509 self.add_copy(bi, dst, SrcType::ALU, lop.srcs[s]); 510 } 511 } 512 } 513 } 514 Op::PLop3(lop) => { 515 for i in 0..2 { 516 let dst = match lop.dsts[i] { 517 Dst::SSA(vec) => { 518 assert!(vec.comps() == 1); 519 vec[0] 520 } 521 _ => continue, 522 }; 523 524 let op = lop.ops[i]; 525 if op.lut == 0 { 526 self.add_copy( 527 bi, 528 dst, 529 SrcType::Pred, 530 SrcRef::False.into(), 531 ); 532 } else if op.lut == !0 { 533 self.add_copy( 534 bi, 535 dst, 536 SrcType::Pred, 537 SrcRef::True.into(), 538 ); 539 } else { 540 for s in 0..3 { 541 if op.lut == LogicOp3::SRC_MASKS[s] { 542 self.add_copy( 543 bi, 544 dst, 545 SrcType::Pred, 546 lop.srcs[s], 547 ); 548 } else if op.lut == !LogicOp3::SRC_MASKS[s] { 549 self.add_copy( 550 bi, 551 dst, 552 SrcType::Pred, 553 lop.srcs[s].bnot(), 554 ); 555 } 556 } 557 } 558 } 559 } 560 Op::IAdd2(add) => { 561 let dst = add.dst.as_ssa().unwrap(); 562 assert!(dst.comps() == 1); 563 let dst = dst[0]; 564 565 if add.srcs[0].is_zero() { 566 self.add_copy(bi, dst, SrcType::I32, add.srcs[1]); 567 } else if add.srcs[1].is_zero() { 568 self.add_copy(bi, dst, SrcType::I32, add.srcs[0]); 569 } 570 } 571 Op::IAdd3(add) => { 572 let dst = add.dst.as_ssa().unwrap(); 573 assert!(dst.comps() == 1); 574 let dst = dst[0]; 575 576 if add.srcs[0].is_zero() { 577 if add.srcs[1].is_zero() { 578 self.add_copy(bi, dst, SrcType::I32, add.srcs[2]); 579 } else if add.srcs[2].is_zero() { 580 self.add_copy(bi, dst, SrcType::I32, add.srcs[1]); 581 } 582 } else if add.srcs[1].is_zero() && add.srcs[2].is_zero() { 583 self.add_copy(bi, dst, SrcType::I32, add.srcs[0]); 584 } 585 } 586 Op::Prmt(prmt) => { 587 let dst = prmt.dst.as_ssa().unwrap(); 588 assert!(dst.comps() == 1); 589 if let Some(sel) = prmt.get_sel() { 590 if let Some(imm) = prmt.as_u32() { 591 self.add_copy(bi, dst[0], SrcType::GPR, imm.into()); 592 } else if sel == PrmtSel(0x3210) { 593 self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]); 594 } else if sel == PrmtSel(0x7654) { 595 self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]); 596 } else { 597 self.add_prmt(bi, dst[0], sel, prmt.srcs); 598 } 599 } 600 } 601 Op::R2UR(r2ur) => { 602 assert!(r2ur.src.src_mod.is_none()); 603 if r2ur.src.is_uniform() { 604 let dst = r2ur.dst.as_ssa().unwrap(); 605 assert!(dst.comps() == 1); 606 self.add_copy(bi, dst[0], SrcType::GPR, r2ur.src); 607 } 608 } 609 Op::Copy(copy) => { 610 let dst = copy.dst.as_ssa().unwrap(); 611 assert!(dst.comps() == 1); 612 self.add_copy(bi, dst[0], SrcType::GPR, copy.src); 613 } 614 Op::ParCopy(pcopy) => { 615 for (dst, src) in pcopy.dsts_srcs.iter() { 616 let dst = dst.as_ssa().unwrap(); 617 assert!(dst.comps() == 1); 618 self.add_copy(bi, dst[0], SrcType::GPR, *src); 619 } 620 } 621 _ => (), 622 } 623 } 624 run(&mut self, f: &mut Function)625 pub fn run(&mut self, f: &mut Function) { 626 for (bi, b) in f.blocks.iter_mut().enumerate() { 627 let b_uniform = b.uniform; 628 for instr in &mut b.instrs { 629 self.try_add_instr(bi, instr); 630 631 self.prop_to_pred(&mut instr.pred); 632 633 let cbuf_rule = if instr.is_uniform() { 634 CBufRule::No 635 } else if !b_uniform { 636 CBufRule::BindlessRequiresBlock(bi) 637 } else { 638 CBufRule::Yes 639 }; 640 641 match &mut instr.op { 642 Op::IAdd2(add) => { 643 // Carry-out interacts funny with SrcMod::INeg so we can 644 // only propagate with modifiers if no carry is written. 645 use SrcType::{ALU, I32}; 646 let [src0, src1] = &mut add.srcs; 647 if add.carry_out.is_none() { 648 self.prop_to_src(I32, &cbuf_rule, src0); 649 self.prop_to_src(I32, &cbuf_rule, src1); 650 } else { 651 self.prop_to_src(ALU, &cbuf_rule, src0); 652 self.prop_to_src(ALU, &cbuf_rule, src1); 653 } 654 } 655 Op::IAdd3(add) => { 656 // Overflow interacts funny with SrcMod::INeg so we can 657 // only propagate with modifiers if no overflow values 658 // are written. 659 use SrcType::{ALU, I32}; 660 let [src0, src1, src2] = &mut add.srcs; 661 if add.overflow[0].is_none() 662 && add.overflow[0].is_none() 663 { 664 self.prop_to_src(I32, &cbuf_rule, src0); 665 self.prop_to_src(I32, &cbuf_rule, src1); 666 self.prop_to_src(I32, &cbuf_rule, src2); 667 } else { 668 self.prop_to_src(ALU, &cbuf_rule, src0); 669 self.prop_to_src(ALU, &cbuf_rule, src1); 670 self.prop_to_src(ALU, &cbuf_rule, src2); 671 } 672 } 673 _ => { 674 let src_types = instr.src_types(); 675 for (i, src) in instr.srcs_mut().iter_mut().enumerate() 676 { 677 self.prop_to_src(src_types[i], &cbuf_rule, src); 678 } 679 } 680 } 681 } 682 } 683 } 684 } 685 686 impl Shader<'_> { opt_copy_prop(&mut self)687 pub fn opt_copy_prop(&mut self) { 688 for f in &mut self.functions { 689 CopyPropPass::new().run(f); 690 } 691 } 692 } 693