1 /* 2 * Copyright 2024 Valve Corporation 3 * SPDX-License-Identifier: MIT 4 */ 5 6 #include "helpers.h" 7 8 using namespace aco; 9 10 BEGIN_TEST(vopd_sched.commutative) 11 if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32)) 12 return; 13 14 PhysReg reg_v0{256}; 15 PhysReg reg_v1{257}; 16 PhysReg reg_v2{258}; 17 PhysReg reg_v3{259}; 18 19 //>> p_unit_test 0 20 //! v1: %0:v[1] = v_dual_add_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_mul_f32 %0:v[2], %0:v[3] 21 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 22 bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 23 Operand(reg_v3, v1)); 24 bld.vop2(aco_opcode::v_add_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), 25 Operand(reg_v3, v1)); 26 27 /* Neither of these opcodes are commutative. */ 28 bld.reset(program->create_and_insert_block()); 29 //>> p_unit_test 1 30 //! v1: %0:v[0] = v_fmamk_f32 %0:v[2], %0:v[3], 0 31 //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] 32 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 33 bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 34 Operand(reg_v3, v1), Operand::zero()); 35 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 36 Operand(reg_v3, v1)); 37 38 /* We have to change the opcode for subtractions. */ 39 bld.reset(program->create_and_insert_block()); 40 //>> p_unit_test 2 41 //! v1: %0:v[1] = v_dual_subrev_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 42 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 43 bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 44 Operand(reg_v3, v1), Operand::zero()); 45 bld.vop2(aco_opcode::v_sub_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), 46 Operand(reg_v3, v1)); 47 48 bld.reset(program->create_and_insert_block()); 49 //>> p_unit_test 3 50 //! v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[1] = v_dual_sub_f32 %0:v[3], %0:v[2] 51 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 52 bld.vop2(aco_opcode::v_subrev_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), 53 Operand(reg_v3, v1)); 54 bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 55 Operand(reg_v3, v1), Operand::zero()); 56 57 /* If we have to move the second instruction into OPY instead of OPX, then swapping must still be 58 * correct. */ 59 bld.reset(program->create_and_insert_block()); 60 //>> p_unit_test 4 61 //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3] 62 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 63 bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 64 Operand(reg_v3, v1)); 65 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 66 Operand(reg_v3, v1)); 67 68 bld.reset(program->create_and_insert_block()); 69 //>> p_unit_test 5 70 //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3] 71 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); 72 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 73 Operand(reg_v3, v1)); 74 bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1), 75 Operand(reg_v3, v1)); 76 77 finish_schedule_vopd_test(); 78 END_TEST 79 80 BEGIN_TEST(vopd_sched.mov_to_add_bfrev) 81 if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32)) 82 return; 83 84 PhysReg reg_v0{256}; 85 PhysReg reg_v1{257}; 86 PhysReg reg_v2{258}; 87 PhysReg reg_v3{259}; 88 89 //>> p_unit_test 0 90 //! v1: %0:v[1] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] 91 bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); 92 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 93 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v2, v1)); 94 95 /* We can't turn the v_mov_b32 into a v_add_u32 because then both instructions would be OPY-only. 96 */ 97 bld.reset(program->create_and_insert_block()); 98 //>> p_unit_test 1 99 //! v1: %0:v[0] = v_mov_b32 %0:v[2] 100 //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] 101 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1)); 102 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 103 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 104 Operand(reg_v3, v1)); 105 106 bld.reset(program->create_and_insert_block()); 107 //>> p_unit_test 2 108 //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3] 109 //! v1: %0:v[0] = v_mov_b32 %0:v[2] 110 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2)); 111 bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 112 Operand(reg_v3, v1)); 113 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 114 115 bld.reset(program->create_and_insert_block()); 116 //>> p_unit_test 3 117 //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2] 118 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3)); 119 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 120 bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 121 Operand(reg_v3, v1)); 122 123 bld.reset(program->create_and_insert_block()); 124 //>> p_unit_test 4 125 //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2] 126 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); 127 bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1), 128 Operand(reg_v3, v1)); 129 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 130 131 /* The v_add_u32 should be OPY, not OPX. */ 132 bld.reset(program->create_and_insert_block()); 133 //>> p_unit_test 5 134 //! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] 135 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); 136 bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), 137 Operand(reg_v3, v1), Operand::zero()); 138 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 139 140 bld.reset(program->create_and_insert_block()); 141 //>> p_unit_test 6 142 //! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2] 143 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6)); 144 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1)); 145 bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1), 146 Operand(reg_v3, v1), Operand::zero()); 147 148 //>> p_unit_test 7 149 //! v1: %0:v[1] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[0] = v_dual_mov_b32 0x3c000000 150 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7)); 151 bld.vop1(aco_opcode::v_bfrev_b32, Definition(reg_v0, v1), Operand::c32(60)); 152 bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v2, v1)); 153 154 finish_schedule_vopd_test(); 155 END_TEST 156