xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/tests/test_scheduler.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2024 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "helpers.h"
7 
8 using namespace aco;
9 
10 BEGIN_TEST(vopd_sched.commutative)
11    if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32))
12       return;
13 
14    PhysReg reg_v0{256};
15    PhysReg reg_v1{257};
16    PhysReg reg_v2{258};
17    PhysReg reg_v3{259};
18 
19    //>> p_unit_test 0
20    //! v1: %0:v[1] = v_dual_add_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_mul_f32 %0:v[2], %0:v[3]
21    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
22    bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
23             Operand(reg_v3, v1));
24    bld.vop2(aco_opcode::v_add_f32, Definition(reg_v1, v1), Operand(reg_v2, v1),
25             Operand(reg_v3, v1));
26 
27    /* Neither of these opcodes are commutative. */
28    bld.reset(program->create_and_insert_block());
29    //>> p_unit_test 1
30    //! v1: %0:v[0] = v_fmamk_f32 %0:v[2], %0:v[3], 0
31    //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3]
32    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
33    bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
34             Operand(reg_v3, v1), Operand::zero());
35    bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
36             Operand(reg_v3, v1));
37 
38    /* We have to change the opcode for subtractions. */
39    bld.reset(program->create_and_insert_block());
40    //>> p_unit_test 2
41    //! v1: %0:v[1] = v_dual_subrev_f32 %0:v[3], %0:v[2] :: v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0
42    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
43    bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
44             Operand(reg_v3, v1), Operand::zero());
45    bld.vop2(aco_opcode::v_sub_f32, Definition(reg_v1, v1), Operand(reg_v2, v1),
46             Operand(reg_v3, v1));
47 
48    bld.reset(program->create_and_insert_block());
49    //>> p_unit_test 3
50    //! v1: %0:v[0] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[1] = v_dual_sub_f32 %0:v[3], %0:v[2]
51    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
52    bld.vop2(aco_opcode::v_subrev_f32, Definition(reg_v1, v1), Operand(reg_v2, v1),
53             Operand(reg_v3, v1));
54    bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
55             Operand(reg_v3, v1), Operand::zero());
56 
57    /* If we have to move the second instruction into OPY instead of OPX, then swapping must still be
58     * correct. */
59    bld.reset(program->create_and_insert_block());
60    //>> p_unit_test 4
61    //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3]
62    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
63    bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
64             Operand(reg_v3, v1));
65    bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
66             Operand(reg_v3, v1));
67 
68    bld.reset(program->create_and_insert_block());
69    //>> p_unit_test 5
70    //! v1: %0:v[0] = v_dual_mul_f32 %0:v[3], %0:v[2] :: v1: %0:v[1] = v_dual_lshlrev_b32 %0:v[2], %0:v[3]
71    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
72    bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
73             Operand(reg_v3, v1));
74    bld.vop2(aco_opcode::v_mul_f32, Definition(reg_v0, v1), Operand(reg_v2, v1),
75             Operand(reg_v3, v1));
76 
77    finish_schedule_vopd_test();
78 END_TEST
79 
80 BEGIN_TEST(vopd_sched.mov_to_add_bfrev)
81    if (!setup_cs(NULL, GFX11, CHIP_UNKNOWN, "", 32))
82       return;
83 
84    PhysReg reg_v0{256};
85    PhysReg reg_v1{257};
86    PhysReg reg_v2{258};
87    PhysReg reg_v3{259};
88 
89    //>> p_unit_test 0
90    //! v1: %0:v[1] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2]
91    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
92    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
93    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v2, v1));
94 
95    /* We can't turn the v_mov_b32 into a v_add_u32 because then both instructions would be OPY-only.
96     */
97    bld.reset(program->create_and_insert_block());
98    //>> p_unit_test 1
99    //! v1: %0:v[0] = v_mov_b32 %0:v[2]
100    //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3]
101    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
102    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
103    bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
104             Operand(reg_v3, v1));
105 
106    bld.reset(program->create_and_insert_block());
107    //>> p_unit_test 2
108    //! v1: %0:v[1] = v_lshlrev_b32 %0:v[2], %0:v[3]
109    //! v1: %0:v[0] = v_mov_b32 %0:v[2]
110    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
111    bld.vop2(aco_opcode::v_lshlrev_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
112             Operand(reg_v3, v1));
113    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
114 
115    bld.reset(program->create_and_insert_block());
116    //>> p_unit_test 3
117    //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2]
118    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
119    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
120    bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
121             Operand(reg_v3, v1));
122 
123    bld.reset(program->create_and_insert_block());
124    //>> p_unit_test 4
125    //! v1: %0:v[0] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[1] = v_dual_and_b32 %0:v[3], %0:v[2]
126    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
127    bld.vop2(aco_opcode::v_and_b32, Definition(reg_v1, v1), Operand(reg_v2, v1),
128             Operand(reg_v3, v1));
129    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
130 
131    /* The v_add_u32 should be OPY, not OPX. */
132    bld.reset(program->create_and_insert_block());
133    //>> p_unit_test 5
134    //! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2]
135    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
136    bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1),
137             Operand(reg_v3, v1), Operand::zero());
138    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
139 
140    bld.reset(program->create_and_insert_block());
141    //>> p_unit_test 6
142    //! v1: %0:v[1] = v_dual_fmamk_f32 %0:v[2], %0:v[3], 0 :: v1: %0:v[0] = v_dual_add_nc_u32 0, %0:v[2]
143    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
144    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v0, v1), Operand(reg_v2, v1));
145    bld.vop2(aco_opcode::v_fmamk_f32, Definition(reg_v1, v1), Operand(reg_v2, v1),
146             Operand(reg_v3, v1), Operand::zero());
147 
148    //>> p_unit_test 7
149    //! v1: %0:v[1] = v_dual_mov_b32 %0:v[2] :: v1: %0:v[0] = v_dual_mov_b32 0x3c000000
150    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
151    bld.vop1(aco_opcode::v_bfrev_b32, Definition(reg_v0, v1), Operand::c32(60));
152    bld.vop1(aco_opcode::v_mov_b32, Definition(reg_v1, v1), Operand(reg_v2, v1));
153 
154    finish_schedule_vopd_test();
155 END_TEST
156