xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/tests/test_lower_subdword.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "helpers.h"
7 #include <stdarg.h>
8 
9 using namespace aco;
10 
/*
 * lower_subdword.simple_extract
 *
 * Sets up a GFX6 compute shader with a v1 input (%a, inputs[0]) and a v2 input
 * (%b, inputs[1]), emits six p_extract_vector pseudo-instructions with sub-dword
 * definitions (v2b, v1b, v3b), and then calls finish_lower_subdword_test().
 *
 * The annotation lines ("//>>" and "//!") name the instructions expected in the
 * output; each group sits directly above the builder call it belongs to.
 * NOTE(review): they are presumably consumed by the test harness, so treat their
 * text and order as part of the test -- confirm in the framework before editing.
 */
11 BEGIN_TEST(lower_subdword.simple_extract)
12    //>> v1: %a, v2: %b = p_startpgm
13    if (!setup_cs("v1 v2", GFX6))
14       return;
15 
      /* Index 0 of %a: only a copy is expected, no shift. */
16    //! v1: %aw0 = p_parallelcopy %a
17    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[0], Operand::c32(0));
18 
      /* Non-zero indices into %a: a single right shift is expected. The amounts
       * (16 for v2b index 1, 24 for v1b index 3) are consistent with
       * index * element width in bits, assuming the "b" suffix counts bytes. */
19    //! v1: %aw1 = v_lshrrev_b32 16, %a
20    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[0], Operand::c32(1));
21 
22    //! v1: %ab3 = v_lshrrev_b32 24, %a
23    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[0], Operand::c32(3));
24 
      /* Extracting from the v2 input: %b is expected to be split into two v1
       * halves first, with the shift applied to the half that holds the element
       * (second half here, first half in the next case). */
25    //! v1: %_,  v1: %bd1 = p_split_vector %b
26    //! v1: %6 = v_lshrrev_b32 16, %bd1
27    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand::c32(3));
28 
29    //! v1: %bd0,  v1: %_ = p_split_vector %b
30    //! v1: %bb2 = v_lshrrev_b32 16, %bd0
31    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), inputs[1], Operand::c32(2));
32 
      /* v3b at index 1: both halves of %b are expected to be used, combined by
       * one v_alignbyte_b32 with offset 3. */
33    //! v1: %bd0_2,  v1: %bd1_2 = p_split_vector %b
34    //! v1: %bv3bhi = v_alignbyte_b32 %bd1_2, %bd0_2, 3
35    bld.pseudo(aco_opcode::p_extract_vector, bld.def(v3b), inputs[1], Operand::c32(1));
36 
      /* NOTE(review): helper defined outside this file; presumably runs the
       * lowering and hands the program to the checker -- confirm in helpers.h. */
37    finish_lower_subdword_test();
38 END_TEST
39 
/*
 * lower_subdword.simple_split
 *
 * Sets up a GFX6 compute shader with a v1 input (%a, inputs[0]) and a v2 input
 * (%b, inputs[1]) and emits two p_split_vector pseudo-instructions whose
 * definitions are sub-dword (v1b / v2b), then calls finish_lower_subdword_test().
 *
 * The annotation lines ("//>>" and "//!") name the instructions expected in the
 * output, in order; each group sits directly above the builder call it belongs to.
 * NOTE(review): they are presumably consumed by the test harness, so treat their
 * text and order as part of the test -- confirm in the framework before editing.
 */
40 BEGIN_TEST(lower_subdword.simple_split)
41    //>> v1: %a, v2: %b = p_startpgm
42    if (!setup_cs("v1 v2", GFX6))
43       return;
44 
      /* %a split into v1b, v2b, v1b: one instruction per piece is expected, a
       * copy for the first and right shifts by 8 and 24 for the others. Those
       * amounts are consistent with each piece's starting bit position,
       * assuming the "b" suffix counts bytes. */
45    //! v1: %o1_0 = p_parallelcopy %a
46    //! v1: %o1_1 = v_lshrrev_b32 8, %a
47    //! v1: %o1_2 = v_lshrrev_b32 24, %a
48    bld.pseudo(aco_opcode::p_split_vector, bld.def(v1b), bld.def(v2b), bld.def(v1b), inputs[0]);
49 
      /* %b split into eight v1b pieces: for every piece a fresh p_split_vector
       * of %b into two v1 halves is expected, followed by a copy (shift 0) or a
       * right shift by 8/16/24 of the relevant half. The first four pieces use
       * the first half, the last four use the second half. */
50    //! v1: %tmp2_0,  v1: %_ = p_split_vector %b
51    //! v1: %o2_0 = p_parallelcopy %tmp2_0
52    //! v1: %tmp2_1,  v1: %_ = p_split_vector %b
53    //! v1: %o2_1 = v_lshrrev_b32 8, %tmp2_1
54    //! v1: %tmp2_2,  v1: %_ = p_split_vector %b
55    //! v1: %o2_2 = v_lshrrev_b32 16, %tmp2_2
56    //! v1: %tmp2_3,  v1: %_ = p_split_vector %b
57    //! v1: %o2_3 = v_lshrrev_b32 24, %tmp2_3
58    //! v1: %_,  v1: %tmp2_4 = p_split_vector %b
59    //! v1: %o2_4 = p_parallelcopy %tmp2_4
60    //! v1: %_,  v1: %tmp2_5 = p_split_vector %b
61    //! v1: %o2_5 = v_lshrrev_b32 8, %tmp2_5
62    //! v1: %_,  v1: %tmp2_6 = p_split_vector %b
63    //! v1: %o2_6 = v_lshrrev_b32 16, %tmp2_6
64    //! v1: %_,  v1: %tmp2_7 = p_split_vector %b
65    //! v1: %o2_7 = v_lshrrev_b32 24, %tmp2_7
66    bld.pseudo(aco_opcode::p_split_vector, bld.def(v1b), bld.def(v1b), bld.def(v1b), bld.def(v1b),
67               bld.def(v1b), bld.def(v1b), bld.def(v1b), bld.def(v1b), inputs[1]);
68 
      /* NOTE(review): helper defined outside this file; presumably runs the
       * lowering and hands the program to the checker -- confirm in helpers.h. */
69    finish_lower_subdword_test();
70 END_TEST
71 
/*
 * lower_subdword.simple_create
 *
 * Sets up a GFX6 compute shader with five inputs declared as "v1b v2b v3b s1 v1"
 * (named %b, %w, %tb, %sd, %vd in the annotations; note that all of them appear
 * as v1 or s1 in the expected p_startpgm line) and emits five p_create_vector
 * pseudo-instructions that assemble results from those inputs, then calls
 * finish_lower_subdword_test().
 *
 * The annotation lines ("//>>" and "//!") name the instructions expected in the
 * output, in order; each group sits directly above the builder call it belongs to.
 * The expected pattern is a v_lshlrev_b32 on the first sub-dword operand followed
 * by one v_alignbyte_b32 per further operand; results wider than v1 end in a
 * p_create_vector of the per-dword temporaries.
 * NOTE(review): the annotations are presumably consumed by the test harness, so
 * treat their text and order as part of the test -- confirm in the framework.
 */
72 BEGIN_TEST(lower_subdword.simple_create)
73    //>> v1: %b,  v1: %w,  v1: %tb,  s1: %sd,  v1: %vd = p_startpgm
74    if (!setup_cs("v1b v2b v3b s1 v1 ", GFX6))
75       return;
76 
      /* Four v1b operands into one v1: one shift plus three alignbyte steps. */
77    //! v1: %tmp1_0 = v_lshlrev_b32 24, %b
78    //! v1: %tmp1_1 = v_alignbyte_b32 %b, %tmp1_0, 1
79    //! v1: %tmp1_2 = v_alignbyte_b32 %b, %tmp1_1, 1
80    //! v1: %o1 = v_alignbyte_b32 %b, %tmp1_2, 1
81    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], inputs[0], inputs[0], inputs[0]);
82 
      /* v1b, v2b, v1b into one v1: the alignbyte offset for the middle operand
       * is 2 instead of 1. */
83    //! v1: %tmp2_0 = v_lshlrev_b32 24, %b
84    //! v1: %tmp2_1 = v_alignbyte_b32 %w, %tmp2_0, 2
85    //! v1: %o2 = v_alignbyte_b32 %b, %tmp2_1, 1
86    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], inputs[1], inputs[0]);
87 
      /* v2 result: two temporaries are expected to be built separately
       * (%tmp3_1 and %tmp3_4) and then joined by a p_create_vector. */
88    //! v1: %tmp3_0 = v_lshlrev_b32 24, %b
89    //! v1: %tmp3_1 = v_alignbyte_b32 %tb, %tmp3_0, 3
90    //! v1: %tmp3_2 = v_lshlrev_b32 16, %w
91    //! v1: %tmp3_3 = v_alignbyte_b32 %b, %tmp3_2, 1
92    //! v1: %tmp3_4 = v_alignbyte_b32 %b, %tmp3_3, 1
93    //! v2: %o3 = p_create_vector %tmp3_1, %tmp3_4
94    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), inputs[0], inputs[2], inputs[1], inputs[0],
95               inputs[0]);
96 
       /* v4 result mixing the s1 and v1 inputs with sub-dword ones: %sd and %vd
        * are expected to be copied as they are, and the remaining two
        * temporaries are assembled from %w and %tb. */
97    //! v1: %tmp4_0 = p_parallelcopy %sd
98    //! v1: %tmp4_1 = p_parallelcopy %vd
99    //! v1: %tmp4_2 = v_lshlrev_b32 16, %w
100    //! v1: %tmp4_3 = v_alignbyte_b32 %tb, %tmp4_2, 2
101    //! v1: %tmp4_4 = v_lshlrev_b32 8, %tb
102    //! v1: %tmp4_5 = v_alignbyte_b32 %tb, %tmp4_4, 3
103    //! v4: %o4 = p_create_vector %tmp4_0, %tmp4_1, %tmp4_3, %tmp4_5
104    bld.pseudo(aco_opcode::p_create_vector, bld.def(v4), inputs[3], inputs[4], inputs[1], inputs[2],
105               inputs[2]);
106 
       /* v2b result from two v1b operands: expected as a shift by 24 followed by
        * a single alignbyte with offset 3. */
107    //! v1: %tmp5_0 = v_lshlrev_b32 24, %b
108    //! v1: %o5 = v_alignbyte_b32 %b, %tmp5_0, 3
109    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2b), inputs[0], inputs[0]);
110 
       /* NOTE(review): helper defined outside this file; presumably runs the
        * lowering and hands the program to the checker -- confirm in helpers.h. */
111    finish_lower_subdword_test();
112 END_TEST
113 
/*
 * lower_subdword.create_const_undef
 *
 * Sets up a GFX6 compute shader with two inputs declared as "v1b v2b" (%b and %w
 * in the annotations) and emits five p_create_vector pseudo-instructions whose
 * operands mix those inputs with 8-bit constants (Operand::c8) and operands
 * constructed from a register class only (Operand(v1b), Operand(v2b)), then
 * calls finish_lower_subdword_test().
 * NOTE(review): going by the test name, the class-only operands are presumably
 * undefined values -- confirm against the Operand constructors.
 *
 * The annotation lines ("//>>" and "//!") name the instructions expected in the
 * output, in order; each group sits directly above the builder call it belongs to.
 * NOTE(review): they are presumably consumed by the test harness, so treat their
 * text and order as part of the test -- confirm in the framework before editing.
 */
114 BEGIN_TEST(lower_subdword.create_const_undef)
115    //>> v1: %b,  v1: %w = p_startpgm
116    if (!setup_cs("v1b v2b", GFX6))
117       return;
118 
       /* Input followed only by class-only operands: a plain copy of %b is
        * expected and the class-only operands produce no instruction. */
119    //! v1: %o1 = p_parallelcopy %b
120    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), inputs[0], Operand(v1b), Operand(v2b));
121 
       /* Constants only: a copy of one combined immediate is expected.
        * 0x40300 == 0 | (3 << 8) | (4 << 16). */
122    //! v1: %o2 = p_parallelcopy 0x40300
123    bld.pseudo(aco_opcode::p_create_vector, bld.def(v3b), Operand::c8(0), Operand::c8(3),
124               Operand::c8(4));
125 
       /* Leading constant 100 (0x64): expected as the immediate 0x64000000 in an
        * s1 temporary, which then feeds the alignbyte chain like a shifted
        * first operand would. */
126    //! s1: %tmp1 = p_parallelcopy 0x64000000
127    //! v1: %o3 = v_alignbyte_b32 %w, %tmp1, 3
128    bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::c8(100), inputs[1], Operand(v1b));
129 
       /* v6b result with a class-only operand in the middle: two temporaries are
        * expected, the second being a plain copy of %w, joined into a v2 by a
        * p_create_vector. */
130    //! s1: %tmp2 = p_parallelcopy 0x64000000
131    //! v1: %tmp3 = v_alignbyte_b32 %b, %tmp2, 2
132    //! v1: %tmp4 = v_alignbyte_b32 %b, %tmp3, 1
133    //! v1: %tmp5 = p_parallelcopy %w
134    //! v2: %o4 = p_create_vector %tmp4, %tmp5
135    bld.pseudo(aco_opcode::p_create_vector, bld.def(v6b), Operand::c8(100), inputs[0], Operand(v1b),
136               inputs[0], inputs[1]);
137 
       /* Leading class-only operand: the expected alignbyte takes the literal 0
        * in its place. */
138    //! v1: %o5 = v_alignbyte_b32 %b, 0, 3
139    bld.pseudo(aco_opcode::p_create_vector, bld.def(v2b), Operand(v1b), inputs[0]);
140 
       /* NOTE(review): helper defined outside this file; presumably runs the
        * lowering and hands the program to the checker -- confirm in helpers.h. */
141    finish_lower_subdword_test();
142 END_TEST
143