xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/no-shrink-extloads.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
2*9880d681SAndroid Build Coastguard Worker
; Intrinsic declaration. The *_buffer_load_* tests below call it and use the
; returned i32 as the element index into their %in and %out pointers.
3*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
4*9880d681SAndroid Build Coastguard Worker
5*9880d681SAndroid Build Coastguard Worker; Make sure we don't turn the 32-bit argument load into a 16-bit
6*9880d681SAndroid Build Coastguard Worker; load. There aren't extending scalar loads, so that would require
7*9880d681SAndroid Build Coastguard Worker; using a buffer_load instruction.
8*9880d681SAndroid Build Coastguard Worker
; Argument case, i32 -> i16: %arg is truncated to i16 and stored to %out.
; The checks expect a full scalar dword load (s_load_dword) followed by a
; 16-bit store (buffer_store_short).
9*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
10*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
11*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_short v
12*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
13*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %arg to i16
14*9880d681SAndroid Build Coastguard Worker  store i16 %trunc, i16 addrspace(1)* %out
15*9880d681SAndroid Build Coastguard Worker  ret void
16*9880d681SAndroid Build Coastguard Worker}
17*9880d681SAndroid Build Coastguard Worker
18*9880d681SAndroid Build Coastguard Worker; It should be OK (and probably performance neutral) to reduce this,
19*9880d681SAndroid Build Coastguard Worker; but we don't know if the load is uniform yet.
20*9880d681SAndroid Build Coastguard Worker
; Memory case, i32 -> i16: an i32 is loaded from %in[%tid], truncated to i16
; and stored to %out[%tid], where %tid comes from the workitem.id.x intrinsic.
; The checks expect the load to stay a full dword (buffer_load_dword) and the
; store to be 16-bit (buffer_store_short).
21*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
22*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
23*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_short v
24*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
25*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
26*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
27*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
28*9880d681SAndroid Build Coastguard Worker  %load = load i32, i32 addrspace(1)* %gep.in
29*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %load to i16
30*9880d681SAndroid Build Coastguard Worker  store i16 %trunc, i16 addrspace(1)* %gep.out
31*9880d681SAndroid Build Coastguard Worker  ret void
32*9880d681SAndroid Build Coastguard Worker}
33*9880d681SAndroid Build Coastguard Worker
; Argument case, i32 -> i8: %arg is truncated to i8 and stored to %out.
; The checks expect a full scalar dword load (s_load_dword) followed by a
; byte store (buffer_store_byte).
34*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
35*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
36*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
37*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
38*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %arg to i8
39*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %out
40*9880d681SAndroid Build Coastguard Worker  ret void
41*9880d681SAndroid Build Coastguard Worker}
42*9880d681SAndroid Build Coastguard Worker
; Memory case, i32 -> i8: an i32 is loaded from %in[%tid], truncated to i8
; and stored to %out[%tid], where %tid comes from the workitem.id.x intrinsic.
; The checks expect the load to stay a full dword (buffer_load_dword) and the
; store to be a byte store (buffer_store_byte).
43*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
44*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
45*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
46*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
47*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
48*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
49*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
50*9880d681SAndroid Build Coastguard Worker  %load = load i32, i32 addrspace(1)* %gep.in
51*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %load to i8
52*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %gep.out
53*9880d681SAndroid Build Coastguard Worker  ret void
54*9880d681SAndroid Build Coastguard Worker}
55*9880d681SAndroid Build Coastguard Worker
; Argument case, i32 -> i1: %arg is truncated to i1 and stored to %out.
; The checks expect a full scalar dword load (s_load_dword); the i1 store is
; expected to be emitted as a byte store (buffer_store_byte).
56*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
57*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
58*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
59*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
60*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %arg to i1
61*9880d681SAndroid Build Coastguard Worker  store i1 %trunc, i1 addrspace(1)* %out
62*9880d681SAndroid Build Coastguard Worker  ret void
63*9880d681SAndroid Build Coastguard Worker}
64*9880d681SAndroid Build Coastguard Worker
; Memory case, i32 -> i1: an i32 is loaded from %in[%tid], truncated to i1
; and stored to %out[%tid], where %tid comes from the workitem.id.x intrinsic.
; The checks expect the load to stay a full dword (buffer_load_dword); the i1
; store is expected to be emitted as a byte store (buffer_store_byte).
65*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
66*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
67*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
68*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
69*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
70*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
71*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
72*9880d681SAndroid Build Coastguard Worker  %load = load i32, i32 addrspace(1)* %gep.in
73*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i32 %load to i1
74*9880d681SAndroid Build Coastguard Worker  store i1 %trunc, i1 addrspace(1)* %gep.out
75*9880d681SAndroid Build Coastguard Worker  ret void
76*9880d681SAndroid Build Coastguard Worker}
77*9880d681SAndroid Build Coastguard Worker
; Argument case, i64 -> i32: the i64 %arg is truncated to its low 32 bits and
; stored to %out. The checks expect a scalar load matching the s_load_dword
; pattern followed by a dword store (buffer_store_dword).
78*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
79*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
80*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword v
81*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
82*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %arg to i32
83*9880d681SAndroid Build Coastguard Worker  store i32 %trunc, i32 addrspace(1)* %out
84*9880d681SAndroid Build Coastguard Worker  ret void
85*9880d681SAndroid Build Coastguard Worker}
86*9880d681SAndroid Build Coastguard Worker
; Memory case, i64 -> i32: an i64 is loaded from %in[%tid], truncated to its
; low 32 bits and stored to %out[%tid], where %tid comes from the
; workitem.id.x intrinsic. The checks expect a load matching the
; buffer_load_dword pattern and a dword store (buffer_store_dword).
87*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
88*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
89*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword v
90*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
91*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
92*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
93*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
94*9880d681SAndroid Build Coastguard Worker  %load = load i64, i64 addrspace(1)* %gep.in
95*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %load to i32
96*9880d681SAndroid Build Coastguard Worker  store i32 %trunc, i32 addrspace(1)* %gep.out
97*9880d681SAndroid Build Coastguard Worker  ret void
98*9880d681SAndroid Build Coastguard Worker}
99*9880d681SAndroid Build Coastguard Worker
; Argument case, high half of an i64: %arg is shifted right by 32 and then
; truncated to i32, so the stored value is the upper 32 bits of %arg.
; The checks expect a scalar load matching the s_load_dword pattern followed
; by a dword store (buffer_store_dword).
100*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
101*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
102*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword v
103*9880d681SAndroid Build Coastguard Workerdefine void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
104*9880d681SAndroid Build Coastguard Worker  %srl = lshr i64 %arg, 32
105*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %srl to i32
106*9880d681SAndroid Build Coastguard Worker  store i32 %trunc, i32 addrspace(1)* %out
107*9880d681SAndroid Build Coastguard Worker  ret void
108*9880d681SAndroid Build Coastguard Worker}
109*9880d681SAndroid Build Coastguard Worker
; Memory case, high half of an i64: an i64 is loaded from %in[%tid], shifted
; right by 32 and truncated to i32 (the upper 32 bits of the loaded value),
; then stored to %out[%tid]. %tid comes from the workitem.id.x intrinsic.
; The checks expect a load matching the buffer_load_dword pattern and a dword
; store (buffer_store_dword).
110*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
111*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
112*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword v
113*9880d681SAndroid Build Coastguard Workerdefine void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
114*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
115*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
116*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
117*9880d681SAndroid Build Coastguard Worker  %load = load i64, i64 addrspace(1)* %gep.in
118*9880d681SAndroid Build Coastguard Worker  %srl = lshr i64 %load, 32
119*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %srl to i32
120*9880d681SAndroid Build Coastguard Worker  store i32 %trunc, i32 addrspace(1)* %gep.out
121*9880d681SAndroid Build Coastguard Worker  ret void
122*9880d681SAndroid Build Coastguard Worker}
123*9880d681SAndroid Build Coastguard Worker
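124*9880d681SAndroid Build Coastguard Worker; Might as well reduce to 8-bit loads. Only the buffer-load variant below is checked for a byte load; the kernarg variant is still checked for s_load_dword.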
; Argument case, i16 -> i8: the i16 %arg is truncated to i8 and stored to
; %out. The checks expect a scalar dword load (s_load_dword) followed by a
; byte store (buffer_store_byte).
125*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
126*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
127*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
128*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
129*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i16 %arg to i8
130*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %out
131*9880d681SAndroid Build Coastguard Worker  ret void
132*9880d681SAndroid Build Coastguard Worker}
133*9880d681SAndroid Build Coastguard Worker
; Memory case, i16 -> i8: an i16 is loaded from %in[%tid], truncated to i8
; and stored to %out[%tid], where %tid comes from the workitem.id.x intrinsic.
; Unlike the wider cases in this file, the checks here expect the load itself
; to be narrowed to a byte load (buffer_load_ubyte) before the byte store.
134*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
135*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_ubyte v
136*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
137*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
138*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
139*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
140*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
141*9880d681SAndroid Build Coastguard Worker  %load = load i16, i16 addrspace(1)* %gep.in
142*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i16 %load to i8
143*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %gep.out
144*9880d681SAndroid Build Coastguard Worker  ret void
145*9880d681SAndroid Build Coastguard Worker}
146*9880d681SAndroid Build Coastguard Worker
; Argument case, byte from the high half of an i64: %arg is shifted right by
; 32 and truncated to i8, so the stored byte is bits 32..39 of %arg.
; The checks expect a scalar load matching the s_load_dword pattern followed
; by a byte store (buffer_store_byte).
147*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
148*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
149*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
150*9880d681SAndroid Build Coastguard Workerdefine void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
151*9880d681SAndroid Build Coastguard Worker  %srl = lshr i64 %arg, 32
152*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %srl to i8
153*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %out
154*9880d681SAndroid Build Coastguard Worker  ret void
155*9880d681SAndroid Build Coastguard Worker}
156*9880d681SAndroid Build Coastguard Worker
; Memory case, byte from the high half of an i64: an i64 is loaded from
; %in[%tid], shifted right by 32 and truncated to i8 (bits 32..39 of the
; loaded value), then stored to %out[%tid]. %tid comes from the workitem.id.x
; intrinsic. The checks expect a load matching the buffer_load_dword pattern
; and a byte store (buffer_store_byte).
157*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
158*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
159*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
160*9880d681SAndroid Build Coastguard Workerdefine void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
161*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
162*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
163*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
164*9880d681SAndroid Build Coastguard Worker  %load = load i64, i64 addrspace(1)* %gep.in
165*9880d681SAndroid Build Coastguard Worker  %srl = lshr i64 %load, 32
166*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %srl to i8
167*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %gep.out
168*9880d681SAndroid Build Coastguard Worker  ret void
169*9880d681SAndroid Build Coastguard Worker}
170*9880d681SAndroid Build Coastguard Worker
; Argument case, i64 -> i8: the i64 %arg is truncated to its low 8 bits and
; stored to %out. The checks expect a scalar load matching the s_load_dword
; pattern followed by a byte store (buffer_store_byte).
171*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
172*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword s
173*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
174*9880d681SAndroid Build Coastguard Workerdefine void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
175*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %arg to i8
176*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %out
177*9880d681SAndroid Build Coastguard Worker  ret void
178*9880d681SAndroid Build Coastguard Worker}
179*9880d681SAndroid Build Coastguard Worker
; Memory case, i64 -> i8: an i64 is loaded from %in[%tid], truncated to its
; low 8 bits and stored to %out[%tid], where %tid comes from the
; workitem.id.x intrinsic. The checks expect a load matching the
; buffer_load_dword pattern and a byte store (buffer_store_byte).
180*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
181*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
182*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_byte v
183*9880d681SAndroid Build Coastguard Workerdefine void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
184*9880d681SAndroid Build Coastguard Worker  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
185*9880d681SAndroid Build Coastguard Worker  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
186*9880d681SAndroid Build Coastguard Worker  %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
187*9880d681SAndroid Build Coastguard Worker  %load = load i64, i64 addrspace(1)* %gep.in
188*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %load to i8
189*9880d681SAndroid Build Coastguard Worker  store i8 %trunc, i8 addrspace(1)* %gep.out
190*9880d681SAndroid Build Coastguard Worker  ret void
191*9880d681SAndroid Build Coastguard Worker}
192*9880d681SAndroid Build Coastguard Worker
; Masking case: an i32 is loaded through the addrspace(2) pointer %in, ANDed
; with 65535 and stored to %out as a full i32. The checks expect the scalar
; dword load to be kept (offset 0x0), a wait on lgkmcnt(0), and the mask to be
; applied to the loaded register with s_and_b32 ..., 0xffff.
; The label pattern ends in ':' like every other label check in this file, so
; it matches the complete label rather than any line that merely starts with
; this function's name.
193*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16:
194*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0
195*9880d681SAndroid Build Coastguard Worker; SI: s_waitcnt lgkmcnt(0)
196*9880d681SAndroid Build Coastguard Worker; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff
197*9880d681SAndroid Build Coastguard Workerdefine void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
198*9880d681SAndroid Build Coastguard Workerentry:
199*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(2)* %in
200*9880d681SAndroid Build Coastguard Worker  %mask = and i32 %val, 65535
201*9880d681SAndroid Build Coastguard Worker  store i32 %mask, i32 addrspace(1)* %out
202*9880d681SAndroid Build Coastguard Worker  ret void
203*9880d681SAndroid Build Coastguard Worker}
204*9880d681SAndroid Build Coastguard Worker
; Vector case: a <2 x i32> is loaded directly from %in (no work-item
; indexing), bitcast to i64, shifted right by 32 and truncated to i32, so the
; stored value is the upper 32 bits of the bitcast i64.
; The checks expect a load matching the buffer_load_dword pattern followed by
; a dword store (buffer_store_dword).
205*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}extract_hi_i64_bitcast_v2i32:
206*9880d681SAndroid Build Coastguard Worker; SI: buffer_load_dword v
207*9880d681SAndroid Build Coastguard Worker; SI: buffer_store_dword v
208*9880d681SAndroid Build Coastguard Workerdefine void @extract_hi_i64_bitcast_v2i32(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
209*9880d681SAndroid Build Coastguard Worker  %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
210*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <2 x i32> %ld to i64
211*9880d681SAndroid Build Coastguard Worker  %hi = lshr i64 %bc, 32
212*9880d681SAndroid Build Coastguard Worker  %trunc = trunc i64 %hi to i32
213*9880d681SAndroid Build Coastguard Worker  store i32 %trunc, i32 addrspace(1)* %out
214*9880d681SAndroid Build Coastguard Worker  ret void
215*9880d681SAndroid Build Coastguard Worker}
216