xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/ctpop.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=SI %s
2*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
3*9880d681SAndroid Build Coastguard Worker; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
4*9880d681SAndroid Build Coastguard Worker
; Declarations of the population-count intrinsic for i32 and for the
; <2 x i32>, <4 x i32>, <8 x i32> and <16 x i32> vector widths used by the
; test functions below.
5*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.ctpop.i32(i32) nounwind readnone
6*9880d681SAndroid Build Coastguard Workerdeclare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
7*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
8*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
9*9880d681SAndroid Build Coastguard Workerdeclare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
10*9880d681SAndroid Build Coastguard Worker
11*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}s_ctpop_i32:
12*9880d681SAndroid Build Coastguard Worker; GCN: s_load_dword [[SVAL:s[0-9]+]],
13*9880d681SAndroid Build Coastguard Worker; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
14*9880d681SAndroid Build Coastguard Worker; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
15*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[VRESULT]],
16*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
17*9880d681SAndroid Build Coastguard Worker
18*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of the i32 argument %val, stored to %out.
; The checks preceding this function expect the scalar s_bcnt1_i32_b32 form
; (copied to a VGPR before the store) on the amdgcn runs and BCNT_INT on the
; r600 run.
19*9880d681SAndroid Build Coastguard Workerdefine void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
20*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
21*9880d681SAndroid Build Coastguard Worker  store i32 %ctpop, i32 addrspace(1)* %out, align 4
22*9880d681SAndroid Build Coastguard Worker  ret void
23*9880d681SAndroid Build Coastguard Worker}
24*9880d681SAndroid Build Coastguard Worker
25*9880d681SAndroid Build Coastguard Worker; XXX - Why 0 in register?
26*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32:
27*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL:v[0-9]+]],
28*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
29*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
30*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
31*9880d681SAndroid Build Coastguard Worker
32*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of an i32 loaded from %in, stored to %out.
; The checks preceding this function expect v_bcnt_u32_b32_e64 with the
; constant 0 as its second source operand.
33*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
34*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
35*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
36*9880d681SAndroid Build Coastguard Worker  store i32 %ctpop, i32 addrspace(1)* %out, align 4
37*9880d681SAndroid Build Coastguard Worker  ret void
38*9880d681SAndroid Build Coastguard Worker}
39*9880d681SAndroid Build Coastguard Worker
40*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
41*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL1:v[0-9]+]],
42*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
43*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], [[VAL1]], 0
44*9880d681SAndroid Build Coastguard Worker; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
45*9880d681SAndroid Build Coastguard Worker; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
46*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
47*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
48*9880d681SAndroid Build Coastguard Worker
49*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
50*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Sum of the population counts of two i32 values loaded from %in0 and %in1.
; The checks preceding this function expect two v_bcnt_u32_b32 instructions,
; the second one taking the first one's result as its second source operand.
51*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
52*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32 addrspace(1)* %in0, align 4
53*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32 addrspace(1)* %in1, align 4
54*9880d681SAndroid Build Coastguard Worker  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
55*9880d681SAndroid Build Coastguard Worker  %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
56*9880d681SAndroid Build Coastguard Worker  %add = add i32 %ctpop0, %ctpop1
57*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
58*9880d681SAndroid Build Coastguard Worker  ret void
59*9880d681SAndroid Build Coastguard Worker}
60*9880d681SAndroid Build Coastguard Worker
61*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
62*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL0:v[0-9]+]],
63*9880d681SAndroid Build Coastguard Worker; GCN: s_waitcnt
64*9880d681SAndroid Build Coastguard Worker; GCN-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
65*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
66*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
; Population count of an i32 loaded from %in0, plus the i32 argument %sval.
; The checks preceding this function expect a single v_bcnt_u32_b32_e64 whose
; second source operand is an SGPR. %in1 is not used by the body.
67*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
68*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32 addrspace(1)* %in0, align 4
69*9880d681SAndroid Build Coastguard Worker  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
70*9880d681SAndroid Build Coastguard Worker  %add = add i32 %ctpop0, %sval
71*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
72*9880d681SAndroid Build Coastguard Worker  ret void
73*9880d681SAndroid Build Coastguard Worker}
74*9880d681SAndroid Build Coastguard Worker
75*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_v2i32:
76*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
77*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
78*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
79*9880d681SAndroid Build Coastguard Worker
80*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
81*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of a <2 x i32> vector loaded from %in, stored to %out.
; The checks preceding this function expect two v_bcnt_u32_b32_e64
; instructions on the amdgcn runs and two BCNT_INT on the r600 run.
82*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
83*9880d681SAndroid Build Coastguard Worker  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
84*9880d681SAndroid Build Coastguard Worker  %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
85*9880d681SAndroid Build Coastguard Worker  store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
86*9880d681SAndroid Build Coastguard Worker  ret void
87*9880d681SAndroid Build Coastguard Worker}
88*9880d681SAndroid Build Coastguard Worker
89*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_v4i32:
90*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
91*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
92*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
93*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
94*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
95*9880d681SAndroid Build Coastguard Worker
96*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
97*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
98*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
99*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of a <4 x i32> vector loaded from %in, stored to %out.
; The checks preceding this function expect four v_bcnt_u32_b32_e64
; instructions on the amdgcn runs and four BCNT_INT on the r600 run.
100*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
101*9880d681SAndroid Build Coastguard Worker  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
102*9880d681SAndroid Build Coastguard Worker  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
103*9880d681SAndroid Build Coastguard Worker  store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
104*9880d681SAndroid Build Coastguard Worker  ret void
105*9880d681SAndroid Build Coastguard Worker}
106*9880d681SAndroid Build Coastguard Worker
107*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_v8i32:
108*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
109*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
110*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
111*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
112*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
113*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
114*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
115*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
116*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
117*9880d681SAndroid Build Coastguard Worker
118*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
119*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
120*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
121*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
122*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
123*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
124*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
125*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of an <8 x i32> vector loaded from %in, stored to %out.
; The checks preceding this function expect eight v_bcnt_u32_b32_e64
; instructions on the amdgcn runs and eight BCNT_INT on the r600 run.
126*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
127*9880d681SAndroid Build Coastguard Worker  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
128*9880d681SAndroid Build Coastguard Worker  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
129*9880d681SAndroid Build Coastguard Worker  store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
130*9880d681SAndroid Build Coastguard Worker  ret void
131*9880d681SAndroid Build Coastguard Worker}
132*9880d681SAndroid Build Coastguard Worker
133*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_v16i32:
134*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
135*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
136*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
137*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
138*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
139*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
140*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
141*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
142*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
143*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
144*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
145*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
146*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
147*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
148*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
149*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64
150*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
151*9880d681SAndroid Build Coastguard Worker
152*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
153*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
154*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
155*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
156*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
157*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
158*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
159*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
160*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
161*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
162*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
163*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
164*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
165*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
166*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
167*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of a <16 x i32> vector loaded from %in, stored to %out.
; The checks preceding this function expect sixteen v_bcnt_u32_b32_e64
; instructions on the amdgcn runs and sixteen BCNT_INT on the r600 run.
168*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
169*9880d681SAndroid Build Coastguard Worker  %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
170*9880d681SAndroid Build Coastguard Worker  %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
171*9880d681SAndroid Build Coastguard Worker  store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
172*9880d681SAndroid Build Coastguard Worker  ret void
173*9880d681SAndroid Build Coastguard Worker}
174*9880d681SAndroid Build Coastguard Worker
175*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
176*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL:v[0-9]+]],
177*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
178*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
179*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
180*9880d681SAndroid Build Coastguard Worker
181*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of an i32 loaded from %in, plus the constant 4.
; The checks preceding this function expect the 4 to appear directly as the
; second source operand of v_bcnt_u32_b32_e64.
182*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
183*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
184*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
185*9880d681SAndroid Build Coastguard Worker  %add = add i32 %ctpop, 4
186*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
187*9880d681SAndroid Build Coastguard Worker  ret void
188*9880d681SAndroid Build Coastguard Worker}
189*9880d681SAndroid Build Coastguard Worker
190*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
191*9880d681SAndroid Build Coastguard Worker; GCN: buffer_load_dword [[VAL:v[0-9]+]],
192*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
193*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
194*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Same computation as the constant-4 add, but with the add operands written
; in the opposite order (4 + ctpop). The checks preceding this function expect
; the same v_bcnt_u32_b32_e64 with 4 as its second source operand.
197*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
198*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
199*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
200*9880d681SAndroid Build Coastguard Worker  %add = add i32 4, %ctpop
201*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
202*9880d681SAndroid Build Coastguard Worker  ret void
203*9880d681SAndroid Build Coastguard Worker}
204*9880d681SAndroid Build Coastguard Worker
205*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
206*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
207*9880d681SAndroid Build Coastguard Worker; GCN-DAG: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
208*9880d681SAndroid Build Coastguard Worker; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
209*9880d681SAndroid Build Coastguard Worker; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
210*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
211*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
; Population count of an i32 loaded from %in, plus the constant 99999
; (0x1869f). The checks preceding this function expect the constant to be
; moved into a VGPR with v_mov_b32_e32 and that VGPR to be the second source
; operand of v_bcnt_u32_b32. No r600 checks are present for this function.
212*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
213*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
214*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
215*9880d681SAndroid Build Coastguard Worker  %add = add i32 %ctpop, 99999
216*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
217*9880d681SAndroid Build Coastguard Worker  ret void
218*9880d681SAndroid Build Coastguard Worker}
219*9880d681SAndroid Build Coastguard Worker
220*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
221*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
222*9880d681SAndroid Build Coastguard Worker; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
223*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
224*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
225*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
226*9880d681SAndroid Build Coastguard Worker
227*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of an i32 loaded from %in, plus the i32 argument %const.
; The checks preceding this function expect the argument to be fetched with
; s_load_dword and that SGPR to be the second source operand of
; v_bcnt_u32_b32_e64.
228*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
229*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
230*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
231*9880d681SAndroid Build Coastguard Worker  %add = add i32 %ctpop, %const
232*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
233*9880d681SAndroid Build Coastguard Worker  ret void
234*9880d681SAndroid Build Coastguard Worker}
235*9880d681SAndroid Build Coastguard Worker
236*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
237*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]],
238*9880d681SAndroid Build Coastguard Worker; GCN-DAG: s_load_dword [[VAR:s[0-9]+]],
239*9880d681SAndroid Build Coastguard Worker; GCN: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
240*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
241*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
242*9880d681SAndroid Build Coastguard Worker
243*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Same computation as the ctpop-plus-argument case, but with the add operands
; written in the opposite order (%const + ctpop). The checks preceding this
; function expect the same v_bcnt_u32_b32_e64 with the SGPR as its second
; source operand.
244*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
245*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
246*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
247*9880d681SAndroid Build Coastguard Worker  %add = add i32 %const, %ctpop
248*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
249*9880d681SAndroid Build Coastguard Worker  ret void
250*9880d681SAndroid Build Coastguard Worker}
251*9880d681SAndroid Build Coastguard Worker
252*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
253*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
254*9880d681SAndroid Build Coastguard Worker; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
255*9880d681SAndroid Build Coastguard Worker; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
256*9880d681SAndroid Build Coastguard Worker; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
257*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
258*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
259*9880d681SAndroid Build Coastguard Worker
260*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
; Population count of an i32 loaded from %in, added to a second i32 loaded
; from element 4 of %constptr (the checks expect that load at byte offset 16).
; The add is written as %const + ctpop. The checks preceding this function
; expect the second loaded VGPR to be the second source operand of
; v_bcnt_u32_b32.
261*9880d681SAndroid Build Coastguard Workerdefine void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
262*9880d681SAndroid Build Coastguard Worker  %val = load i32, i32 addrspace(1)* %in, align 4
263*9880d681SAndroid Build Coastguard Worker  %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
264*9880d681SAndroid Build Coastguard Worker  %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
265*9880d681SAndroid Build Coastguard Worker  %const = load i32, i32 addrspace(1)* %gep, align 4
266*9880d681SAndroid Build Coastguard Worker  %add = add i32 %const, %ctpop
267*9880d681SAndroid Build Coastguard Worker  store i32 %add, i32 addrspace(1)* %out, align 4
268*9880d681SAndroid Build Coastguard Worker  ret void
269*9880d681SAndroid Build Coastguard Worker}
270*9880d681SAndroid Build Coastguard Worker
271*9880d681SAndroid Build Coastguard Worker; FIXME: We currently disallow SALU instructions in all branches,
272*9880d681SAndroid Build Coastguard Worker; but there are some cases when they should be allowed.
273*9880d681SAndroid Build Coastguard Worker
; Population count computed in only one arm of an if/else on %cond: the 'if'
; arm takes ctpop of the %ctpop_arg argument, the 'else' arm loads element 1
; of %in, and a phi merges the two values before the store to %out.
; The v_mov destination below is captured with a fresh [[RESULT:...]]
; definition. A bare [[RESULT]] use would silently reuse whatever register an
; earlier function in this file bound to that name, because FileCheck
; variables persist across labels unless --enable-var-scope is given.
274*9880d681SAndroid Build Coastguard Worker; FUNC-LABEL: {{^}}ctpop_i32_in_br:
275*9880d681SAndroid Build Coastguard Worker; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
276*9880d681SAndroid Build Coastguard Worker; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x34
277*9880d681SAndroid Build Coastguard Worker; GCN: s_bcnt1_i32_b32  [[SRESULT:s[0-9]+]], [[VAL]]
278*9880d681SAndroid Build Coastguard Worker; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], [[SRESULT]]
279*9880d681SAndroid Build Coastguard Worker; GCN: buffer_store_dword [[RESULT]],
280*9880d681SAndroid Build Coastguard Worker; GCN: s_endpgm
281*9880d681SAndroid Build Coastguard Worker; EG: BCNT_INT
282*9880d681SAndroid Build Coastguard Workerdefine void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
283*9880d681SAndroid Build Coastguard Workerentry:
284*9880d681SAndroid Build Coastguard Worker  %tmp0 = icmp eq i32 %cond, 0
285*9880d681SAndroid Build Coastguard Worker  br i1 %tmp0, label %if, label %else
286*9880d681SAndroid Build Coastguard Worker
287*9880d681SAndroid Build Coastguard Workerif:
288*9880d681SAndroid Build Coastguard Worker  %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
289*9880d681SAndroid Build Coastguard Worker  br label %endif
290*9880d681SAndroid Build Coastguard Worker
291*9880d681SAndroid Build Coastguard Workerelse:
292*9880d681SAndroid Build Coastguard Worker  %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
293*9880d681SAndroid Build Coastguard Worker  %tmp4 = load i32, i32 addrspace(1)* %tmp3
294*9880d681SAndroid Build Coastguard Worker  br label %endif
295*9880d681SAndroid Build Coastguard Worker
296*9880d681SAndroid Build Coastguard Workerendif:
297*9880d681SAndroid Build Coastguard Worker  %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
298*9880d681SAndroid Build Coastguard Worker  store i32 %tmp5, i32 addrspace(1)* %out
299*9880d681SAndroid Build Coastguard Worker  ret void
300*9880d681SAndroid Build Coastguard Worker}
301