; xref: /aosp_15_r20/external/llvm/test/Transforms/InstCombine/x86-vperm2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; RUN: opt < %s -instcombine -S | FileCheck %s

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.

; The control byte is the runtime argument %b rather than a constant, so the
; checks expect the intrinsic call to be left exactly as written.
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT:  call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT:  ret <4 x double>
}


; In the following 4 tests, both zero mask bits of the immediate are set.

; imm8 = 136 (0x88): both zero-mask bits are set, so the whole <4 x double>
; result is expected to fold to zeroinitializer.
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT:  ret <4 x double> zeroinitializer
}

; imm8 = 136 (0x88) on the <8 x float> variant: both zero-mask bits are set, so
; the result is expected to fold to zeroinitializer.
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT:  ret <8 x float> zeroinitializer
}

; imm8 = 136 (0x88) on the <8 x i32> variant: both zero-mask bits are set, so
; the result is expected to fold to zeroinitializer.
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %res

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT:  ret <8 x i32> zeroinitializer
}

; imm8 = 136 (0x88) on the AVX2 <4 x i64> intrinsic: both zero-mask bits are
; set, so the result is expected to fold to zeroinitializer.
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT:  ret <4 x i64> zeroinitializer
}


; The other control bits are ignored when zero mask bits of the immediate are set.

; imm8 = 255 (0xff): every bit is set, including both zero-mask bits, and the
; expected result is still zeroinitializer.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT:  ret <4 x double> zeroinitializer
}


; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.

; imm8 = 0 (0x00): both result halves come from the low half of %a0. Only %a0
; is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 1 (0x01): low half <- high half of %a0, high half <- low half of %a0.
; Only %a0 is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 2 (0x02): low half <- low half of %a1, high half <- low half of %a0.
; The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 3 (0x03): low half <- high half of %a1, high half <- low half of %a0.
; The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 16 (0x10): low half <- low half of %a0, high half <- high half of %a0.
; This is the identity on %a0, so no shuffle is expected; %a0 is returned.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT:  ret <4 x double> %a0
}

; imm8 = 17 (0x11): both result halves come from the high half of %a0. Only %a0
; is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 18 (0x12): low half <- low half of %a1, high half <- high half of %a0.
; The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 19 (0x13): low half <- high half of %a1, high half <- high half of %a0.
; The expected shuffle lists %a1 as its first operand.
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 32 (0x20): low half <- low half of %a0, high half <- low half of %a1.
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 33 (0x21): low half <- high half of %a0, high half <- low half of %a1.
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 34 (0x22): both result halves come from the low half of %a1. Only %a1
; is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 35 (0x23): low half <- high half of %a1, high half <- low half of %a1.
; Only %a1 is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 48 (0x30): low half <- low half of %a0, high half <- high half of %a1.
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 49 (0x31): low half <- high half of %a0, high half <- high half of %a1.
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT:  ret <4 x double> %1
}

; imm8 = 50 (0x32): low half <- low half of %a1, high half <- high half of %a1.
; This is the identity on %a1, so no shuffle is expected; %a1 is returned.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT:  ret <4 x double> %a1
}

; imm8 = 51 (0x33): both result halves come from the high half of %a1. Only %a1
; is read, so the second shuffle operand is expected to be undef.
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x double> %1
}

; Confirm that a mask for 32-bit elements is also correct.

; imm8 = 49 (0x31) on the <8 x float> variant: low half <- high half of %a0
; (elements 4-7), high half <- high half of %a1 (shuffle indices 12-15).
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT:  %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:  ret <8 x float> %1
}


; Confirm that the AVX2 version works the same.

; imm8 = 51 (0x33) on the AVX2 <4 x i64> intrinsic: both result halves come
; from the high half of %a1, so the second shuffle operand is expected to be undef.
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT:  %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:  ret <4 x i64> %1
}


; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.

; imm8 = 129 (0x81): low half <- high half of %a0; the high half is zeroed, so
; the second shuffle operand is expected to be a constant vector starting with 0.0.
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT:  shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}

; imm8 = 131 (0x83): low half <- high half of %a1; the high half is zeroed, so
; the second shuffle operand is expected to be a constant vector starting with 0.0.
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT:  shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}

; imm8 = 40 (0x28): the low half is zeroed, so the first shuffle operand is
; expected to be a constant vector starting with 0.0; high half <- low half of %a1.
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}

; imm8 = 8 (0x08): the low half is zeroed, so the first shuffle operand is
; expected to be a constant vector starting with 0.0; high half <- low half of %a0.
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT:  shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x double>
}

; Check one more with the AVX2 version.

; imm8 = 40 (0x28) on the AVX2 <4 x i64> intrinsic: the low half is zeroed, so
; the first shuffle operand is expected to be a constant vector starting with
; i64 0; high half <- low half of %a1.
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT:  shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:  ret <4 x i64>
}

; Intrinsics exercised by the tests in this file: the three AVX vperm2f128
; variants (pd / ps / si) and the AVX2 vperm2i128. Each takes two 256-bit
; vectors and an i8 control byte.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone