xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx | FileCheck %s
3*9880d681SAndroid Build Coastguard Worker
4*9880d681SAndroid Build Coastguard Worker; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
5*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.vinsertf128.pd.256 on %a0 and %a1 with immediate 1 and
; returns the result. The assertions below expect a single
; `vinsertf128 $1, %xmm1, %ymm0, %ymm0` followed by the return.
; NOTE(review): going by the file name (...-upgrade.ll) this presumably
; exercises the auto-upgrade path for a legacy intrinsic -- confirm.
6*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
7*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vinsertf128_pd_256_1:
8*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
9*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
10*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
11*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
12*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
13*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<4 x double> destination, <2 x double> inserted value, i8 immediate).
14*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
15*9880d681SAndroid Build Coastguard Worker
; Single-precision counterpart of the vinsertf128 test: calls
; @llvm.x86.avx.vinsertf128.ps.256 on %a0 and %a1 with immediate 1.
; The assertions below expect `vinsertf128 $1, %xmm1, %ymm0, %ymm0` and a return.
16*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
17*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vinsertf128_ps_256_1:
18*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
19*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
20*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
21*9880d681SAndroid Build Coastguard Worker  %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
22*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
23*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<8 x float> destination, <4 x float> inserted value, i8 immediate).
24*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
25*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the vinsertf128 test: calls
; @llvm.x86.avx.vinsertf128.si.256 on %a0 and %a1 with immediate 1.
; The assertions below expect `vinsertf128 $1, %xmm1, %ymm0, %ymm0` and a return.
; The declaration of the intrinsic appears later in this file, after the
; test that calls it with immediate 2.
26*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
27*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_1:
28*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
29*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
30*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
31*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
32*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
33*9880d681SAndroid Build Coastguard Worker}
34*9880d681SAndroid Build Coastguard Worker
35*9880d681SAndroid Build Coastguard Worker; Verify that high bits of the immediate are masked off. This should be the equivalent
36*9880d681SAndroid Build Coastguard Worker; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
37*9880d681SAndroid Build Coastguard Worker; not a vinsertf128 $1.
; Calls @llvm.x86.avx.vinsertf128.si.256 with immediate 2 (only a bit above
; bit 0 is set). The assertions below expect no vinsertf128 at all: just a
; register kill annotation and a vblendpd that takes elements [0,1] from ymm1
; and [2,3] from ymm0, then the return.
38*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
39*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
40*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
41*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    ## kill: %XMM1<def> %XMM1<kill> %YMM1<def>
42*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
43*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
44*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
45*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
46*9880d681SAndroid Build Coastguard Worker}
; Declaration used by both si.256 insert tests in this file (immediates 1 and 2).
47*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
48*9880d681SAndroid Build Coastguard Worker
49*9880d681SAndroid Build Coastguard Worker; We don't check any vextractf128 variant with immediate 0 because that's just a move.
50*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.vextractf128.pd.256 on %a0 with immediate 1 and returns
; the <2 x double> result. The assertions below expect
; `vextractf128 $1, %ymm0, %xmm0`, then vzeroupper, then the return.
51*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
52*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vextractf128_pd_256_1:
53*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
54*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
55*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
56*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
57*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
58*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
59*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<4 x double> source, i8 immediate). Also called by the
; immediate-2 extract test further down in this file.
60*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
61*9880d681SAndroid Build Coastguard Worker
; Single-precision counterpart of the vextractf128 test: calls
; @llvm.x86.avx.vextractf128.ps.256 on %a0 with immediate 1. The assertions
; below expect `vextractf128 $1, %ymm0, %xmm0`, vzeroupper, and the return.
62*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
63*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vextractf128_ps_256_1:
64*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
65*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
66*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
67*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
68*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
69*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
70*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<8 x float> source, i8 immediate).
71*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
72*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the vextractf128 test: calls
; @llvm.x86.avx.vextractf128.si.256 on %a0 with immediate 1. The assertions
; below expect `vextractf128 $1, %ymm0, %xmm0`, vzeroupper, and the return.
73*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
74*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vextractf128_si_256_1:
75*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
76*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
77*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
78*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
79*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
80*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
81*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<8 x i32> source, i8 immediate).
82*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
83*9880d681SAndroid Build Coastguard Worker
84*9880d681SAndroid Build Coastguard Worker; Verify that high bits of the immediate are masked off. This should be the equivalent
85*9880d681SAndroid Build Coastguard Worker; of a vextractf128 $0 which should be optimized away, so just check that it's
86*9880d681SAndroid Build Coastguard Worker; not a vextractf128 of any kind.
; Calls @llvm.x86.avx.vextractf128.pd.256 with immediate 2 (only a bit above
; bit 0 is set). The assertions below expect no extract instruction: only a
; register kill annotation, vzeroupper, and the return.
; The intrinsic is declared earlier in this file, next to the immediate-1 test.
; NOTE(review): the function name says "extractf128" while its siblings say
; "vextractf128"; left as is because the label assertion is tied to the name.
87*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
88*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
89*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
90*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
91*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
92*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
93*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
94*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
95*9880d681SAndroid Build Coastguard Worker}
96*9880d681SAndroid Build Coastguard Worker
97*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.blend.pd.256 on %a0 and %a1 with immediate 7 and returns
; the result. The assertions below expect one vblendpd whose decoded operands
; take elements [0,1,2] from ymm1 and element [3] from ymm0.
98*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
99*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_blend_pd_256:
100*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
101*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
102*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
103*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
104*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
105*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <4 x double> sources and an i32 immediate.
106*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
107*9880d681SAndroid Build Coastguard Worker
108*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.blend.ps.256 on %a0 and %a1 with immediate 7 and returns
; the result. The assertions below expect one vblendps whose decoded operands
; take elements [0,1,2] from ymm1 and elements [3..7] from ymm0.
109*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
110*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_blend_ps_256:
111*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
112*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
113*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
114*9880d681SAndroid Build Coastguard Worker  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
115*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
116*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <8 x float> sources and an i32 immediate.
117*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
118*9880d681SAndroid Build Coastguard Worker
119*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.dp.ps.256 on %a0 and %a1 with immediate 7 and returns
; the result. The assertions below expect the immediate to be carried through
; unchanged as `vdpps $7, %ymm1, %ymm0, %ymm0`.
120*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
121*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_dp_ps_256:
122*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
123*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
124*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
125*9880d681SAndroid Build Coastguard Worker  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
126*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
127*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <8 x float> sources and an i32 immediate.
128*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
129*9880d681SAndroid Build Coastguard Worker
130*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse2.psll.dq on %a0 with an i32 count of 8 and returns the
; result. The assertions below expect a vpslldq whose decoded shuffle is one
; zero byte followed by source bytes 0..14, i.e. a one-byte move.
; NOTE(review): this suggests the count argument is expressed in bits -- confirm.
131*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
132*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_psll_dq:
133*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
134*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
135*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
136*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
137*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
138*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<2 x i64> source, i32 count).
139*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
140*9880d681SAndroid Build Coastguard Worker
141*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse2.psrl.dq on %a0 with an i32 count of 8 and returns the
; result. The assertions below expect a vpsrldq whose decoded shuffle is source
; bytes 1..15 followed by one zero byte, i.e. a one-byte move.
; NOTE(review): this suggests the count argument is expressed in bits -- confirm.
142*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
143*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_psrl_dq:
144*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
145*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
146*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
147*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
148*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
149*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: (<2 x i64> source, i32 count).
150*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
151*9880d681SAndroid Build Coastguard Worker
152*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.blendpd on %a0 and %a1 with immediate 2 and returns the
; result. The assertions below expect one vblendpd whose decoded operands take
; element [0] from xmm0 and element [1] from xmm1.
153*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
154*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_blendpd:
155*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
156*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
157*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
158*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
159*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
160*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <2 x double> sources and an i8 immediate.
161*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
162*9880d681SAndroid Build Coastguard Worker
163*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.blendps on %a0 and %a1 with immediate 7 and returns the
; result. The assertions below expect one vblendps whose decoded operands take
; elements [0,1,2] from xmm1 and element [3] from xmm0.
164*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
165*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_blendps:
166*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
167*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
168*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
169*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
170*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
171*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <4 x float> sources and an i8 immediate.
172*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
173*9880d681SAndroid Build Coastguard Worker
174*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pblendw on %a0 and %a1 with immediate 7 and returns the
; result. The assertions below expect one vpblendw whose decoded operands take
; elements [0,1,2] from xmm1 and elements [3..7] from xmm0.
175*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
176*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pblendw:
177*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
178*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
179*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
180*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
181*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
182*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: two <8 x i16> sources and an i8 immediate.
183*9880d681SAndroid Build Coastguard Workerdeclare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
184*9880d681SAndroid Build Coastguard Worker
185*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxbd on the <16 x i8> argument and returns the
; <4 x i32> result. The assertions below expect a single register-to-register
; vpmovsxbd and the return.
186*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
187*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxbd:
188*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
189*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
190*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
191*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
192*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
193*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <4 x i32> out.
194*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
195*9880d681SAndroid Build Coastguard Worker
196*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxbq on the <16 x i8> argument and returns the
; <2 x i64> result. The assertions below expect a single register-to-register
; vpmovsxbq and the return.
197*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
198*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxbq:
199*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
200*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
201*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
202*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
203*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
204*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <2 x i64> out.
205*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
206*9880d681SAndroid Build Coastguard Worker
207*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxbw on the <16 x i8> argument and returns the
; <8 x i16> result. The assertions below expect a single register-to-register
; vpmovsxbw and the return.
208*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
209*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxbw:
210*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
211*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
212*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
213*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
214*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
215*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <8 x i16> out.
216*9880d681SAndroid Build Coastguard Workerdeclare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
217*9880d681SAndroid Build Coastguard Worker
218*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxdq on the <4 x i32> argument and returns the
; <2 x i64> result. The assertions below expect a single register-to-register
; vpmovsxdq and the return.
219*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
220*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxdq:
221*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
222*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
223*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
224*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
225*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
226*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x i32> in, <2 x i64> out.
227*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
228*9880d681SAndroid Build Coastguard Worker
229*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxwd on the <8 x i16> argument and returns the
; <4 x i32> result. The assertions below expect a single register-to-register
; vpmovsxwd and the return.
230*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
231*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxwd:
232*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
233*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
234*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
235*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
236*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
237*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <8 x i16> in, <4 x i32> out.
238*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
239*9880d681SAndroid Build Coastguard Worker
240*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovsxwq on the <8 x i16> argument and returns the
; <2 x i64> result. The assertions below expect a single register-to-register
; vpmovsxwq and the return.
241*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
242*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovsxwq:
243*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
244*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
245*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
246*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
247*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
248*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <8 x i16> in, <2 x i64> out.
249*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
250*9880d681SAndroid Build Coastguard Worker
251*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxbd on the <16 x i8> argument and returns the
; <4 x i32> result. The assertions below expect one vpmovzxbd whose decoded
; shuffle places source bytes 0..3 each followed by three zero bytes.
252*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
253*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxbd:
254*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
255*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
256*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
257*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
258*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
259*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <4 x i32> out.
260*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
261*9880d681SAndroid Build Coastguard Worker
262*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxbq on the <16 x i8> argument and returns the
; <2 x i64> result. The assertions below expect one vpmovzxbq whose decoded
; shuffle places source bytes 0 and 1 each followed by seven zero bytes.
263*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
264*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxbq:
265*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
266*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
267*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
268*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
269*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
270*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <2 x i64> out.
271*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
272*9880d681SAndroid Build Coastguard Worker
273*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxbw on the <16 x i8> argument and returns the
; <8 x i16> result. The assertions below expect one vpmovzxbw whose decoded
; shuffle places source bytes 0..7 each followed by one zero byte.
274*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
275*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxbw:
276*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
277*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
278*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
279*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
280*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res
281*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <16 x i8> in, <8 x i16> out.
282*9880d681SAndroid Build Coastguard Workerdeclare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
283*9880d681SAndroid Build Coastguard Worker
284*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxdq on the <4 x i32> argument and returns the
; <2 x i64> result. The assertions below expect one vpmovzxdq whose decoded
; shuffle places source elements 0 and 1 each followed by one zero element.
285*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
286*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxdq:
287*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
288*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
289*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
290*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
291*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
292*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x i32> in, <2 x i64> out.
293*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
294*9880d681SAndroid Build Coastguard Worker
295*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxwd on the <8 x i16> argument and returns the
; <4 x i32> result. The assertions below expect one vpmovzxwd whose decoded
; shuffle places source elements 0..3 each followed by one zero element.
296*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
297*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxwd:
298*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
299*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
300*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
301*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
302*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
303*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <8 x i16> in, <4 x i32> out.
304*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
305*9880d681SAndroid Build Coastguard Worker
306*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse41.pmovzxwq on the <8 x i16> argument and returns the
; <2 x i64> result. The assertions below expect one vpmovzxwq whose decoded
; shuffle places source elements 0 and 1 each followed by three zero elements.
307*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
308*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse41_pmovzxwq:
309*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
310*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
311*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
312*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
313*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
314*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <8 x i16> in, <2 x i64> out.
315*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
316*9880d681SAndroid Build Coastguard Worker
317*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse2.cvtdq2pd on the <4 x i32> argument and returns the
; <2 x double> result. The assertions below expect a single
; `vcvtdq2pd %xmm0, %xmm0` and the return.
318*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
319*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
320*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
321*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
322*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
323*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
324*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
325*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x i32> in, <2 x double> out.
326*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
327*9880d681SAndroid Build Coastguard Worker
328*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.cvtdq2.pd.256 on the <4 x i32> argument and returns the
; <4 x double> result. The assertions below expect a single
; `vcvtdq2pd %xmm0, %ymm0` (128-bit source register, 256-bit destination).
329*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
330*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
331*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
332*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
333*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
334*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
335*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
336*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x i32> in, <4 x double> out.
337*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
338*9880d681SAndroid Build Coastguard Worker
339*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.sse2.cvtps2pd on the <4 x float> argument and returns the
; <2 x double> result. The assertions below expect a single
; `vcvtps2pd %xmm0, %xmm0` and the return.
340*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
341*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_cvtps2pd:
342*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
343*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
344*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
345*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
346*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
347*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x float> in, <2 x double> out.
348*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
349*9880d681SAndroid Build Coastguard Worker
350*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.cvt.ps2.pd.256 on the <4 x float> argument and returns
; the <4 x double> result. The assertions below expect a single
; `vcvtps2pd %xmm0, %ymm0` (128-bit source register, 256-bit destination).
351*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
352*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
353*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
354*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
355*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
356*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
357*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
358*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x float> in, <4 x double> out.
359*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
360*9880d681SAndroid Build Coastguard Worker
361*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.cvtt.pd2dq.256 on the <4 x double> argument and returns
; the <4 x i32> result. The assertions below expect
; `vcvttpd2dqy %ymm0, %xmm0`, then vzeroupper, then the return.
362*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
363*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
364*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
365*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
366*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
367*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
368*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
369*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res
370*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <4 x double> in, <4 x i32> out.
371*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
372*9880d681SAndroid Build Coastguard Worker
373*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx.cvtt.ps2dq.256 on the <8 x float> argument and returns
; the <8 x i32> result. The assertions below expect a single
; `vcvttps2dq %ymm0, %ymm0` and the return.
374*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
375*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
376*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
377*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
378*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
379*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
380*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
381*9880d681SAndroid Build Coastguard Worker}
; Intrinsic signature: <8 x float> in, <8 x i32> out.
382*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
383*9880d681SAndroid Build Coastguard Worker
384*9880d681SAndroid Build Coastguard Worker
; Adds 1 to each of the 16 bytes of %a1 and stores the sum through %a0 with the
; llvm.x86.sse2.storeu.dq intrinsic. Expected code: the pointer is loaded from the
; stack into %eax, vpaddb adds a constant-pool operand, and vmovdqu performs the store.
; NOTE(review): the constant-pool label LCPI34_0 is spelled out literally in the
; assertions; it looks tied to this function's position in the file, so adding or
; removing functions above would presumably require regenerating them - confirm.
385*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
386*9880d681SAndroid Build Coastguard Worker  ; add operation forces the execution domain.
387*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_storeu_dq:
388*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
389*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
390*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpaddb LCPI34_0, %xmm0, %xmm0
391*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
392*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
393*9880d681SAndroid Build Coastguard Worker  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
394*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
395*9880d681SAndroid Build Coastguard Worker  ret void
396*9880d681SAndroid Build Coastguard Worker}
397*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
398*9880d681SAndroid Build Coastguard Worker
399*9880d681SAndroid Build Coastguard Worker
; Adds the constant vector <0x0, 0x4200000000000000> to %a1 and stores the sum through
; %a0 with the llvm.x86.sse2.storeu.pd intrinsic. Expected code: the constant is built
; with a vmovsd load followed by a vpslldq byte shift, then vaddpd, then a vmovupd store.
400*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
401*9880d681SAndroid Build Coastguard Worker  ; fadd operation forces the execution domain.
402*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse2_storeu_pd:
403*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
404*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
405*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
406*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
407*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
408*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovupd %xmm0, (%eax)
409*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
410*9880d681SAndroid Build Coastguard Worker  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
411*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
412*9880d681SAndroid Build Coastguard Worker  ret void
413*9880d681SAndroid Build Coastguard Worker}
414*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
415*9880d681SAndroid Build Coastguard Worker
416*9880d681SAndroid Build Coastguard Worker
; Stores %a1 unmodified through %a0 with the llvm.x86.sse.storeu.ps intrinsic.
; Expected code: the pointer is loaded from the stack into %eax and a single vmovups
; writes %xmm0 to it. Unlike the dq/pd variants, no arithmetic precedes the store.
417*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
418*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_sse_storeu_ps:
419*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
420*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
421*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovups %xmm0, (%eax)
422*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
423*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
424*9880d681SAndroid Build Coastguard Worker  ret void
425*9880d681SAndroid Build Coastguard Worker}
426*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
427*9880d681SAndroid Build Coastguard Worker
428*9880d681SAndroid Build Coastguard Worker
; Adds 1 to each of the 32 bytes of %a1 and stores the sum through %a0 with the
; llvm.x86.avx.storeu.dq.256 intrinsic. Expected code: the upper 128 bits are split off
; with vextractf128, both halves are incremented with 128-bit vpaddb against a shared
; vector of ones, the halves are rejoined with vinsertf128, and the 256-bit store is
; emitted as vmovups, followed by vzeroupper.
429*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
430*9880d681SAndroid Build Coastguard Worker  ; FIXME: Unfortunately the execution domain fix pass changes this store to vmovups, and it's hard to force the integer domain with no 256-bit integer instructions.
431*9880d681SAndroid Build Coastguard Worker  ; The add operation is meant to force the execution domain (but see the FIXME above: the store is still expected as vmovups).
432*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_storeu_dq_256:
433*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
434*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
435*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
436*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
437*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
438*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
439*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
440*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovups %ymm0, (%eax)
441*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
442*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
443*9880d681SAndroid Build Coastguard Worker  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
444*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
445*9880d681SAndroid Build Coastguard Worker  ret void
446*9880d681SAndroid Build Coastguard Worker}
447*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
448*9880d681SAndroid Build Coastguard Worker
449*9880d681SAndroid Build Coastguard Worker
; Adds an all-zero <4 x double> vector to %a1 and stores the sum through %a0 with the
; llvm.x86.avx.storeu.pd.256 intrinsic. Expected code: the zero vector is materialized
; with vxorpd, added with vaddpd, and stored with vmovupd, followed by vzeroupper.
450*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
451*9880d681SAndroid Build Coastguard Worker  ; fadd operation forces the execution domain.
452*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_storeu_pd_256:
453*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
454*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
455*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
456*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
457*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovupd %ymm0, (%eax)
458*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
459*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
460*9880d681SAndroid Build Coastguard Worker  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
461*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
462*9880d681SAndroid Build Coastguard Worker  ret void
463*9880d681SAndroid Build Coastguard Worker}
464*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
465*9880d681SAndroid Build Coastguard Worker
466*9880d681SAndroid Build Coastguard Worker
; Stores %a1 unmodified through %a0 with the llvm.x86.avx.storeu.ps.256 intrinsic.
; Expected code: the pointer is loaded from the stack into %eax, a single vmovups writes
; %ymm0 to it, and vzeroupper precedes the return.
467*9880d681SAndroid Build Coastguard Workerdefine void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
468*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_storeu_ps_256:
469*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
470*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
471*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vmovups %ymm0, (%eax)
472*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vzeroupper
473*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
474*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
475*9880d681SAndroid Build Coastguard Worker  ret void
476*9880d681SAndroid Build Coastguard Worker}
477*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
478*9880d681SAndroid Build Coastguard Worker
479*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx.vpermil.pd intrinsic on a <2 x double> with immediate 1.
; Expected code: one vpermilpd whose decoded shuffle swaps the two elements (xmm0[1,0]).
480*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
481*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vpermil_pd:
482*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
483*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
484*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
485*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
486*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
487*9880d681SAndroid Build Coastguard Worker}
488*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
489*9880d681SAndroid Build Coastguard Worker
490*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx.vpermil.pd.256 intrinsic on a <4 x double> with immediate 7.
; Expected code: one vpermilpd whose decoded shuffle is ymm0[1,1,3,2].
491*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
492*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
493*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
494*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
495*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
496*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
497*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
498*9880d681SAndroid Build Coastguard Worker}
499*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
500*9880d681SAndroid Build Coastguard Worker
501*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx.vpermil.ps intrinsic on a <4 x float> with immediate 7.
; Expected code: one vpermilps whose decoded shuffle is xmm0[3,1,0,0].
502*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
503*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vpermil_ps:
504*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
505*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
506*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
507*9880d681SAndroid Build Coastguard Worker  %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
508*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
509*9880d681SAndroid Build Coastguard Worker}
510*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
511*9880d681SAndroid Build Coastguard Worker
512*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx.vpermil.ps.256 intrinsic on an <8 x float> with immediate 7.
; Expected code: one vpermilps whose decoded shuffle is ymm0[3,1,0,0,7,5,4,4], i.e. the
; same [3,1,0,0] pattern within each group of four elements.
513*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
514*9880d681SAndroid Build Coastguard Worker; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
515*9880d681SAndroid Build Coastguard Worker; CHECK:       ## BB#0:
516*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
517*9880d681SAndroid Build Coastguard Worker; CHECK-NEXT:    retl
518*9880d681SAndroid Build Coastguard Worker  %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
519*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
520*9880d681SAndroid Build Coastguard Worker}
521*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
522