xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
5*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
6*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
7*9880d681SAndroid Build Coastguard Worker;
8*9880d681SAndroid Build Coastguard Worker; Just one 32-bit run to make sure we do reasonable things.
9*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
10*9880d681SAndroid Build Coastguard Worker
; Two adjacent double loads (elements 2 and 3 of %ptr) are inserted into
; lanes 0 and 1 of a <2 x double>. Every configuration checked below expects
; them to be combined into one unaligned 16-byte vector load at byte offset 16;
; the 32-bit run first fetches %ptr from the stack into %eax.
11*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @merge_2f64_f64_23(double* %ptr) nounwind uwtable noinline ssp {
12*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_2f64_f64_23:
13*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
14*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 16(%rdi), %xmm0
15*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
16*9880d681SAndroid Build Coastguard Worker;
17*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_2f64_f64_23:
18*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
19*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 16(%rdi), %xmm0
20*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
21*9880d681SAndroid Build Coastguard Worker;
22*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_2f64_f64_23:
23*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
24*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
25*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 16(%eax), %xmm0
26*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
27*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
28*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
29*9880d681SAndroid Build Coastguard Worker  %val0 = load double, double* %ptr0
30*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double* %ptr1
  ; Assemble the result lane by lane, starting from an undef vector.
31*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <2 x double> undef, double %val0, i32 0
32*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <2 x double> %res0, double %val1, i32 1
33*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res1
34*9880d681SAndroid Build Coastguard Worker}
35*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the two-element merge: i64 elements 1 and 2 of %ptr
; fill lanes 0 and 1 of a <2 x i64>. The checks expect a single unaligned
; 16-byte load at byte offset 8 on all configurations.
36*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @merge_2i64_i64_12(i64* %ptr) nounwind uwtable noinline ssp {
37*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_2i64_i64_12:
38*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
39*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 8(%rdi), %xmm0
40*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
41*9880d681SAndroid Build Coastguard Worker;
42*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_2i64_i64_12:
43*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
44*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 8(%rdi), %xmm0
45*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
46*9880d681SAndroid Build Coastguard Worker;
47*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_2i64_i64_12:
48*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
49*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
50*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 8(%eax), %xmm0
51*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
52*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
53*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
54*9880d681SAndroid Build Coastguard Worker  %val0 = load i64, i64* %ptr0
55*9880d681SAndroid Build Coastguard Worker  %val1 = load i64, i64* %ptr1
  ; Assemble the result lane by lane, starting from an undef vector.
56*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
57*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
58*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res1
59*9880d681SAndroid Build Coastguard Worker}
60*9880d681SAndroid Build Coastguard Worker
; Four consecutive float loads (elements 2, 3, 4, 5 of %ptr) populate all four
; lanes of a <4 x float> in order. The checks expect the four scalar loads to
; collapse into one unaligned 16-byte load at byte offset 8.
61*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_2345(float* %ptr) nounwind uwtable noinline ssp {
62*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4f32_f32_2345:
63*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
64*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 8(%rdi), %xmm0
65*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
66*9880d681SAndroid Build Coastguard Worker;
67*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_2345:
68*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
69*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 8(%rdi), %xmm0
70*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
71*9880d681SAndroid Build Coastguard Worker;
72*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_2345:
73*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
74*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
75*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 8(%eax), %xmm0
76*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address the four neighbouring source elements.
77*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 2
78*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 3
79*9880d681SAndroid Build Coastguard Worker  %ptr2 = getelementptr inbounds float, float* %ptr, i64 4
80*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds float, float* %ptr, i64 5
  ; One scalar load per element; these are what the backend is expected to merge.
81*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
82*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
83*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float* %ptr2
84*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float* %ptr3
  ; Lane i receives the i-th loaded element, so memory order equals lane order.
85*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
86*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
87*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
88*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
89*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Lane 0 takes float element 3 of %ptr, lane 1 is the constant 0.0, and lanes
; 2 and 3 are never written (they stay undef). The checks expect one scalar
; load (movss / vmovss) whose result pattern "mem[0],zero,zero,zero" already
; supplies the zero in lane 1, with no extra instruction.
92*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_3zuu(float* %ptr) nounwind uwtable noinline ssp {
93*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4f32_f32_3zuu:
94*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
95*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
96*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
97*9880d681SAndroid Build Coastguard Worker;
98*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_3zuu:
99*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
100*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
101*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
102*9880d681SAndroid Build Coastguard Worker;
103*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_3zuu:
104*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
105*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
106*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
107*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Only one element is read from memory in this test.
108*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
109*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
  ; Lane 0 = loaded value, lane 1 = literal zero; lanes 2-3 remain undef.
110*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
111*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
112*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
113*9880d681SAndroid Build Coastguard Worker}
114*9880d681SAndroid Build Coastguard Worker
; Lanes 0 and 1 take float elements 3 and 4 of %ptr; lanes 2 and 3 are never
; written and stay undef. The checks expect the two 4-byte loads to be merged
; into a single 8-byte movsd / vmovsd load (result pattern "mem[0],zero").
115*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_34uu(float* %ptr) nounwind uwtable noinline ssp {
116*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4f32_f32_34uu:
117*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
118*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
119*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
120*9880d681SAndroid Build Coastguard Worker;
121*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_34uu:
122*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
123*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
124*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
125*9880d681SAndroid Build Coastguard Worker;
126*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_34uu:
127*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
128*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
129*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
130*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
131*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
132*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
133*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
134*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
  ; Only the low two lanes are defined; the base vector is undef.
135*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
136*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
137*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
138*9880d681SAndroid Build Coastguard Worker}
139*9880d681SAndroid Build Coastguard Worker
; Lanes 0, 1 and 3 take float elements 3, 4 and 6 of %ptr; lane 2 keeps the
; zero from the zeroinitializer base vector. The expected code differs per
; feature level, so the checks are split by sub-prefix instead of sharing one:
;   - SSE2 expects a 16-byte load at byte offset 12, a zeroed register, and
;     two shufps to splice the zero into lane 2.
;   - SSE4.1 (64-bit and the 32-bit run) expects the same load and zeroed
;     register followed by a single blendps.
;   - AVX expects the load folded into vblendps as a memory operand.
; The 16-byte load also covers element 5, which the IR itself never loads.
140*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_34z6(float* %ptr) nounwind uwtable noinline ssp {
141*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: merge_4f32_f32_34z6:
142*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
143*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movups 12(%rdi), %xmm0
144*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    xorps %xmm1, %xmm1
145*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
146*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
147*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
148*9880d681SAndroid Build Coastguard Worker;
149*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: merge_4f32_f32_34z6:
150*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
151*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movups 12(%rdi), %xmm1
152*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    xorps %xmm0, %xmm0
153*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
154*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
155*9880d681SAndroid Build Coastguard Worker;
156*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_34z6:
157*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
158*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
159*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2],mem[3]
160*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
161*9880d681SAndroid Build Coastguard Worker;
162*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_34z6:
163*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
164*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
165*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 12(%eax), %xmm1
166*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    xorps %xmm0, %xmm0
167*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
168*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; There is deliberately no %ptr2/%val2 - element 5 is skipped.
169*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
170*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
171*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds float, float* %ptr, i64 6
172*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
173*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
174*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float* %ptr3
  ; Start from all-zero so the untouched lane 2 is a defined 0.0, not undef.
175*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
176*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
177*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res1, float %val3, i32 3
178*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
179*9880d681SAndroid Build Coastguard Worker}
180*9880d681SAndroid Build Coastguard Worker
; Lanes 0 and 1 take float elements 4 and 5 of %ptr; lanes 2 and 3 keep the
; zeros of the zeroinitializer base vector. The checks expect a single 8-byte
; movsd / vmovsd load whose result pattern "mem[0],zero" already provides the
; zeroed upper half.
181*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_45zz(float* %ptr) nounwind uwtable noinline ssp {
182*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4f32_f32_45zz:
183*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
184*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
185*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
186*9880d681SAndroid Build Coastguard Worker;
187*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_45zz:
188*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
189*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
190*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
191*9880d681SAndroid Build Coastguard Worker;
192*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_45zz:
193*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
194*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
195*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
196*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
197*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
198*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
199*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
200*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
  ; Zero base vector: the two lanes not overwritten must read back as 0.0.
201*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
202*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
203*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
204*9880d681SAndroid Build Coastguard Worker}
205*9880d681SAndroid Build Coastguard Worker
; Lanes 0, 1 and 2 take float elements 0, 1 and 2 of %ptr; lane 3 is set to an
; explicit undef. The recorded output does not use one 16-byte load here:
;   - SSE2 expects three scalar movss loads joined by two unpcklps.
;   - SSE4.1, AVX and the 32-bit run expect an 8-byte movsd load for lanes
;     0-1 plus an insertps from memory for lane 2.
; Review note (unconfirmed): presumably a full-width load is avoided because
; it would read the bytes of element 3, which the IR never loads.
206*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline ssp {
207*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: merge_4f32_f32_012u:
208*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
209*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
210*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
212*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
213*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
214*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
215*9880d681SAndroid Build Coastguard Worker;
216*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: merge_4f32_f32_012u:
217*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
218*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
219*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
220*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
221*9880d681SAndroid Build Coastguard Worker;
222*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_012u:
223*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
224*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
225*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
226*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
227*9880d681SAndroid Build Coastguard Worker;
228*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_012u:
229*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
230*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
231*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
232*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
233*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Three consecutive elements starting at the base pointer itself.
234*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
235*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 1
236*9880d681SAndroid Build Coastguard Worker  %ptr2 = getelementptr inbounds float, float* %ptr, i64 2
237*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
238*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
239*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float* %ptr2
  ; Lane 3 is written with an explicit undef rather than simply left alone.
240*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
241*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
242*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
243*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res2, float undef, i32 3
244*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
245*9880d681SAndroid Build Coastguard Worker}
246*9880d681SAndroid Build Coastguard Worker
; Lanes 0 and 1 take float elements 0 and 1 of %ptr, lane 2 takes the
; non-adjacent element 9, and lane 3 is an explicit undef. Only the first two
; loads are neighbours, so the expected code merges just those:
;   - SSE2 expects three scalar movss loads joined by two unpcklps.
;   - SSE4.1, AVX and the 32-bit run expect an 8-byte movsd load for lanes
;     0-1 plus an insertps from memory for lane 2.
; Because every memory operand is matched through the "{{.*#+}}" wildcard and
; printed as "mem[0]", these check lines do not pin the actual load offsets.
247*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline ssp {
248*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: merge_4f32_f32_019u:
249*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
250*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
251*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
252*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
253*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
254*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
255*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
256*9880d681SAndroid Build Coastguard Worker;
257*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: merge_4f32_f32_019u:
258*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
259*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
260*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
261*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
262*9880d681SAndroid Build Coastguard Worker;
263*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_019u:
264*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
265*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
266*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
267*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
268*9880d681SAndroid Build Coastguard Worker;
269*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_019u:
270*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
271*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
272*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
273*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
274*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Elements 0 and 1 are adjacent; element 9 is far away from both.
275*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
276*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 1
277*9880d681SAndroid Build Coastguard Worker  %ptr2 = getelementptr inbounds float, float* %ptr, i64 9
278*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0
279*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
280*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float* %ptr2
  ; Lane 3 is written with an explicit undef rather than simply left alone.
281*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
282*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
283*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
284*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res2, float undef, i32 3
285*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
286*9880d681SAndroid Build Coastguard Worker}
287*9880d681SAndroid Build Coastguard Worker
; Lanes 0, 1 and 3 take i32 elements 2, 3 and 5 of %ptr; lane 2 is never
; written and stays undef. Since the undef lane may hold any value, the checks
; expect the gap to be ignored and all three loads replaced by one unaligned
; 16-byte load at byte offset 8 (which also brings in element 4).
288*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
289*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4i32_i32_23u5:
290*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
291*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 8(%rdi), %xmm0
292*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
293*9880d681SAndroid Build Coastguard Worker;
294*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4i32_i32_23u5:
295*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
296*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 8(%rdi), %xmm0
297*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
298*9880d681SAndroid Build Coastguard Worker;
299*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4i32_i32_23u5:
300*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
301*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
302*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 8(%eax), %xmm0
303*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; There is deliberately no %ptr2/%val2 - element 4 is skipped.
304*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
305*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
306*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
307*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
308*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32* %ptr1
309*9880d681SAndroid Build Coastguard Worker  %val3 = load i32, i32* %ptr3
  ; Undef base vector: lane 2 is left unspecified.
310*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
311*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
312*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
313*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res3
314*9880d681SAndroid Build Coastguard Worker}
315*9880d681SAndroid Build Coastguard Worker
; Integer version of the "one element plus a zero" case: lane 0 takes i32
; element 3 of %ptr, lane 1 is the constant 0, lanes 2 and 3 stay undef. The
; checks expect a single movd / vmovd load whose result pattern
; "mem[0],zero,zero,zero" already supplies the zero in lane 1.
316*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
317*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4i32_i32_3zuu:
318*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
319*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
320*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
321*9880d681SAndroid Build Coastguard Worker;
322*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4i32_i32_3zuu:
323*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
324*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
325*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
326*9880d681SAndroid Build Coastguard Worker;
327*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4i32_i32_3zuu:
328*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
329*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
330*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
331*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Only one element is read from memory in this test.
332*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
333*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
  ; Lane 0 = loaded value, lane 1 = literal zero; lanes 2-3 remain undef.
334*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
335*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x i32> %res0, i32     0, i32 1
336*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res1
337*9880d681SAndroid Build Coastguard Worker}
338*9880d681SAndroid Build Coastguard Worker
; Lanes 0 and 1 take i32 elements 3 and 4 of %ptr; lanes 2 and 3 are never
; written and stay undef. The checks expect the two 4-byte loads to be merged
; into a single 8-byte movq / vmovq load (result pattern "mem[0],zero").
339*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @merge_4i32_i32_34uu(i32* %ptr) nounwind uwtable noinline ssp {
340*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4i32_i32_34uu:
341*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
342*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
343*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
344*9880d681SAndroid Build Coastguard Worker;
345*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4i32_i32_34uu:
346*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
347*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
348*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
349*9880d681SAndroid Build Coastguard Worker;
350*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4i32_i32_34uu:
351*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
352*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
353*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
354*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
355*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
356*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 4
357*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
358*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32* %ptr1
  ; Only the low two lanes are defined; the base vector is undef.
359*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
360*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
361*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res1
362*9880d681SAndroid Build Coastguard Worker}
363*9880d681SAndroid Build Coastguard Worker
; Lanes 0 and 1 take i32 elements 4 and 5 of %ptr; lanes 2 and 3 keep the
; zeros of the zeroinitializer base vector. The checks expect a single 8-byte
; movq / vmovq load whose result pattern "mem[0],zero" already provides the
; zeroed upper half.
364*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @merge_4i32_i32_45zz(i32* %ptr) nounwind uwtable noinline ssp {
365*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4i32_i32_45zz:
366*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
367*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
368*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
369*9880d681SAndroid Build Coastguard Worker;
370*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4i32_i32_45zz:
371*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
372*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
373*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
374*9880d681SAndroid Build Coastguard Worker;
375*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4i32_i32_45zz:
376*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
377*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
378*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
379*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
  ; Address and load the two neighbouring source elements.
380*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
381*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
382*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
383*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32* %ptr1
  ; Zero base vector: the two lanes not overwritten must read back as 0.
384*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
385*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
386*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %res1
387*9880d681SAndroid Build Coastguard Worker}
388*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x i16> from scalar i16 loads at elements 2,3,5,6,7,9 of %ptr,
; inserted into lanes 0,1,3,4,5,7 of an undef vector. Lanes 2 and 6 are never
; written and stay undef (the 'u' positions in the function name).
; The assertions below expect every configuration to emit a single unaligned
; 16-byte load starting at byte offset 4 (element 2); the i686 configuration
; first fetches the pointer argument from the stack.
389*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline ssp {
390*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_8i16_i16_23u567u9:
391*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
392*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups 4(%rdi), %xmm0
393*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
394*9880d681SAndroid Build Coastguard Worker;
395*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_8i16_i16_23u567u9:
396*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
397*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups 4(%rdi), %xmm0
398*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
399*9880d681SAndroid Build Coastguard Worker;
400*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_8i16_i16_23u567u9:
401*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
402*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
403*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups 4(%eax), %xmm0
404*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
405*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
406*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
407*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 5
408*9880d681SAndroid Build Coastguard Worker  %ptr4 = getelementptr inbounds i16, i16* %ptr, i64 6
409*9880d681SAndroid Build Coastguard Worker  %ptr5 = getelementptr inbounds i16, i16* %ptr, i64 7
410*9880d681SAndroid Build Coastguard Worker  %ptr7 = getelementptr inbounds i16, i16* %ptr, i64 9
411*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
412*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
413*9880d681SAndroid Build Coastguard Worker  %val3 = load i16, i16* %ptr3
414*9880d681SAndroid Build Coastguard Worker  %val4 = load i16, i16* %ptr4
415*9880d681SAndroid Build Coastguard Worker  %val5 = load i16, i16* %ptr5
416*9880d681SAndroid Build Coastguard Worker  %val7 = load i16, i16* %ptr7
417*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
418*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
419*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
420*9880d681SAndroid Build Coastguard Worker  %res4 = insertelement <8 x i16> %res3, i16 %val4, i32 4
421*9880d681SAndroid Build Coastguard Worker  %res5 = insertelement <8 x i16> %res4, i16 %val5, i32 5
422*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <8 x i16> %res5, i16 %val7, i32 7
423*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res7
424*9880d681SAndroid Build Coastguard Worker}
425*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x i16> whose lanes 0 and 1 come from scalar i16 loads at
; elements 3 and 4 of %ptr; lanes 2-7 are never written and stay undef.
; The assertions below expect the two adjacent i16 loads to become a single
; movd/vmovd load in every configuration.
426*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
427*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_8i16_i16_34uuuuuu:
428*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
429*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
430*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
431*9880d681SAndroid Build Coastguard Worker;
432*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_8i16_i16_34uuuuuu:
433*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
434*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
435*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
436*9880d681SAndroid Build Coastguard Worker;
437*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_8i16_i16_34uuuuuu:
438*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
439*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
440*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
441*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
442*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 3
443*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 4
444*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
445*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
446*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
447*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
448*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res1
449*9880d681SAndroid Build Coastguard Worker}
450*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x i16> from scalar i16 loads at elements 4,5,7 of %ptr placed
; in lanes 0,1,3. Lane 2 is never written (undef) and lanes 4-7 are explicitly
; set to the constant 0 (the 'z' positions in the function name).
; The assertions below expect a single movq/vmovq load with a zeroed upper
; half in every configuration.
451*9880d681SAndroid Build Coastguard Workerdefine <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline ssp {
452*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_8i16_i16_45u7zzzz:
453*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
454*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
455*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
456*9880d681SAndroid Build Coastguard Worker;
457*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_8i16_i16_45u7zzzz:
458*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
459*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
460*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
461*9880d681SAndroid Build Coastguard Worker;
462*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_8i16_i16_45u7zzzz:
463*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
464*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
465*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
466*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
467*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
468*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
469*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
470*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
471*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
472*9880d681SAndroid Build Coastguard Worker  %val3 = load i16, i16* %ptr3
473*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
474*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
475*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
476*9880d681SAndroid Build Coastguard Worker  %res4 = insertelement <8 x i16> %res3, i16     0, i32 4
477*9880d681SAndroid Build Coastguard Worker  %res5 = insertelement <8 x i16> %res4, i16     0, i32 5
478*9880d681SAndroid Build Coastguard Worker  %res6 = insertelement <8 x i16> %res5, i16     0, i32 6
479*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <8 x i16> %res6, i16     0, i32 7
480*9880d681SAndroid Build Coastguard Worker  ret <8 x i16> %res7
481*9880d681SAndroid Build Coastguard Worker}
482*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i8> from scalar i8 loads at byte offsets 0,1,3-13,15 of %ptr,
; each inserted into the lane with the same index. Lanes 2 and 14 are never
; written and stay undef (the 'u' positions in the function name).
; The assertions below expect a single unaligned 16-byte load from the base
; pointer in every configuration.
483*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noinline ssp {
484*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF:
485*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
486*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movups (%rdi), %xmm0
487*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
488*9880d681SAndroid Build Coastguard Worker;
489*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_16i8_i8_01u3456789ABCDuF:
490*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
491*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovups (%rdi), %xmm0
492*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
493*9880d681SAndroid Build Coastguard Worker;
494*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF:
495*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
496*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
497*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movups (%eax), %xmm0
498*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
499*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
500*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
501*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
502*9880d681SAndroid Build Coastguard Worker  %ptr4 = getelementptr inbounds i8, i8* %ptr, i64 4
503*9880d681SAndroid Build Coastguard Worker  %ptr5 = getelementptr inbounds i8, i8* %ptr, i64 5
504*9880d681SAndroid Build Coastguard Worker  %ptr6 = getelementptr inbounds i8, i8* %ptr, i64 6
505*9880d681SAndroid Build Coastguard Worker  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 7
506*9880d681SAndroid Build Coastguard Worker  %ptr8 = getelementptr inbounds i8, i8* %ptr, i64 8
507*9880d681SAndroid Build Coastguard Worker  %ptr9 = getelementptr inbounds i8, i8* %ptr, i64 9
508*9880d681SAndroid Build Coastguard Worker  %ptrA = getelementptr inbounds i8, i8* %ptr, i64 10
509*9880d681SAndroid Build Coastguard Worker  %ptrB = getelementptr inbounds i8, i8* %ptr, i64 11
510*9880d681SAndroid Build Coastguard Worker  %ptrC = getelementptr inbounds i8, i8* %ptr, i64 12
511*9880d681SAndroid Build Coastguard Worker  %ptrD = getelementptr inbounds i8, i8* %ptr, i64 13
512*9880d681SAndroid Build Coastguard Worker  %ptrF = getelementptr inbounds i8, i8* %ptr, i64 15
513*9880d681SAndroid Build Coastguard Worker  %val0 = load i8, i8* %ptr0
514*9880d681SAndroid Build Coastguard Worker  %val1 = load i8, i8* %ptr1
515*9880d681SAndroid Build Coastguard Worker  %val3 = load i8, i8* %ptr3
516*9880d681SAndroid Build Coastguard Worker  %val4 = load i8, i8* %ptr4
517*9880d681SAndroid Build Coastguard Worker  %val5 = load i8, i8* %ptr5
518*9880d681SAndroid Build Coastguard Worker  %val6 = load i8, i8* %ptr6
519*9880d681SAndroid Build Coastguard Worker  %val7 = load i8, i8* %ptr7
520*9880d681SAndroid Build Coastguard Worker  %val8 = load i8, i8* %ptr8
521*9880d681SAndroid Build Coastguard Worker  %val9 = load i8, i8* %ptr9
522*9880d681SAndroid Build Coastguard Worker  %valA = load i8, i8* %ptrA
523*9880d681SAndroid Build Coastguard Worker  %valB = load i8, i8* %ptrB
524*9880d681SAndroid Build Coastguard Worker  %valC = load i8, i8* %ptrC
525*9880d681SAndroid Build Coastguard Worker  %valD = load i8, i8* %ptrD
526*9880d681SAndroid Build Coastguard Worker  %valF = load i8, i8* %ptrF
527*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
528*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
529*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
530*9880d681SAndroid Build Coastguard Worker  %res4 = insertelement <16 x i8> %res3, i8 %val4, i32 4
531*9880d681SAndroid Build Coastguard Worker  %res5 = insertelement <16 x i8> %res4, i8 %val5, i32 5
532*9880d681SAndroid Build Coastguard Worker  %res6 = insertelement <16 x i8> %res5, i8 %val6, i32 6
533*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
534*9880d681SAndroid Build Coastguard Worker  %res8 = insertelement <16 x i8> %res7, i8 %val8, i32 8
535*9880d681SAndroid Build Coastguard Worker  %res9 = insertelement <16 x i8> %res8, i8 %val9, i32 9
536*9880d681SAndroid Build Coastguard Worker  %resA = insertelement <16 x i8> %res9, i8 %valA, i32 10
537*9880d681SAndroid Build Coastguard Worker  %resB = insertelement <16 x i8> %resA, i8 %valB, i32 11
538*9880d681SAndroid Build Coastguard Worker  %resC = insertelement <16 x i8> %resB, i8 %valC, i32 12
539*9880d681SAndroid Build Coastguard Worker  %resD = insertelement <16 x i8> %resC, i8 %valD, i32 13
540*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i8> %resD, i8 %valF, i32 15
541*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %resF
542*9880d681SAndroid Build Coastguard Worker}
543*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i8> from scalar i8 loads at byte offsets 0,1,3 of %ptr placed
; in lanes 0,1,3. Lanes 6,7,13,14,15 are explicitly set to the constant 0; all
; remaining lanes are never written and stay undef.
; The assertions below expect a single movd/vmovd load in every configuration.
544*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
545*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
546*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
547*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
548*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
549*9880d681SAndroid Build Coastguard Worker;
550*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
551*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
552*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
553*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
554*9880d681SAndroid Build Coastguard Worker;
555*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
556*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
557*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
558*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
559*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
560*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
561*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
562*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
563*9880d681SAndroid Build Coastguard Worker  %val0 = load i8, i8* %ptr0
564*9880d681SAndroid Build Coastguard Worker  %val1 = load i8, i8* %ptr1
565*9880d681SAndroid Build Coastguard Worker  %val3 = load i8, i8* %ptr3
566*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
567*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
568*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
569*9880d681SAndroid Build Coastguard Worker  %res6 = insertelement <16 x i8> %res3, i8     0, i32 6
570*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <16 x i8> %res6, i8     0, i32 7
571*9880d681SAndroid Build Coastguard Worker  %resD = insertelement <16 x i8> %res7, i8     0, i32 13
572*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <16 x i8> %resD, i8     0, i32 14
573*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i8> %resE, i8     0, i32 15
574*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %resF
575*9880d681SAndroid Build Coastguard Worker}
576*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i8> from scalar i8 loads at byte offsets 0,1,2,3,6,7 of %ptr,
; each inserted into the lane with the same index. Lanes 13,14,15 are
; explicitly set to the constant 0; lanes 4,5 and 8-12 are never written and
; stay undef.
; The assertions below expect a single movq/vmovq load in every configuration.
577*9880d681SAndroid Build Coastguard Workerdefine <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
578*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
579*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
580*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
581*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
582*9880d681SAndroid Build Coastguard Worker;
583*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
584*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
585*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
586*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
587*9880d681SAndroid Build Coastguard Worker;
588*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
589*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
590*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
591*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
592*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
593*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
594*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
595*9880d681SAndroid Build Coastguard Worker  %ptr2 = getelementptr inbounds i8, i8* %ptr, i64 2
596*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
597*9880d681SAndroid Build Coastguard Worker  %ptr6 = getelementptr inbounds i8, i8* %ptr, i64 6
598*9880d681SAndroid Build Coastguard Worker  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 7
599*9880d681SAndroid Build Coastguard Worker  %val0 = load i8, i8* %ptr0
600*9880d681SAndroid Build Coastguard Worker  %val1 = load i8, i8* %ptr1
601*9880d681SAndroid Build Coastguard Worker  %val2 = load i8, i8* %ptr2
602*9880d681SAndroid Build Coastguard Worker  %val3 = load i8, i8* %ptr3
603*9880d681SAndroid Build Coastguard Worker  %val6 = load i8, i8* %ptr6
604*9880d681SAndroid Build Coastguard Worker  %val7 = load i8, i8* %ptr7
605*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
606*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
607*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <16 x i8> %res1, i8 %val2, i32 2
608*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i8> %res2, i8 %val3, i32 3
609*9880d681SAndroid Build Coastguard Worker  %res6 = insertelement <16 x i8> %res3, i8 %val6, i32 6
610*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
611*9880d681SAndroid Build Coastguard Worker  %resD = insertelement <16 x i8> %res7, i8     0, i32 13
612*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <16 x i8> %resD, i8     0, i32 14
613*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i8> %resE, i8     0, i32 15
614*9880d681SAndroid Build Coastguard Worker  ret <16 x i8> %resF
615*9880d681SAndroid Build Coastguard Worker}
616*9880d681SAndroid Build Coastguard Worker
; Loads one i32 from %src, inserts it into lane 0 of an undef <4 x i32>,
; splats it with an all-zero shuffle mask, shifts right by <0, undef, undef,
; undef>, masks with <-1, 0, 0, 0> so that only lane 0 survives, and stores the
; result to %dst.
; The assertions below expect the whole chain to reduce to one movd/vmovd load
; followed by one aligned 16-byte store (vmovdqa rather than vmovaps for the
; AVX512F configuration). The i686 configuration first fetches both pointer
; arguments from the stack.
617*9880d681SAndroid Build Coastguard Workerdefine void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
618*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4i32_i32_combine:
619*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
620*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
621*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm0, (%rdi)
622*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
623*9880d681SAndroid Build Coastguard Worker;
624*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: merge_4i32_i32_combine:
625*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
626*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
627*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps %xmm0, (%rdi)
628*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
629*9880d681SAndroid Build Coastguard Worker;
630*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: merge_4i32_i32_combine:
631*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
632*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
633*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
634*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
635*9880d681SAndroid Build Coastguard Worker;
636*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: merge_4i32_i32_combine:
637*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
638*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
639*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovdqa %xmm0, (%rdi)
640*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
641*9880d681SAndroid Build Coastguard Worker;
642*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4i32_i32_combine:
643*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
644*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
645*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
646*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
647*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movaps %xmm0, (%eax)
648*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
649*9880d681SAndroid Build Coastguard Worker %1 = getelementptr i32, i32* %src, i32 0
650*9880d681SAndroid Build Coastguard Worker %2 = load i32, i32* %1
651*9880d681SAndroid Build Coastguard Worker %3 = insertelement <4 x i32> undef, i32 %2, i32 0
652*9880d681SAndroid Build Coastguard Worker %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
653*9880d681SAndroid Build Coastguard Worker %5 = lshr <4 x i32> %4, <i32 0, i32 undef, i32 undef, i32 undef>
654*9880d681SAndroid Build Coastguard Worker %6 = and <4 x i32> %5, <i32 -1, i32 0, i32 0, i32 0>
655*9880d681SAndroid Build Coastguard Worker store <4 x i32> %6, <4 x i32>* %dst
656*9880d681SAndroid Build Coastguard Worker ret void
657*9880d681SAndroid Build Coastguard Worker}
658*9880d681SAndroid Build Coastguard Worker
659*9880d681SAndroid Build Coastguard Worker;
660*9880d681SAndroid Build Coastguard Worker; Consecutive loads must not be combined when any (or all) of them are volatile.
661*9880d681SAndroid Build Coastguard Worker;
662*9880d681SAndroid Build Coastguard Worker
; Builds a <2 x i64> from two volatile i64 loads at elements 1 and 2 of %ptr.
; Both loads are volatile, so the assertions below expect them to stay
; separate: two movq/vmovq loads joined by (v)punpcklqdq on x86-64, and a movd
; followed by three pinsrd instructions (offsets 12, 16, 20) on i686.
663*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinline ssp {
664*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_2i64_i64_12_volatile:
665*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
666*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
667*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
668*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
669*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
670*9880d681SAndroid Build Coastguard Worker;
671*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_2i64_i64_12_volatile:
672*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
673*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
674*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
675*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
676*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
677*9880d681SAndroid Build Coastguard Worker;
678*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_2i64_i64_12_volatile:
679*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
680*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
681*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
682*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    pinsrd $1, 12(%eax), %xmm0
683*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    pinsrd $2, 16(%eax), %xmm0
684*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    pinsrd $3, 20(%eax), %xmm0
685*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
686*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
687*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
688*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile i64, i64* %ptr0
689*9880d681SAndroid Build Coastguard Worker  %val1 = load volatile i64, i64* %ptr1
690*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
691*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
692*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res1
693*9880d681SAndroid Build Coastguard Worker}
694*9880d681SAndroid Build Coastguard Worker
; Builds a <4 x float> from float loads at elements 2,3,4,5 of %ptr. Only the
; first load (element 2) is volatile; the other three are ordinary loads.
; The assertions below expect no merged vector load in any configuration: the
; SSE2 configuration uses four movss loads combined with unpcklps, while the
; SSE4.1, AVX and i686 configurations use one (v)movss load followed by three
; (v)insertps instructions that read from memory.
695*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
696*9880d681SAndroid Build Coastguard Worker; SSE2-LABEL: merge_4f32_f32_2345_volatile:
697*9880d681SAndroid Build Coastguard Worker; SSE2:       # BB#0:
698*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
699*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
700*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
701*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
702*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
703*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
704*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
705*9880d681SAndroid Build Coastguard Worker; SSE2-NEXT:    retq
706*9880d681SAndroid Build Coastguard Worker;
707*9880d681SAndroid Build Coastguard Worker; SSE41-LABEL: merge_4f32_f32_2345_volatile:
708*9880d681SAndroid Build Coastguard Worker; SSE41:       # BB#0:
709*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
710*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
711*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
712*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
713*9880d681SAndroid Build Coastguard Worker; SSE41-NEXT:    retq
714*9880d681SAndroid Build Coastguard Worker;
715*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_2345_volatile:
716*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
717*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
718*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
719*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
720*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
721*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
722*9880d681SAndroid Build Coastguard Worker;
723*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_2345_volatile:
724*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
725*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
726*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
727*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
728*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
729*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
730*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
731*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds float, float* %ptr, i64 2
732*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds float, float* %ptr, i64 3
733*9880d681SAndroid Build Coastguard Worker  %ptr2 = getelementptr inbounds float, float* %ptr, i64 4
734*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds float, float* %ptr, i64 5
735*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile float, float* %ptr0
736*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1
737*9880d681SAndroid Build Coastguard Worker  %val2 = load float, float* %ptr2
738*9880d681SAndroid Build Coastguard Worker  %val3 = load float, float* %ptr3
739*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
740*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
741*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
742*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
743*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
744*9880d681SAndroid Build Coastguard Worker}
745*9880d681SAndroid Build Coastguard Worker
746*9880d681SAndroid Build Coastguard Worker;
747*9880d681SAndroid Build Coastguard Worker; Non-consecutive test.
748*9880d681SAndroid Build Coastguard Worker;
749*9880d681SAndroid Build Coastguard Worker
; Builds a <4 x float> from two unrelated pointers: lane 0 is the float loaded
; from %ptr0, lane 1 is the constant 0.0, and lanes 2 and 3 both hold the float
; loaded from %ptr1.
; The assertions below expect two scalar (v)movss loads, a duplication of the
; second value (unpcklps, or vpermilps for the AVX configurations), and a
; final (v)unpcklpd that joins the two halves. The i686 configuration first
; fetches both pointer arguments from the stack.
750*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp {
751*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: merge_4f32_f32_X0YY:
752*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
753*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
754*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
755*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
756*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
757*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
758*9880d681SAndroid Build Coastguard Worker;
759*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: merge_4f32_f32_X0YY:
760*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
761*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
762*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
763*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
764*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
765*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
766*9880d681SAndroid Build Coastguard Worker;
767*9880d681SAndroid Build Coastguard Worker; X32-SSE-LABEL: merge_4f32_f32_X0YY:
768*9880d681SAndroid Build Coastguard Worker; X32-SSE:       # BB#0:
769*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
770*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
771*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
772*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
773*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
774*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
775*9880d681SAndroid Build Coastguard Worker; X32-SSE-NEXT:    retl
776*9880d681SAndroid Build Coastguard Worker  %val0 = load float, float* %ptr0, align 4
777*9880d681SAndroid Build Coastguard Worker  %val1 = load float, float* %ptr1, align 4
778*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <4 x float> undef, float %val0, i32 0
779*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <4 x float> %res0, float 0.000000e+00, i32 1
780*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <4 x float> %res1, float %val1, i32 2
781*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <4 x float> %res2, float %val1, i32 3
782*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res3
783*9880d681SAndroid Build Coastguard Worker}
784