; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/merge-consecutive-loads-512.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Builds an <8 x double> from <2 x double> loads at vector indices 1, 2 and 4
; of %ptr; the third 128-bit quarter of the result is undef.
; The expected code merges the two adjacent loads (indices 1 and 2) into one
; 256-bit vmovupd at byte offset 16 and inserts index 4 from byte offset 64.
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Builds an <8 x double> from <2 x double> loads at vector indices 2, 3 and 5
; of %ptr; the third 128-bit quarter of the result is zeroinitializer.
; The expected code merges the two adjacent loads (indices 2 and 3) into one
; 256-bit vmovupd at byte offset 32, and inserts index 5 (byte offset 80) into
; the upper half of a zeroed register.
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Builds an <8 x double> whose low half is zeroinitializer and whose high half
; is the <4 x double> loaded from vector index 2 of %ptr (byte offset 64).
; The expected code zeroes a register and folds the load into vinsertf64x4.
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL:       # BB#0:
; ALL-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %val1 = load <4 x double>, <4 x double>* %ptr1
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Inserts scalar doubles loaded from element indices 2, 3 and 9 of %ptr into
; lanes 0, 1 and 7 of an otherwise-undef <8 x double>.
; The expected code is a single 512-bit vmovupd at byte offset 16.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Inserts scalar doubles loaded from element indices 1 and 2 of %ptr into
; lanes 0 and 1 of an <8 x double>; lanes 2, 3, 6 and 7 are set to 0.0 and
; lanes 4 and 5 stay undef.
; The expected code merges the two loads into one 128-bit vmovupd at byte
; offset 8 and widens it with inserts of zeroed registers.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %xmm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %xmm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res2 = insertelement <8 x double> %res1, double   0.0, i32 2
  %res3 = insertelement <8 x double> %res2, double   0.0, i32 3
  %res6 = insertelement <8 x double> %res3, double   0.0, i32 6
  %res7 = insertelement <8 x double> %res6, double   0.0, i32 7
  ret <8 x double> %res7
}

; Inserts scalar doubles loaded from element indices 1, 3, 5 and 8 of %ptr
; into lanes 0, 2, 4 and 7 of an <8 x double>; lane 5 is set to 0.0 and
; lanes 1, 3 and 6 stay undef.
; The expected code is one 512-bit vmovupd at byte offset 8 followed by a
; vpermt2pd against a zeroed register to place the zero lane.
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
  %val0 = load double, double* %ptr0
  %val2 = load double, double* %ptr2
  %val4 = load double, double* %ptr4
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res2 = insertelement <8 x double> %res0, double %val2, i32 2
  %res4 = insertelement <8 x double> %res2, double %val4, i32 4
  %res5 = insertelement <8 x double> %res4, double   0.0, i32 5
  %res7 = insertelement <8 x double> %res5, double %val7, i32 7
  ret <8 x double> %res7
}

; Builds an <8 x i64> whose low half is zeroinitializer and whose high half is
; the <4 x i64> loaded from vector index 3 of %ptr (byte offset 96).
; The expected code zeroes a register and folds the load into vinserti64x4.
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL:       # BB#0:
; ALL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %val1 = load <4 x i64>, <4 x i64>* %ptr1
  %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}

; Inserts i64 values loaded from element indices 5, 6 and 9 of %ptr into
; lanes 0, 1 and 4 of an <8 x i64>; lanes 2, 3, 6 and 7 are set to 0 and
; lane 5 stays undef.
; The expected code merges indices 5 and 6 into one 128-bit vmovdqu at byte
; offset 40, loads the lane-4 value with a zero-extending vmovq, and combines
; the halves with vinserti64x4.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu 40(%rdi), %xmm0
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu 40(%eax), %xmm0
; X32-AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val4 = load i64, i64* %ptr4
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <8 x i64> %res1, i64     0, i32 2
  %res3 = insertelement <8 x i64> %res2, i64     0, i32 3
  %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
  %res6 = insertelement <8 x i64> %res4, i64     0, i32 6
  %res7 = insertelement <8 x i64> %res6, i64     0, i32 7
  ret <8 x i64> %res7
}

; Inserts i64 values loaded from element indices 1, 3, 5 and 8 of %ptr into
; lanes 0, 2, 4 and 7 of an <8 x i64>; lane 5 is set to 0 and lanes 1, 3 and 6
; stay undef.
; The expected code is one 512-bit vmovdqu64 at byte offset 8 followed by a
; vpermt2q against a zeroed register to place the zero lane.
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %val0 = load i64, i64* %ptr0
  %val2 = load i64, i64* %ptr2
  %val4 = load i64, i64* %ptr4
  %val7 = load i64, i64* %ptr7
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
  %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
  %res5 = insertelement <8 x i64> %res4, i64     0, i32 5
  %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
  ret <8 x i64> %res7
}

; Inserts floats loaded from element indices 8 and 9 of %ptr into lanes 0 and
; 1 of a <16 x float>; lanes 2, 3, 4 and 15 are set to 0.0 and the remaining
; lanes stay undef.
; The expected code is a single vmovsd, which loads both floats as one 64-bit
; element and zeroes the rest of the xmm register.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res2 = insertelement <16 x float> %res1, float   0.0, i32 2
  %res3 = insertelement <16 x float> %res2, float   0.0, i32 3
  %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
  %resF = insertelement <16 x float> %res4, float   0.0, i32 15
  ret <16 x float> %resF
}

; Inserts floats loaded from element indices 4, 5 and 7 of %ptr into lanes 0,
; 1 and 3 of an otherwise-undef <16 x float>.
; The expected code is a single 128-bit vmovups at byte offset 16.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 16(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res3 = insertelement <16 x float> %res1, float %val3, i32 3
  ret <16 x float> %res3
}

; Inserts floats loaded from element indices 0, 3, 12, 14 and 15 of %ptr into
; the same-numbered lanes of an otherwise-undef <16 x float>.
; The expected code is a single 512-bit vmovups from the base pointer.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %resC = insertelement <16 x float> %res3, float %valC, i32 12
  %resE = insertelement <16 x float> %resC, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Inserts floats loaded from element indices 0, 3, 12, 14 and 15 of %ptr into
; the same-numbered lanes of a <16 x float>; lanes 4, 5 and 13 are set to 0.0
; and the remaining lanes stay undef.
; The expected code is one 512-bit vmovups from the base pointer followed by a
; vpermt2ps against a zeroed register to place the zero lanes.
; NOTE(review): the function name spells 'z' at lane 11 and 'u' at lane 13,
; whereas the IR zeroes lane 13 and leaves lane 11 undef (the expected mask has
; 29 at lane 13, which agrees with the IR). The name is left unchanged so the
; label stays stable - confirm which was intended.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
  %res5 = insertelement <16 x float> %res4, float   0.0, i32 5
  %resC = insertelement <16 x float> %res5, float %valC, i32 12
  %resD = insertelement <16 x float> %resC, float   0.0, i32 13
  %resE = insertelement <16 x float> %resD, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Builds a <16 x i32> from two adjacent scalar loads: lane 0 = ptr[1] and
; lane 1 = ptr[2]. Lanes 2, 3, 4 and 15 are constant zero; every other lane is
; left undef. The expected code for all three run lines is a single vmovq
; (xmm0 = mem[0],zero) instead of two separate element loads.
372*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
373*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
374*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
375*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
376*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
377*9880d681SAndroid Build Coastguard Worker;
378*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
379*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
380*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
381*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
382*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
383*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
384*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
385*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
386*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32* %ptr1
387*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
388*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
389*9880d681SAndroid Build Coastguard Worker  %res2 = insertelement <16 x i32> %res1, i32     0, i32 2
390*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i32> %res2, i32     0, i32 3
391*9880d681SAndroid Build Coastguard Worker  %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
392*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i32> %res4, i32     0, i32 15
393*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %resF
394*9880d681SAndroid Build Coastguard Worker}
395*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i32> with lane 0 = ptr[2], lane 1 = ptr[3] and lane 3 = ptr[5];
; lane 2 and lanes 4-15 stay undef. The expected code is one unaligned 128-bit
; vmovups from byte offset 8 (the address of ptr[2]), covering the undef lane 2
; as well.
396*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
397*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
398*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
399*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovups 8(%rdi), %xmm0
400*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
401*9880d681SAndroid Build Coastguard Worker;
402*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
403*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
404*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
405*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
406*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
407*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
408*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
409*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
410*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
411*9880d681SAndroid Build Coastguard Worker  %val1 = load i32, i32* %ptr1
412*9880d681SAndroid Build Coastguard Worker  %val3 = load i32, i32* %ptr3
413*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
414*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
415*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
416*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %res3
417*9880d681SAndroid Build Coastguard Worker}
418*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i32> where lanes 0, 3, 12, 14 and 15 are loaded from the
; matching indices ptr[0], ptr[3], ptr[12], ptr[14] and ptr[15]; all other lanes
; are undef. Because each defined lane N reads ptr[N], the expected code is one
; full-width unaligned vmovdqu32 from the base pointer.
419*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
420*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
421*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
422*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
423*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
424*9880d681SAndroid Build Coastguard Worker;
425*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
426*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
427*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
428*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
429*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
430*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
431*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
432*9880d681SAndroid Build Coastguard Worker  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
433*9880d681SAndroid Build Coastguard Worker  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
434*9880d681SAndroid Build Coastguard Worker  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
435*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
436*9880d681SAndroid Build Coastguard Worker  %val3 = load i32, i32* %ptr3
437*9880d681SAndroid Build Coastguard Worker  %valC = load i32, i32* %ptrC
438*9880d681SAndroid Build Coastguard Worker  %valE = load i32, i32* %ptrE
439*9880d681SAndroid Build Coastguard Worker  %valF = load i32, i32* %ptrF
440*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
441*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
442*9880d681SAndroid Build Coastguard Worker  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
443*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
444*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
445*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %resF
446*9880d681SAndroid Build Coastguard Worker}
447*9880d681SAndroid Build Coastguard Worker
; Builds a <16 x i32> where lanes 0, 3, 12, 14 and 15 are loaded from the
; matching indices of ptr, lanes 4, 5 and 13 are constant zero, and the
; remaining lanes are undef. The expected code loads the full vector with
; vmovdqu32, zeroes a second register with vpxord, and uses vpermt2d to pull
; the zero lanes in: mask entries 20, 21 and 29 (16 + lane) select from the
; zeroed register, the other defined entries keep the loaded lane.
448*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
449*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
450*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
451*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
452*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
453*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
454*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
455*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
456*9880d681SAndroid Build Coastguard Worker;
457*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
458*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
459*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
460*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
461*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
462*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
463*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
464*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
465*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
466*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
467*9880d681SAndroid Build Coastguard Worker  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
468*9880d681SAndroid Build Coastguard Worker  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
469*9880d681SAndroid Build Coastguard Worker  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
470*9880d681SAndroid Build Coastguard Worker  %val0 = load i32, i32* %ptr0
471*9880d681SAndroid Build Coastguard Worker  %val3 = load i32, i32* %ptr3
472*9880d681SAndroid Build Coastguard Worker  %valC = load i32, i32* %ptrC
473*9880d681SAndroid Build Coastguard Worker  %valE = load i32, i32* %ptrE
474*9880d681SAndroid Build Coastguard Worker  %valF = load i32, i32* %ptrF
475*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
476*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
477*9880d681SAndroid Build Coastguard Worker  %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
478*9880d681SAndroid Build Coastguard Worker  %res5 = insertelement <16 x i32> %res4, i32     0, i32 5
479*9880d681SAndroid Build Coastguard Worker  %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
480*9880d681SAndroid Build Coastguard Worker  %resD = insertelement <16 x i32> %resC, i32     0, i32 13
481*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
482*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
483*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %resF
484*9880d681SAndroid Build Coastguard Worker}
485*9880d681SAndroid Build Coastguard Worker
; Builds a <32 x i16> with lane 0 = ptr[1], lane 1 = ptr[2] and lane 3 = ptr[4];
; lanes 30 and 31 are constant zero and the rest are undef. This test has
; per-feature check prefixes instead of a shared one: with +avx512bw only a
; single vmovq is expected, while the runs without it also expect a vxorps that
; zeroes ymm1.
; NOTE(review): presumably <32 x i16> is returned as two 256-bit halves without
; avx512bw, so ymm1 carries the zeroed lanes 30-31 - confirm.
486*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
487*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
488*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
489*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
490*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
491*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
492*9880d681SAndroid Build Coastguard Worker;
493*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
494*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
495*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
496*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
497*9880d681SAndroid Build Coastguard Worker;
498*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
499*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
500*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
501*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
502*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
503*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
504*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
505*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
506*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
507*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
508*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
509*9880d681SAndroid Build Coastguard Worker  %val3 = load i16, i16* %ptr3
510*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
511*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
512*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
513*9880d681SAndroid Build Coastguard Worker  %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
514*9880d681SAndroid Build Coastguard Worker  %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
515*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res31
516*9880d681SAndroid Build Coastguard Worker}
517*9880d681SAndroid Build Coastguard Worker
; Builds a <32 x i16> with lane 0 = ptr[4], lane 1 = ptr[5] and lane 3 = ptr[7];
; every other lane is undef and no zero lanes are requested. The expected code
; for all run lines is a single 64-bit vmovq load.
518*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
519*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
520*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
521*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
522*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
523*9880d681SAndroid Build Coastguard Worker;
524*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
525*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
526*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
527*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
528*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
529*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
530*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
531*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
532*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
533*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
534*9880d681SAndroid Build Coastguard Worker  %val3 = load i16, i16* %ptr3
535*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
536*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
537*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
538*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %res3
539*9880d681SAndroid Build Coastguard Worker}
540*9880d681SAndroid Build Coastguard Worker
; Builds a <32 x i16> with lane 0 = ptr[2] and lane 1 = ptr[3]; lanes 3, 14, 15,
; 16 and 17 are constant zero and the rest are undef. The two adjacent i16
; loads are expected to become one 32-bit vmovd (xmm0 = mem[0],zero,zero,zero).
; The runs without +avx512bw additionally expect a vxorps that zeroes ymm1.
; NOTE(review): presumably ymm1 is the upper half of the result holding the
; zeroed lanes 16-17 when the vector is split - confirm.
541*9880d681SAndroid Build Coastguard Workerdefine <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
542*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
543*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
544*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
545*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
546*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
547*9880d681SAndroid Build Coastguard Worker;
548*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
549*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
550*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
551*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
552*9880d681SAndroid Build Coastguard Worker;
553*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
554*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
555*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
556*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
557*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
558*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
559*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
560*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
561*9880d681SAndroid Build Coastguard Worker  %val0 = load i16, i16* %ptr0
562*9880d681SAndroid Build Coastguard Worker  %val1 = load i16, i16* %ptr1
563*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
564*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
565*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <32 x i16> %res1, i16     0, i16 3
566*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <32 x i16> %res3, i16     0, i16 14
567*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <32 x i16> %resE, i16     0, i16 15
568*9880d681SAndroid Build Coastguard Worker  %resG = insertelement <32 x i16> %resF, i16     0, i16 16
569*9880d681SAndroid Build Coastguard Worker  %resH = insertelement <32 x i16> %resG, i16     0, i16 17
570*9880d681SAndroid Build Coastguard Worker  ret <32 x i16> %resH
571*9880d681SAndroid Build Coastguard Worker}
572*9880d681SAndroid Build Coastguard Worker
; Builds a <64 x i8> with lanes 0, 1, 3 and 7 loaded from ptr[1], ptr[2], ptr[4]
; and ptr[8]; lanes 14-17 and 63 are constant zero and the rest are undef. The
; defined byte loads span eight consecutive bytes starting at ptr[1], and the
; expected code is a single 64-bit vmovq. The runs without +avx512bw also
; expect a vxorps that zeroes ymm1.
; NOTE(review): presumably ymm1 is the upper half of a split result holding the
; zeroed lane 63 - confirm.
573*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
574*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
575*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
576*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
577*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
578*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
579*9880d681SAndroid Build Coastguard Worker;
580*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
581*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
582*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
583*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
584*9880d681SAndroid Build Coastguard Worker;
585*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
586*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
587*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
588*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
589*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
590*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
591*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
592*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
593*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
594*9880d681SAndroid Build Coastguard Worker  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
595*9880d681SAndroid Build Coastguard Worker  %val0 = load i8, i8* %ptr0
596*9880d681SAndroid Build Coastguard Worker  %val1 = load i8, i8* %ptr1
597*9880d681SAndroid Build Coastguard Worker  %val3 = load i8, i8* %ptr3
598*9880d681SAndroid Build Coastguard Worker  %val7 = load i8, i8* %ptr7
599*9880d681SAndroid Build Coastguard Worker  %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
600*9880d681SAndroid Build Coastguard Worker  %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
601*9880d681SAndroid Build Coastguard Worker  %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
602*9880d681SAndroid Build Coastguard Worker  %res7  = insertelement <64 x i8> %res3,  i8 %val7, i8 7
603*9880d681SAndroid Build Coastguard Worker  %res14 = insertelement <64 x i8> %res7,  i8     0, i8 14
604*9880d681SAndroid Build Coastguard Worker  %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
605*9880d681SAndroid Build Coastguard Worker  %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
606*9880d681SAndroid Build Coastguard Worker  %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
607*9880d681SAndroid Build Coastguard Worker  %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
608*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res63
609*9880d681SAndroid Build Coastguard Worker}
610*9880d681SAndroid Build Coastguard Worker
; Builds a <64 x i8> with lanes 0, 1 and 3 loaded from ptr[1], ptr[2] and
; ptr[4]; lanes 14-17 and 63 are constant zero and the rest are undef. With
; only four consecutive bytes involved, the expected code is a single 32-bit
; vmovd (xmm0 = mem[0],zero,zero,zero). The runs without +avx512bw also expect
; a vxorps that zeroes ymm1.
; NOTE(review): presumably ymm1 is the upper half of a split result holding the
; zeroed lane 63 - confirm.
611*9880d681SAndroid Build Coastguard Workerdefine <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
612*9880d681SAndroid Build Coastguard Worker; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
613*9880d681SAndroid Build Coastguard Worker; AVX512F:       # BB#0:
614*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
615*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
616*9880d681SAndroid Build Coastguard Worker; AVX512F-NEXT:    retq
617*9880d681SAndroid Build Coastguard Worker;
618*9880d681SAndroid Build Coastguard Worker; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
619*9880d681SAndroid Build Coastguard Worker; AVX512BW:       # BB#0:
620*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
621*9880d681SAndroid Build Coastguard Worker; AVX512BW-NEXT:    retq
622*9880d681SAndroid Build Coastguard Worker;
623*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
624*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
625*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
626*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
627*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
628*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
629*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
630*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
631*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
632*9880d681SAndroid Build Coastguard Worker  %val0 = load i8, i8* %ptr0
633*9880d681SAndroid Build Coastguard Worker  %val1 = load i8, i8* %ptr1
634*9880d681SAndroid Build Coastguard Worker  %val3 = load i8, i8* %ptr3
635*9880d681SAndroid Build Coastguard Worker  %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
636*9880d681SAndroid Build Coastguard Worker  %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
637*9880d681SAndroid Build Coastguard Worker  %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
638*9880d681SAndroid Build Coastguard Worker  %res14 = insertelement <64 x i8> %res3,  i8     0, i8 14
639*9880d681SAndroid Build Coastguard Worker  %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
640*9880d681SAndroid Build Coastguard Worker  %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
641*9880d681SAndroid Build Coastguard Worker  %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
642*9880d681SAndroid Build Coastguard Worker  %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
643*9880d681SAndroid Build Coastguard Worker  ret <64 x i8> %res63
644*9880d681SAndroid Build Coastguard Worker}
645*9880d681SAndroid Build Coastguard Worker
646*9880d681SAndroid Build Coastguard Worker;
647*9880d681SAndroid Build Coastguard Worker; Consecutive loads must not be combined if any of them is volatile.
648*9880d681SAndroid Build Coastguard Worker;
649*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x double> with lane 0 = ptr[2] (a volatile load), lane 1 = ptr[3]
; and lane 7 = ptr[9]; lanes 2-6 are undef. Although ptr[2] and ptr[3] are
; adjacent, the expected code keeps them as separate vmovsd and vmovhpd loads
; rather than one wider load. Lane 7 is produced by a vbroadcastsd from byte
; offset 72 (the address of ptr[9]) and inserted as the upper 256 bits with
; vinsertf64x4.
650*9880d681SAndroid Build Coastguard Workerdefine <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
651*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
652*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
653*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
654*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
655*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
656*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
657*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
658*9880d681SAndroid Build Coastguard Worker;
659*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
660*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
661*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
662*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
663*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
664*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
665*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
666*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
667*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
668*9880d681SAndroid Build Coastguard Worker  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
669*9880d681SAndroid Build Coastguard Worker  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
670*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile double, double* %ptr0
671*9880d681SAndroid Build Coastguard Worker  %val1 = load double, double* %ptr1
672*9880d681SAndroid Build Coastguard Worker  %val7 = load double, double* %ptr7
673*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <8 x double> undef, double %val0, i32 0
674*9880d681SAndroid Build Coastguard Worker  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
675*9880d681SAndroid Build Coastguard Worker  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
676*9880d681SAndroid Build Coastguard Worker  ret <8 x double> %res7
677*9880d681SAndroid Build Coastguard Worker}
678*9880d681SAndroid Build Coastguard Worker
; Same lane layout as the non-volatile merge_16i32_i32_0uu3uuuuuuuuCuEF test
; (lanes 0, 3, 12, 14 and 15 loaded from the matching indices of ptr, all other
; lanes undef), but every load is volatile. The expected code therefore keeps
; one memory access per element (vmovd / vpinsrd at byte offsets 12, 56 and 60
; for ptr[3], ptr[14] and ptr[15]) and assembles the result with vinserti128
; and vinserti64x4 instead of issuing a single 512-bit load.
679*9880d681SAndroid Build Coastguard Workerdefine <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
680*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
681*9880d681SAndroid Build Coastguard Worker; ALL:       # BB#0:
682*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
683*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
684*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
685*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
686*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
687*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
688*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
689*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
690*9880d681SAndroid Build Coastguard Worker;
691*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
692*9880d681SAndroid Build Coastguard Worker; X32-AVX512F:       # BB#0:
693*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
694*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
695*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
696*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
697*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
698*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
699*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
700*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
701*9880d681SAndroid Build Coastguard Worker; X32-AVX512F-NEXT:    retl
702*9880d681SAndroid Build Coastguard Worker  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
703*9880d681SAndroid Build Coastguard Worker  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
704*9880d681SAndroid Build Coastguard Worker  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
705*9880d681SAndroid Build Coastguard Worker  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
706*9880d681SAndroid Build Coastguard Worker  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
707*9880d681SAndroid Build Coastguard Worker  %val0 = load volatile i32, i32* %ptr0
708*9880d681SAndroid Build Coastguard Worker  %val3 = load volatile i32, i32* %ptr3
709*9880d681SAndroid Build Coastguard Worker  %valC = load volatile i32, i32* %ptrC
710*9880d681SAndroid Build Coastguard Worker  %valE = load volatile i32, i32* %ptrE
711*9880d681SAndroid Build Coastguard Worker  %valF = load volatile i32, i32* %ptrF
712*9880d681SAndroid Build Coastguard Worker  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
713*9880d681SAndroid Build Coastguard Worker  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
714*9880d681SAndroid Build Coastguard Worker  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
715*9880d681SAndroid Build Coastguard Worker  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
716*9880d681SAndroid Build Coastguard Worker  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
717*9880d681SAndroid Build Coastguard Worker  ret <16 x i32> %resF
718*9880d681SAndroid Build Coastguard Worker}
719