xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx-vperm2x128.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
4*9880d681SAndroid Build Coastguard Worker
; Swap the two 128-bit halves of %a (mask 4,5,6,7,0,1,2,3; no element of %b
; is selected). Both llc configurations expect a single vperm2f128 on ymm0.
5*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
6*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_45670123:
7*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
8*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
9*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
10*9880d681SAndroid Build Coastguard Workerentry:
11*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
12*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
13*9880d681SAndroid Build Coastguard Worker}
14*9880d681SAndroid Build Coastguard Worker
; Memory-operand variant of the half swap: %a and %b are loaded from %pa/%pb,
; but the mask only selects elements of %a. The expected output is a single
; vperm2f128 whose source is the memory operand (the load is folded).
; NOTE(review): the function is marked readnone yet performs loads -- confirm
; the attribute is intentional.
15*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_45670123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
16*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_45670123_mem:
17*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
18*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,0,1]
19*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
20*9880d681SAndroid Build Coastguard Workerentry:
21*9880d681SAndroid Build Coastguard Worker  %a = load <8 x float>, <8 x float>* %pa
22*9880d681SAndroid Build Coastguard Worker  %b = load <8 x float>, <8 x float>* %pb
23*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
24*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
25*9880d681SAndroid Build Coastguard Worker}
26*9880d681SAndroid Build Coastguard Worker
; Keep elements 0-3 of %a and take elements 4-7 of %b (mask indices 12-15).
; No lane crosses a 128-bit boundary, so both llc configurations expect a
; vblendpd rather than a vperm2f128.
27*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_0123cdef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
28*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_0123cdef:
29*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
30*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
31*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
32*9880d681SAndroid Build Coastguard Workerentry:
33*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
34*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
35*9880d681SAndroid Build Coastguard Worker}
36*9880d681SAndroid Build Coastguard Worker
; Duplicate the low 128 bits of %a into both halves (mask 0,1,2,3,0,1,2,3).
; With +avx the expected code is a vinsertf128 of xmm0 into the upper half;
; with +avx2 it is a vpermpd selecting [0,1,0,1].
37*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_01230123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
38*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v8f32_01230123:
39*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
40*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
41*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
42*9880d681SAndroid Build Coastguard Worker;
43*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v8f32_01230123:
44*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
45*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
46*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
47*9880d681SAndroid Build Coastguard Workerentry:
48*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
49*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
50*9880d681SAndroid Build Coastguard Worker}
51*9880d681SAndroid Build Coastguard Worker
; Memory-operand variant of the low-half duplication. With +avx the expected
; code loads the vector with vmovaps and then uses vinsertf128; with +avx2 the
; load is folded into a vpermpd on the memory operand.
; NOTE(review): the function is marked readnone yet performs loads -- confirm
; the attribute is intentional.
52*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_01230123_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
53*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v8f32_01230123_mem:
54*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
55*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rdi), %ymm0
56*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
57*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
58*9880d681SAndroid Build Coastguard Worker;
59*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v8f32_01230123_mem:
60*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
61*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = mem[0,1,0,1]
62*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
63*9880d681SAndroid Build Coastguard Workerentry:
64*9880d681SAndroid Build Coastguard Worker  %a = load <8 x float>, <8 x float>* %pa
65*9880d681SAndroid Build Coastguard Worker  %b = load <8 x float>, <8 x float>* %pb
66*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
67*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
68*9880d681SAndroid Build Coastguard Worker}
69*9880d681SAndroid Build Coastguard Worker
; Duplicate the high 128 bits of %a into both halves (mask 4,5,6,7,4,5,6,7).
; Both llc configurations expect a single vperm2f128 selecting [2,3,2,3].
70*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_45674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
71*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_45674567:
72*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
73*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
74*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
75*9880d681SAndroid Build Coastguard Workerentry:
76*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
77*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
78*9880d681SAndroid Build Coastguard Worker}
79*9880d681SAndroid Build Coastguard Worker
; Memory-operand variant of the high-half duplication: the expected output is
; a single vperm2f128 reading its source directly from memory.
; NOTE(review): the function is marked readnone yet performs loads -- confirm
; the attribute is intentional.
80*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_45674567_mem(<8 x float>* %pa, <8 x float>* %pb) nounwind uwtable readnone ssp {
81*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_45674567_mem:
82*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
83*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3,2,3]
84*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
85*9880d681SAndroid Build Coastguard Workerentry:
86*9880d681SAndroid Build Coastguard Worker  %a = load <8 x float>, <8 x float>* %pa
87*9880d681SAndroid Build Coastguard Worker  %b = load <8 x float>, <8 x float>* %pb
88*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
89*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
90*9880d681SAndroid Build Coastguard Worker}
91*9880d681SAndroid Build Coastguard Worker
; Byte-vector version of the high-half duplication: mask indices 16-31 are
; repeated twice, so the upper 16 bytes of %a fill both halves. Both llc
; configurations expect a single vperm2f128 selecting [2,3,2,3].
92*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @shuffle_v32i8_2323(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
93*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v32i8_2323:
94*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
95*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
96*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
97*9880d681SAndroid Build Coastguard Workerentry:
98*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
99*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %shuffle
100*9880d681SAndroid Build Coastguard Worker}
101*9880d681SAndroid Build Coastguard Worker
; Same high-half duplication as shuffle_v32i8_2323, but the source first goes
; through an integer add. With +avx the add is expected on the extracted upper
; xmm half (vextractf128 / vpaddb / vinsertf128) followed by vperm2f128; with
; +avx2 the add is a full-width vpaddb and the shuffle becomes the integer
; form vperm2i128.
102*9880d681SAndroid Build Coastguard Workerdefine <32 x i8> @shuffle_v32i8_2323_domain(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
103*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v32i8_2323_domain:
104*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
105*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
106*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
107*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
108*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
109*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
110*9880d681SAndroid Build Coastguard Worker;
111*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v32i8_2323_domain:
112*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
113*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
114*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
115*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
116*9880d681SAndroid Build Coastguard Workerentry:
117*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
118*9880d681SAndroid Build Coastguard Worker  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
119*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
120*9880d681SAndroid Build Coastguard Worker  ret <32 x i8> %shuffle
121*9880d681SAndroid Build Coastguard Worker}
122*9880d681SAndroid Build Coastguard Worker
; Two-source 128-bit shuffle: the result's low half is the high half of %b
; (mask 6,7) and its high half is the low half of %a (mask 0,1). Both llc
; configurations expect a single vperm2f128 combining ymm1 and ymm0.
123*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @shuffle_v4i64_6701(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
124*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4i64_6701:
125*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
126*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
127*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
128*9880d681SAndroid Build Coastguard Workerentry:
129*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
130*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %shuffle
131*9880d681SAndroid Build Coastguard Worker}
132*9880d681SAndroid Build Coastguard Worker
; Same mask as shuffle_v4i64_6701, with an integer add applied to %a first.
; With +avx only the low xmm half of %a is added (vpaddq on xmm0) before a
; vperm2f128; with +avx2 the constant is broadcast, added at full width, and
; the shuffle is the integer form vperm2i128.
133*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @shuffle_v4i64_6701_domain(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
134*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v4i64_6701_domain:
135*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
136*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
137*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
138*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
139*9880d681SAndroid Build Coastguard Worker;
140*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v4i64_6701_domain:
141*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
142*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
143*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
144*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
145*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
146*9880d681SAndroid Build Coastguard Workerentry:
147*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
148*9880d681SAndroid Build Coastguard Worker  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
149*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
150*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %shuffle
151*9880d681SAndroid Build Coastguard Worker}
152*9880d681SAndroid Build Coastguard Worker
; Mask u,5,u,7,12,13,14,15: with the undef slots filled in, the low half of
; the result is the high half of (%a + 1) and the high half is the high half
; of %b. With +avx the add is done on the extracted upper xmm half and the
; shuffle is vperm2f128; with +avx2 the add is full width (broadcast constant)
; and the shuffle is vperm2i128.
153*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @shuffle_v8i32_u5u7cdef(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
154*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v8i32_u5u7cdef:
155*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
156*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
157*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
158*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
159*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
160*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
161*9880d681SAndroid Build Coastguard Worker;
162*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v8i32_u5u7cdef:
163*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
164*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
165*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
166*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
167*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
168*9880d681SAndroid Build Coastguard Workerentry:
169*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
170*9880d681SAndroid Build Coastguard Worker  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
171*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
172*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %shuffle
173*9880d681SAndroid Build Coastguard Worker}
174*9880d681SAndroid Build Coastguard Worker
; Result low half = low half of %b (mask 16-23), result high half = low half
; of (%a + 1) (mask 0-7). Because the high half comes from a low half, the
; expected lowering is a 128-bit insert into ymm1 rather than a vperm2x128:
; vinsertf128 with +avx (after an xmm-wide vpaddw) and vinserti128 with +avx2
; (after a ymm-wide vpaddw).
175*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @shuffle_v16i16_4501(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
176*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v16i16_4501:
177*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
178*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
179*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
180*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
181*9880d681SAndroid Build Coastguard Worker;
182*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v16i16_4501:
183*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
184*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
185*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
186*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
187*9880d681SAndroid Build Coastguard Workerentry:
188*9880d681SAndroid Build Coastguard Worker  ; add forces execution domain
189*9880d681SAndroid Build Coastguard Worker  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
190*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
191*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %shuffle
192*9880d681SAndroid Build Coastguard Worker}
193*9880d681SAndroid Build Coastguard Worker
; Memory-operand variant of shuffle_v16i16_4501: both vectors are loaded from
; the pointer arguments, 1 is added to the first, and the same mask is used.
; The expected code loads both operands into registers, performs the vpaddw,
; and finishes with vinsertf128 (+avx) or vinserti128 (+avx2).
; NOTE(review): the function is marked readnone yet performs loads -- confirm
; the attribute is intentional.
194*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @shuffle_v16i16_4501_mem(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
195*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v16i16_4501_mem:
196*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
197*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
198*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps (%rsi), %ymm1
199*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
200*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
201*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
202*9880d681SAndroid Build Coastguard Worker;
203*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v16i16_4501_mem:
204*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
205*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
206*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
207*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
208*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
209*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
210*9880d681SAndroid Build Coastguard Workerentry:
211*9880d681SAndroid Build Coastguard Worker  %c = load <16 x i16>, <16 x i16>* %a
212*9880d681SAndroid Build Coastguard Worker  %d = load <16 x i16>, <16 x i16>* %b
213*9880d681SAndroid Build Coastguard Worker  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
214*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
215*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %shuffle
216*9880d681SAndroid Build Coastguard Worker}
217*9880d681SAndroid Build Coastguard Worker
218*9880d681SAndroid Build Coastguard Worker;;;; Cases with undef indices mixed in the mask
219*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,u,9,u,11: the defined indices are compatible with "high half
; of %a, low half of %b", so both llc configurations expect a single
; vperm2f128 producing ymm0[2,3],ymm1[0,1].
220*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu67u9ub(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
221*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu67u9ub:
222*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
223*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
224*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
225*9880d681SAndroid Build Coastguard Workerentry:
226*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
227*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
228*9880d681SAndroid Build Coastguard Worker}
229*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,u,u,6,7: the defined indices are compatible with duplicating
; the high half of %a, so a single vperm2f128 selecting [2,3,2,3] is expected.
230*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu67uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
231*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu67uu67:
232*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
233*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
234*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
235*9880d681SAndroid Build Coastguard Workerentry:
236*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
237*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
238*9880d681SAndroid Build Coastguard Worker}
239*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,u,u,10,11: defined indices match "high half of %a, low half of
; %b" (10,11 are elements 2,3 of %b), so a single vperm2f128 producing
; ymm0[2,3],ymm1[0,1] is expected.
240*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu67uuab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
241*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu67uuab:
242*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
243*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
244*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
245*9880d681SAndroid Build Coastguard Workerentry:
246*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 10, i32 11>
247*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
248*9880d681SAndroid Build Coastguard Worker}
249*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,u,u,14,15: defined indices match "high half of %a, high half
; of %b", so a single vperm2f128 producing ymm0[2,3],ymm1[2,3] is expected.
250*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu67uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
251*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu67uuef:
252*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
253*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
254*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
255*9880d681SAndroid Build Coastguard Workerentry:
256*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
257*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
258*9880d681SAndroid Build Coastguard Worker}
259*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,4,5,6,7: undef only in the low half; the defined indices match
; duplicating the high half of %a, so vperm2f128 selecting [2,3,2,3] is
; expected.
260*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu674567(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
261*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu674567:
262*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
263*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
264*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
265*9880d681SAndroid Build Coastguard Workerentry:
266*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
267*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
268*9880d681SAndroid Build Coastguard Worker}
269*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,8,9,10,11: undef only in the low half; high half of %a followed
; by the full low half of %b, so vperm2f128 producing ymm0[2,3],ymm1[0,1] is
; expected.
270*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu6789ab(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
271*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu6789ab:
272*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
273*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
274*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
275*9880d681SAndroid Build Coastguard Workerentry:
276*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
277*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
278*9880d681SAndroid Build Coastguard Worker}
279*9880d681SAndroid Build Coastguard Worker
; Mask 4,5,6,7,u,u,6,7: undef only in the high half; the defined indices match
; duplicating the high half of %a, so vperm2f128 selecting [2,3,2,3] is
; expected.
280*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_4567uu67(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
281*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_4567uu67:
282*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
283*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
284*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
285*9880d681SAndroid Build Coastguard Workerentry:
286*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
287*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
288*9880d681SAndroid Build Coastguard Worker}
289*9880d681SAndroid Build Coastguard Worker
; Mask 4,5,6,7,u,u,14,15: high half of %a, then (with undefs filled in) the
; high half of %b, so vperm2f128 producing ymm0[2,3],ymm1[2,3] is expected.
290*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_4567uuef(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
291*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_4567uuef:
292*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
293*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
294*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
295*9880d681SAndroid Build Coastguard Workerentry:
296*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 14, i32 15>
297*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
298*9880d681SAndroid Build Coastguard Worker}
299*9880d681SAndroid Build Coastguard Worker
300*9880d681SAndroid Build Coastguard Worker;;;; Cases we must not select a lone vperm2f128 (an extra in-lane permute is required)
301*9880d681SAndroid Build Coastguard Worker
; Mask u,u,6,7,u,12,u,15: result element 5 takes index 12 (element 4 of %b),
; which is not the element a plain 128-bit half move would place there. The
; expected code is therefore a vperm2f128 followed by an in-lane vpermilps
; fix-up, not a vperm2f128 on its own.
302*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @shuffle_v8f32_uu67ucuf(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
303*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v8f32_uu67ucuf:
304*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0: ## %entry
305*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
306*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
307*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
308*9880d681SAndroid Build Coastguard Workerentry:
309*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
310*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %shuffle
311*9880d681SAndroid Build Coastguard Worker}
312*9880d681SAndroid Build Coastguard Worker
313*9880d681SAndroid Build Coastguard Worker;; Test zero mask generation.
314*9880d681SAndroid Build Coastguard Worker;; PR22984: https://llvm.org/bugs/show_bug.cgi?id=22984
315*9880d681SAndroid Build Coastguard Worker;; Prefer xor+vblendpd over vperm2f128 because that has better performance.
316*9880d681SAndroid Build Coastguard Worker;; TODO: When building for optsize we should use vperm2f128.
317*9880d681SAndroid Build Coastguard Worker
; Result = <0.0, 0.0, a[0], a[1]>: mask 4,5 picks the two zero elements of the
; constant second operand, mask 0,1 picks the low half of %a. The low half of
; %a has to move to the high half, so a vperm2f128 with a zeroed low half is
; expected.
318*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz01(<4 x double> %a) {
319*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz01:
320*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
321*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
322*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
323*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
324*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
325*9880d681SAndroid Build Coastguard Worker}
; optsize variant of shuffle_v4f64_zz01 (identical IR body); the expected
; output is the same single vperm2f128 with a zeroed low half.
326*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz01_optsize(<4 x double> %a) optsize {
327*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz01_optsize:
328*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
329*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
330*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
331*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
332*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
333*9880d681SAndroid Build Coastguard Worker}
334*9880d681SAndroid Build Coastguard Worker
; Result = <0.0, 0.0, a[2], a[3]>: the high half of %a stays in place and only
; the low half is zeroed, so the expected code is vxorpd (materialize zero)
; plus vblendpd instead of vperm2f128.
335*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz23(<4 x double> %a) {
336*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz23:
337*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
338*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
339*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
340*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
341*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
342*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
343*9880d681SAndroid Build Coastguard Worker}
; optsize variant of shuffle_v4f64_zz23 (identical IR body). The checks still
; expect vxorpd + vblendpd here; the TODO in the section header says optsize
; should eventually use vperm2f128 instead.
344*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz23_optsize(<4 x double> %a) optsize {
345*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz23_optsize:
346*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
347*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
348*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
349*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
350*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
351*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
352*9880d681SAndroid Build Coastguard Worker}
353*9880d681SAndroid Build Coastguard Worker
; Operand-swapped form of shuffle_v4f64_zz01: the zero constant is the first
; shuffle operand and %a the second. Mask 0,1,4,5 again yields
; <0.0, 0.0, a[0], a[1]>, and the same vperm2f128 with a zeroed low half is
; expected.
354*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz45(<4 x double> %a) {
355*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz45:
356*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
357*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
358*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
359*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
360*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
361*9880d681SAndroid Build Coastguard Worker}
; optsize variant of shuffle_v4f64_zz45 (identical IR body); the expected
; output is the same single vperm2f128 with a zeroed low half.
362*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz45_optsize(<4 x double> %a) optsize {
363*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz45_optsize:
364*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
365*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
366*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
367*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
368*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
369*9880d681SAndroid Build Coastguard Worker}
370*9880d681SAndroid Build Coastguard Worker
;; The zero/undef constant is the first shuffle operand and %a the second, so
;; mask <0,1,6,7> produces { 0.0, 0.0, a[2], a[3] }: the high half of %a stays
;; in place and the low half is zeroed. Expected lowering: vxorpd clears ymm1,
;; then vblendpd takes elements 0-1 from ymm1 and elements 2-3 from ymm0.
371*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz67(<4 x double> %a) {
372*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz67:
373*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
374*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
375*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
376*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
377*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
378*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
379*9880d681SAndroid Build Coastguard Worker}
;; Same shuffle as shuffle_v4f64_zz67 ({ 0.0, 0.0, a[2], a[3] }) but with the
;; optsize function attribute. The expected output is still the
;; vxorpd + vblendpd pair.
380*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_zz67_optsize(<4 x double> %a) optsize {
381*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_zz67_optsize:
382*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
383*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
384*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
385*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
386*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
387*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
388*9880d681SAndroid Build Coastguard Worker}
389*9880d681SAndroid Build Coastguard Worker
;; %a is the first shuffle operand and the zero/undef constant the second, so
;; mask <0,1,4,5> produces { a[0], a[1], 0.0, 0.0 }: the low half of %a stays
;; in place and the high half is zeroed. Expected lowering: vxorpd clears ymm1,
;; then vblendpd takes elements 0-1 from ymm0 and elements 2-3 from ymm1.
390*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_01zz(<4 x double> %a) {
391*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_01zz:
392*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
393*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
394*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
395*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
396*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
397*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
398*9880d681SAndroid Build Coastguard Worker}
;; Same shuffle as shuffle_v4f64_01zz ({ a[0], a[1], 0.0, 0.0 }) but with the
;; optsize function attribute. The expected output is still the
;; vxorpd + vblendpd pair.
399*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_01zz_optsize(<4 x double> %a) optsize {
400*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_01zz_optsize:
401*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
402*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
403*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
404*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
405*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
406*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
407*9880d681SAndroid Build Coastguard Worker}
408*9880d681SAndroid Build Coastguard Worker
;; %a is the first shuffle operand and the zero/undef constant the second, so
;; mask <2,3,4,5> produces { a[2], a[3], 0.0, 0.0 }: the high half of %a moves
;; to the low half of the result. Expected lowering is a single vperm2f128
;; that places ymm0[2,3] in the low 128 bits and zeroes the high 128 bits.
409*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_23zz(<4 x double> %a) {
410*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_23zz:
411*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
412*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
413*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
414*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
415*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
416*9880d681SAndroid Build Coastguard Worker}
;; Same shuffle as shuffle_v4f64_23zz ({ a[2], a[3], 0.0, 0.0 }) but with the
;; optsize function attribute. The expected output is still the single
;; zeroing vperm2f128.
417*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_23zz_optsize(<4 x double> %a) optsize {
418*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_23zz_optsize:
419*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
420*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
421*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
422*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
423*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
424*9880d681SAndroid Build Coastguard Worker}
425*9880d681SAndroid Build Coastguard Worker
;; The zero/undef constant is the first shuffle operand and %a the second, so
;; mask <4,5,0,1> produces { a[0], a[1], 0.0, 0.0 }. This is the same result
;; as shuffle_v4f64_01zz with the operands commuted. Expected lowering: vxorpd
;; clears ymm1, then vblendpd takes elements 0-1 from ymm0 and 2-3 from ymm1.
426*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_45zz(<4 x double> %a) {
427*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_45zz:
428*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
429*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
430*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
431*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
432*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
433*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
434*9880d681SAndroid Build Coastguard Worker}
;; Same shuffle as shuffle_v4f64_45zz ({ a[0], a[1], 0.0, 0.0 }) but with the
;; optsize function attribute. The expected output is still the
;; vxorpd + vblendpd pair.
435*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_45zz_optsize(<4 x double> %a) optsize {
436*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_45zz_optsize:
437*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
438*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
439*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
440*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
441*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
442*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
443*9880d681SAndroid Build Coastguard Worker}
444*9880d681SAndroid Build Coastguard Worker
;; The zero/undef constant is the first shuffle operand and %a the second, so
;; mask <6,7,0,1> produces { a[2], a[3], 0.0, 0.0 }. This is the same result
;; as shuffle_v4f64_23zz with the operands commuted. Expected lowering is a
;; single vperm2f128 that places ymm0[2,3] in the low half and zeroes the rest.
445*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_67zz(<4 x double> %a) {
446*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_67zz:
447*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
448*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
449*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
450*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
451*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
452*9880d681SAndroid Build Coastguard Worker}
;; Same shuffle as shuffle_v4f64_67zz ({ a[2], a[3], 0.0, 0.0 }) but with the
;; optsize function attribute. The expected output is still the single
;; zeroing vperm2f128.
453*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @shuffle_v4f64_67zz_optsize(<4 x double> %a) optsize {
454*9880d681SAndroid Build Coastguard Worker; ALL-LABEL: shuffle_v4f64_67zz_optsize:
455*9880d681SAndroid Build Coastguard Worker; ALL:       ## BB#0:
456*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
457*9880d681SAndroid Build Coastguard Worker; ALL-NEXT:    retq
458*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
459*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %s
460*9880d681SAndroid Build Coastguard Worker}
461*9880d681SAndroid Build Coastguard Worker
462*9880d681SAndroid Build Coastguard Worker;; With AVX2 select the integer version of the instruction. Use an add to force the domain selection.
463*9880d681SAndroid Build Coastguard Worker
;; Integer counterpart of the 67zz pattern: mask <6,7,0,1> with the zero/undef
;; constant as first operand yields { a[2], a[3], 0, 0 }, which is then added
;; to %b so that the shuffle has an integer-domain user.
;; The +avx run expects vperm2f128 followed by a 256-bit add split into two
;; 128-bit vpaddq (vextractf128 for the high halves, vinsertf128 to recombine).
;; The +avx2 run expects the integer form vperm2i128 and one 256-bit vpaddq.
464*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
465*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: shuffle_v4i64_67zz:
466*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0:
467*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
468*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
469*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
470*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
471*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
472*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
473*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
474*9880d681SAndroid Build Coastguard Worker;
475*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: shuffle_v4i64_67zz:
476*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0:
477*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
478*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
479*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
480*9880d681SAndroid Build Coastguard Worker  %s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
481*9880d681SAndroid Build Coastguard Worker  %c = add <4 x i64> %b, %s
482*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %c
483*9880d681SAndroid Build Coastguard Worker}
484*9880d681SAndroid Build Coastguard Worker
485*9880d681SAndroid Build Coastguard Worker;;; Memory folding cases
486*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test, first operand loaded. %a comes from %pa; mask <2,3,4,5>
;; yields { a[2], a[3], b[0], b[1] } (high half of the load, low half of %b),
;; and 1.0 is then added to every lane.
;; Both runs expect the load folded into vperm2f128 (printed as mem[2,3]).
;; The +avx run adds a RIP-relative constant directly; the +avx2 run first
;; materialises the constant with vbroadcastsd and then uses a register vaddpd.
;; NOTE(review): the function is marked readnone yet loads through %pa -
;; confirm whether that attribute is intentional for this test.
487*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @ld0_hi0_lo1_4f64(<4 x double> * %pa, <4 x double> %b) nounwind uwtable readnone ssp {
488*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_4f64:
489*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
490*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
491*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
492*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
493*9880d681SAndroid Build Coastguard Worker;
494*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_4f64:
495*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
496*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
497*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
498*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
499*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
500*9880d681SAndroid Build Coastguard Workerentry:
501*9880d681SAndroid Build Coastguard Worker  %a = load <4 x double>, <4 x double> * %pa
502*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
503*9880d681SAndroid Build Coastguard Worker  %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0>
504*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
505*9880d681SAndroid Build Coastguard Worker}
506*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test, second operand loaded. %b comes from %pb; mask <2,3,6,7>
;; yields { a[2], a[3], b[2], b[3] } (the high halves of both inputs), and 1.0
;; is then added to every lane.
;; Both runs expect the load folded into vperm2f128 as its memory operand.
;; The +avx run adds a RIP-relative constant directly; the +avx2 run first
;; materialises the constant with vbroadcastsd and then uses a register vaddpd.
;; NOTE(review): the function is marked readnone yet loads through %pb -
;; confirm whether that attribute is intentional for this test.
507*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @ld1_hi0_hi1_4f64(<4 x double> %a, <4 x double> * %pb) nounwind uwtable readnone ssp {
508*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_4f64:
509*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
510*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
511*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vaddpd {{.*}}(%rip), %ymm0, %ymm0
512*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
513*9880d681SAndroid Build Coastguard Worker;
514*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_4f64:
515*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
516*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
517*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
518*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
519*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
520*9880d681SAndroid Build Coastguard Workerentry:
521*9880d681SAndroid Build Coastguard Worker  %b = load <4 x double>, <4 x double> * %pb
522*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
523*9880d681SAndroid Build Coastguard Worker  %res = fadd <4 x double> %shuffle, <double 1.0, double 1.0, double 1.0, double 1.0>
524*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
525*9880d681SAndroid Build Coastguard Worker}
526*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <8 x float>, first operand loaded. Mask
;; <4,5,6,7,8,9,10,11> selects the upper four floats of the loaded %a followed
;; by the lower four floats of %b; 1.0 is then added to every lane.
;; Both runs expect the load folded into vperm2f128 as its memory operand.
;; The +avx run adds a RIP-relative constant directly; the +avx2 run first
;; materialises the constant with vbroadcastss and then uses a register vaddps.
;; NOTE(review): the function is marked readnone yet loads through %pa -
;; confirm whether that attribute is intentional for this test.
527*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @ld0_hi0_lo1_8f32(<8 x float> * %pa, <8 x float> %b) nounwind uwtable readnone ssp {
528*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_8f32:
529*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
530*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
531*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
532*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
533*9880d681SAndroid Build Coastguard Worker;
534*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_8f32:
535*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
536*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
537*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
538*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
539*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
540*9880d681SAndroid Build Coastguard Workerentry:
541*9880d681SAndroid Build Coastguard Worker  %a = load <8 x float>, <8 x float> * %pa
542*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
543*9880d681SAndroid Build Coastguard Worker  %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
544*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
545*9880d681SAndroid Build Coastguard Worker}
546*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <8 x float>, second operand loaded. Mask
;; <4,5,6,7,12,13,14,15> selects the upper four floats of %a followed by the
;; upper four floats of the loaded %b; 1.0 is then added to every lane.
;; Both runs expect the load folded into vperm2f128 as its memory operand.
;; The +avx run adds a RIP-relative constant directly; the +avx2 run first
;; materialises the constant with vbroadcastss and then uses a register vaddps.
;; NOTE(review): the function is marked readnone yet loads through %pb -
;; confirm whether that attribute is intentional for this test.
547*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @ld1_hi0_hi1_8f32(<8 x float> %a, <8 x float> * %pb) nounwind uwtable readnone ssp {
548*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_8f32:
549*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
550*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
551*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
552*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
553*9880d681SAndroid Build Coastguard Worker;
554*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_8f32:
555*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
556*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
557*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
558*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
559*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
560*9880d681SAndroid Build Coastguard Workerentry:
561*9880d681SAndroid Build Coastguard Worker  %b = load <8 x float>, <8 x float> * %pb
562*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
563*9880d681SAndroid Build Coastguard Worker  %res = fadd <8 x float> %shuffle, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
564*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
565*9880d681SAndroid Build Coastguard Worker}
566*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <4 x i64>, first operand loaded. Mask <2,3,4,5>
;; yields { a[2], a[3], b[0], b[1] }, to which <1,2,3,4> is added so the
;; shuffle has an integer-domain user.
;; The +avx run expects the load folded into vperm2f128 and the 256-bit add
;; split into two 128-bit vpaddq with RIP-relative constant operands
;; (vextractf128 for the high half, vinsertf128 to recombine).
;; The +avx2 run expects the load folded into vperm2i128 and one 256-bit vpaddq.
;; NOTE(review): the function is marked readnone yet loads through %pa -
;; confirm whether that attribute is intentional for this test.
567*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @ld0_hi0_lo1_4i64(<4 x i64> * %pa, <4 x i64> %b) nounwind uwtable readnone ssp {
568*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_4i64:
569*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
570*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
571*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm1
572*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
573*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
574*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
575*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
576*9880d681SAndroid Build Coastguard Worker;
577*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_4i64:
578*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
579*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
580*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
581*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
582*9880d681SAndroid Build Coastguard Workerentry:
583*9880d681SAndroid Build Coastguard Worker  %a = load <4 x i64>, <4 x i64> * %pa
584*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
585*9880d681SAndroid Build Coastguard Worker  %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4>
586*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
587*9880d681SAndroid Build Coastguard Worker}
588*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <4 x i64>, second operand loaded. Mask <2,3,6,7>
;; yields { a[2], a[3], b[2], b[3] }, to which <1,2,3,4> is added so the
;; shuffle has an integer-domain user.
;; The +avx run expects the load folded into vperm2f128 and the 256-bit add
;; split into two 128-bit vpaddq with RIP-relative constant operands
;; (vextractf128 for the high half, vinsertf128 to recombine).
;; The +avx2 run expects the load folded into vperm2i128 and one 256-bit vpaddq.
;; NOTE(review): the function is marked readnone yet loads through %pb -
;; confirm whether that attribute is intentional for this test.
589*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @ld1_hi0_hi1_4i64(<4 x i64> %a, <4 x i64> * %pb) nounwind uwtable readnone ssp {
590*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_4i64:
591*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
592*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
593*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm1
594*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
595*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
596*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
597*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
598*9880d681SAndroid Build Coastguard Worker;
599*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_4i64:
600*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
601*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
602*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
603*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
604*9880d681SAndroid Build Coastguard Workerentry:
605*9880d681SAndroid Build Coastguard Worker  %b = load <4 x i64>, <4 x i64> * %pb
606*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
607*9880d681SAndroid Build Coastguard Worker  %res = add <4 x i64> %shuffle, <i64 1, i64 2, i64 3, i64 4>
608*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
609*9880d681SAndroid Build Coastguard Worker}
610*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <8 x i32>, first operand loaded. Mask
;; <4,5,6,7,8,9,10,11> selects the upper four elements of the loaded %a followed
;; by the lower four elements of %b; <1,2,3,4,1,2,3,4> is then added.
;; The +avx run expects the load folded into vperm2f128, the repeating
;; constant [1,2,3,4] loaded once into xmm2, and the add split into two
;; 128-bit vpaddd that are recombined with vinsertf128.
;; The +avx2 run expects the load folded into vperm2i128 and one 256-bit vpaddd.
;; NOTE(review): the function is marked readnone yet loads through %pa -
;; confirm whether that attribute is intentional for this test.
611*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @ld0_hi0_lo1_8i32(<8 x i32> * %pa, <8 x i32> %b) nounwind uwtable readnone ssp {
612*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld0_hi0_lo1_8i32:
613*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
614*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
615*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
616*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,2,3,4]
617*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
618*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
619*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
620*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
621*9880d681SAndroid Build Coastguard Worker;
622*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld0_hi0_lo1_8i32:
623*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
624*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = mem[2,3],ymm0[0,1]
625*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
626*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
627*9880d681SAndroid Build Coastguard Workerentry:
628*9880d681SAndroid Build Coastguard Worker  %a = load <8 x i32>, <8 x i32> * %pa
629*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
630*9880d681SAndroid Build Coastguard Worker  %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
631*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
632*9880d681SAndroid Build Coastguard Worker}
633*9880d681SAndroid Build Coastguard Worker
;; Memory-folding test on <8 x i32>, second operand loaded. Mask
;; <4,5,6,7,12,13,14,15> selects the upper four elements of %a followed by the
;; upper four elements of the loaded %b; <1,2,3,4,1,2,3,4> is then added.
;; The +avx run expects the load folded into vperm2f128, the repeating
;; constant [1,2,3,4] loaded once into xmm2, and the add split into two
;; 128-bit vpaddd that are recombined with vinsertf128.
;; The +avx2 run expects the load folded into vperm2i128 and one 256-bit vpaddd.
;; NOTE(review): the function is marked readnone yet loads through %pb -
;; confirm whether that attribute is intentional for this test.
634*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @ld1_hi0_hi1_8i32(<8 x i32> %a, <8 x i32> * %pb) nounwind uwtable readnone ssp {
635*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: ld1_hi0_hi1_8i32:
636*9880d681SAndroid Build Coastguard Worker; AVX1:       ## BB#0: ## %entry
637*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
638*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
639*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,2,3,4]
640*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
641*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
642*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
643*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
644*9880d681SAndroid Build Coastguard Worker;
645*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: ld1_hi0_hi1_8i32:
646*9880d681SAndroid Build Coastguard Worker; AVX2:       ## BB#0: ## %entry
647*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
648*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
649*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
650*9880d681SAndroid Build Coastguard Workerentry:
651*9880d681SAndroid Build Coastguard Worker  %b = load <8 x i32>, <8 x i32> * %pb
652*9880d681SAndroid Build Coastguard Worker  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
653*9880d681SAndroid Build Coastguard Worker  %res = add <8 x i32> %shuffle, <i32 1, i32 2, i32 3, i32 4, i32 1, i32 2, i32 3, i32 4>
654*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %res
655*9880d681SAndroid Build Coastguard Worker}
656