; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx2-builtins.c

; Expects the @llvm.x86.avx2.pabs.b call (operand and result bitcast between
; <4 x i64> and <32 x i8>) to be emitted as a single vpabsb on both targets.
define <4 x i64> @test_mm256_abs_epi8(<4 x i64> %a0) {
; X32-LABEL: test_mm256_abs_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpabsb %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_abs_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpabsb %ymm0, %ymm0
; X64-NEXT:    retq
  %arg = bitcast <4 x i64> %a0 to <32 x i8>
  %call = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %arg)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone

; Expects the @llvm.x86.avx2.pabs.w call (operand and result bitcast between
; <4 x i64> and <16 x i16>) to be emitted as a single vpabsw on both targets.
define <4 x i64> @test_mm256_abs_epi16(<4 x i64> %a0) {
; X32-LABEL: test_mm256_abs_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpabsw %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_abs_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpabsw %ymm0, %ymm0
; X64-NEXT:    retq
  %arg = bitcast <4 x i64> %a0 to <16 x i16>
  %call = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %arg)
  %res = bitcast <16 x i16> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone

; Expects the @llvm.x86.avx2.pabs.d call (operand and result bitcast between
; <4 x i64> and <8 x i32>) to be emitted as a single vpabsd on both targets.
define <4 x i64> @test_mm256_abs_epi32(<4 x i64> %a0) {
; X32-LABEL: test_mm256_abs_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpabsd %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_abs_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpabsd %ymm0, %ymm0
; X64-NEXT:    retq
  %arg = bitcast <4 x i64> %a0 to <8 x i32>
  %call = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %arg)
  %res = bitcast <8 x i32> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone

; Expects a generic IR `add` on <32 x i8> (operands bitcast from <4 x i64>)
; to be emitted as a single vpaddb on both targets.
define <4 x i64> @test_mm256_add_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_add_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = add <32 x i8> %arg0, %arg1
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}

; Expects a generic IR `add` on <16 x i16> (operands bitcast from <4 x i64>)
; to be emitted as a single vpaddw on both targets.
define <4 x i64> @test_mm256_add_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_add_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = add <16 x i16> %arg0, %arg1
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}

; Expects a generic IR `add` on <8 x i32> (operands bitcast from <4 x i64>)
; to be emitted as a single vpaddd on both targets.
define <4 x i64> @test_mm256_add_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_add_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %res = add <8 x i32> %arg0, %arg1
  %bc = bitcast <8 x i32> %res to <4 x i64>
  ret <4 x i64> %bc
}

; Expects a generic IR `add` directly on <4 x i64> (no bitcasts needed) to be
; emitted as a single vpaddq on both targets.
define <4 x i64> @test_mm256_add_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_add_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_add_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = add <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

; Expects the @llvm.x86.avx2.padds.b call (operands and result bitcast between
; <4 x i64> and <32 x i8>) to be emitted as a single vpaddsb on both targets.
define <4 x i64> @test_mm256_adds_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_adds_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_adds_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone

; Expects the @llvm.x86.avx2.padds.w call (operands and result bitcast between
; <4 x i64> and <16 x i16>) to be emitted as a single vpaddsw on both targets.
define <4 x i64> @test_mm256_adds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_adds_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_adds_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone

; Expects the @llvm.x86.avx2.paddus.b call (operands and result bitcast between
; <4 x i64> and <32 x i8>) to be emitted as a single vpaddusb on both targets.
define <4 x i64> @test_mm256_adds_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_adds_epu8:
; X32:       # BB#0:
; X32-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_adds_epu8:
; X64:       # BB#0:
; X64-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone

; Expects the @llvm.x86.avx2.paddus.w call (operands and result bitcast between
; <4 x i64> and <16 x i16>) to be emitted as a single vpaddusw on both targets.
define <4 x i64> @test_mm256_adds_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_adds_epu16:
; X32:       # BB#0:
; X32-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_adds_epu16:
; X64:       # BB#0:
; X64-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone

; Expects a <32 x i8> shufflevector to be emitted as a single vpalignr.
; In each 16-element half the mask takes elements 2..15 (resp. 18..31) of
; %arg0 followed by the first two elements of the matching half of %arg1
; (mask indices 32,33 and 48,49).
define <4 x i64> @test_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_alignr_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_alignr_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1],ymm0[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

; Second vpalignr case, with the mask shifted by one element instead of two:
; each 16-element half takes elements 1..15 (resp. 17..31) of %arg0 followed
; by the first element of the matching half of %arg1 (mask indices 32 and 48).
define <4 x i64> @test2_mm256_alignr_epi8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test2_mm256_alignr_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
; X32-NEXT:    retl
;
; X64-LABEL: test2_mm256_alignr_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

; Expects a generic IR `and` on <4 x i64> to be emitted as a single
; instruction; the autogenerated checks record vandps for both targets.
define <4 x i64> @test_mm256_and_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_and_si256:
; X32:       # BB#0:
; X32-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_and_si256:
; X64:       # BB#0:
; X64-NEXT:    vandps %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = and <4 x i64> %a0, %a1
  ret <4 x i64> %res
}

; NOT-then-AND expressed as `xor %a0, -1` followed by `and`. The checks expect
; three instructions rather than one fused operation: vpcmpeqd (producing the
; %ymm2 operand of the xor), vpxor, then vpand.
define <4 x i64> @test_mm256_andnot_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_andnot_si256:
; X32:       # BB#0:
; X32-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X32-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; X32-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_andnot_si256:
; X64:       # BB#0:
; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
; X64-NEXT:    vpxor %ymm2, %ymm0, %ymm0
; X64-NEXT:    vpand %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %not = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %res = and <4 x i64> %not, %a1
  ret <4 x i64> %res
}

; Expects the @llvm.x86.avx2.pavg.b call (operands and result bitcast between
; <4 x i64> and <32 x i8>) to be emitted as a single vpavgb on both targets.
define <4 x i64> @test_mm256_avg_epu8(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_avg_epu8:
; X32:       # BB#0:
; X32-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_avg_epu8:
; X64:       # BB#0:
; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %arg0, <32 x i8> %arg1)
  %bc = bitcast <32 x i8> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone

; Expects the @llvm.x86.avx2.pavg.w call (operands and result bitcast between
; <4 x i64> and <16 x i16>) to be emitted as a single vpavgw on both targets.
define <4 x i64> @test_mm256_avg_epu16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_avg_epu16:
; X32:       # BB#0:
; X32-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_avg_epu16:
; X64:       # BB#0:
; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %arg0, <16 x i16> %arg1)
  %bc = bitcast <16 x i16> %res to <4 x i64>
  ret <4 x i64> %bc
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

; Expects a <16 x i16> shufflevector that keeps every element in place and
; takes only elements 1 and 9 from %arg1 (mask indices 17 and 25) to be
; emitted as a single vpblendw on both targets.
define <4 x i64> @test_mm256_blend_epi16(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_blend_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blend_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %res = bitcast <16 x i16> %shuf to <4 x i64>
  ret <4 x i64> %res
}

; 128-bit case: expects a <4 x i32> shufflevector that takes elements 0 and 2
; from %arg1 (mask indices 4 and 6) and elements 1 and 3 from %arg0 to be
; emitted as a single vpblendd on xmm registers.
define <2 x i64> @test_mm_blend_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_blend_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_blend_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %shuf = shufflevector <4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %res = bitcast <4 x i32> %shuf to <2 x i64>
  ret <2 x i64> %res
}

; 256-bit case: expects an <8 x i32> shufflevector that takes elements 0, 2, 4
; and 5 from %arg1 (mask indices 8, 10, 12, 13) and elements 1, 3, 6 and 7
; from %arg0 to be emitted as a single vpblendd on ymm registers.
define <4 x i64> @test_mm256_blend_epi32(<4 x i64> %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_blend_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blend_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5],ymm0[6,7]
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
  %res = bitcast <8 x i32> %shuf to <4 x i64>
  ret <4 x i64> %res
}

; Expects the three-operand @llvm.x86.avx2.pblendvb call (all operands bitcast
; from <4 x i64> to <32 x i8>) to be emitted as a single vpblendvb, with the
; third IR argument (%a2, in %ymm2) appearing as the first assembly operand.
define <4 x i64> @test_mm256_blendv_epi8(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_blendv_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_blendv_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
  %arg2 = bitcast <4 x i64> %a2 to <32 x i8>
  %call = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %arg0, <32 x i8> %arg1, <32 x i8> %arg2)
  %res = bitcast <32 x i8> %call to <4 x i64>
  ret <4 x i64> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone

; 128-bit case: expects a <16 x i8> shufflevector with an all-zero mask (every
; result element is element 0 of %arg0) to be emitted as a single
; vpbroadcastb with an xmm destination.
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastb_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcastb_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res = bitcast <16 x i8> %shuf to <2 x i64>
  ret <2 x i64> %res
}

; 256-bit case: expects a <32 x i8> shufflevector with an all-zero mask (every
; result element is element 0 of %arg0) to be emitted as a single
; vpbroadcastb from %xmm0 into a ymm destination.
define <4 x i64> @test_mm256_broadcastb_epi8(<4 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastb_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastb_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> undef, <32 x i32> zeroinitializer
  %res = bitcast <32 x i8> %shuf to <4 x i64>
  ret <4 x i64> %res
}

; Dword splat, 128-bit: %a0 is viewed as <4 x i32> and element 0 is
; replicated into all four lanes by an all-zero shuffle mask. The check lines
; expect vbroadcastss (xmm0 -> xmm0) for this integer splat on both targets.
400*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
401*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_broadcastd_epi32:
402*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
403*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastss %xmm0, %xmm0
404*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
405*9880d681SAndroid Build Coastguard Worker;
406*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_broadcastd_epi32:
407*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
408*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastss %xmm0, %xmm0
409*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
410*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
411*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
412*9880d681SAndroid Build Coastguard Worker  %res = bitcast <4 x i32> %shuf to <2 x i64>
413*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
414*9880d681SAndroid Build Coastguard Worker}
415*9880d681SAndroid Build Coastguard Worker
; Dword splat, 256-bit: %a0 is viewed as <8 x i32> and element 0 is
; replicated into all eight lanes by an all-zero shuffle mask. The check lines
; expect vbroadcastss from xmm0 into ymm0 on both targets.
416*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_broadcastd_epi32(<4 x i64> %a0) {
417*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastd_epi32:
418*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
419*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastss %xmm0, %ymm0
420*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
421*9880d681SAndroid Build Coastguard Worker;
422*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastd_epi32:
423*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
424*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastss %xmm0, %ymm0
425*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
426*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
427*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> zeroinitializer
428*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %shuf to <4 x i64>
429*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
430*9880d681SAndroid Build Coastguard Worker}
431*9880d681SAndroid Build Coastguard Worker
; Qword splat, 128-bit: shuffles %a0 directly (no bitcasts needed) with an
; all-zero mask so both i64 lanes hold element 0. The check lines expect
; vpbroadcastq (xmm0 -> xmm0) on both targets.
432*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
433*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_broadcastq_epi64:
434*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
435*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
436*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
437*9880d681SAndroid Build Coastguard Worker;
438*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_broadcastq_epi64:
439*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
440*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
441*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
442*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
443*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
444*9880d681SAndroid Build Coastguard Worker}
445*9880d681SAndroid Build Coastguard Worker
; Qword splat, 256-bit: shuffles %a0 with an all-zero mask so all four i64
; lanes hold element 0. Unlike the 128-bit variant, the check lines here
; expect vbroadcastsd from xmm0 into ymm0.
446*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_broadcastq_epi64(<4 x i64> %a0) {
447*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastq_epi64:
448*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
449*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
450*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
451*9880d681SAndroid Build Coastguard Worker;
452*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastq_epi64:
453*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
454*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
455*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
456*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
457*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
458*9880d681SAndroid Build Coastguard Worker}
459*9880d681SAndroid Build Coastguard Worker
; Double splat, 128-bit: an all-zero shuffle mask copies element 0 of %a0
; into both double lanes. The check lines expect vmovddup, whose printed
; shuffle comment must read xmm0 = xmm0[0,0].
460*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_broadcastsd_pd(<2 x double> %a0) {
461*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_broadcastsd_pd:
462*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
463*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
464*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
465*9880d681SAndroid Build Coastguard Worker;
466*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_broadcastsd_pd:
467*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
468*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
469*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
470*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
471*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
472*9880d681SAndroid Build Coastguard Worker}
473*9880d681SAndroid Build Coastguard Worker
; Double splat, 256-bit: an all-zero shuffle mask copies element 0 of %a0
; into all four double lanes. The check lines expect vbroadcastsd from xmm0
; into ymm0 on both targets.
474*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_broadcastsd_pd(<4 x double> %a0) {
475*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastsd_pd:
476*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
477*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
478*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
479*9880d681SAndroid Build Coastguard Worker;
480*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastsd_pd:
481*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
482*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
483*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
484*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> zeroinitializer
485*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
486*9880d681SAndroid Build Coastguard Worker}
487*9880d681SAndroid Build Coastguard Worker
; 128-bit lane broadcast: the mask <0,1,0,1> copies the two low i64 elements
; of %a0 into both halves of the result. The check lines expect a vpermq whose
; printed shuffle comment reads ymm0 = ymm0[0,1,0,1].
; NOTE(review): the argument here is a full <4 x i64>; the file header asks
; for IR equivalent to clang's avx2-builtins.c output, so confirm the operand
; width against that test before relying on this as the canonical pattern.
488*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_broadcastsi128_si256(<4 x i64> %a0) {
489*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastsi128_si256:
490*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
491*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
492*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
493*9880d681SAndroid Build Coastguard Worker;
494*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastsi128_si256:
495*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
496*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
497*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
498*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
499*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
500*9880d681SAndroid Build Coastguard Worker}
501*9880d681SAndroid Build Coastguard Worker
; Float splat, 128-bit: an all-zero shuffle mask copies element 0 of %a0
; into all four float lanes. The check lines expect vbroadcastss
; (xmm0 -> xmm0) on both targets.
502*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) {
503*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_broadcastss_ps:
504*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
505*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastss %xmm0, %xmm0
506*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
507*9880d681SAndroid Build Coastguard Worker;
508*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_broadcastss_ps:
509*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
510*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastss %xmm0, %xmm0
511*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
512*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
513*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
514*9880d681SAndroid Build Coastguard Worker}
515*9880d681SAndroid Build Coastguard Worker
; Float splat, 256-bit: an all-zero shuffle mask copies element 0 of %a0
; into all eight float lanes. The check lines expect vbroadcastss from xmm0
; into ymm0 on both targets.
516*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_broadcastss_ps(<8 x float> %a0) {
517*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastss_ps:
518*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
519*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vbroadcastss %xmm0, %ymm0
520*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
521*9880d681SAndroid Build Coastguard Worker;
522*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastss_ps:
523*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
524*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vbroadcastss %xmm0, %ymm0
525*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
526*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
527*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
528*9880d681SAndroid Build Coastguard Worker}
529*9880d681SAndroid Build Coastguard Worker
; Word splat, 128-bit: %a0 is viewed as <8 x i16> and element 0 is replicated
; into all eight lanes by an all-zero shuffle mask. The check lines expect
; vpbroadcastw (xmm0 -> xmm0) on both targets.
530*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
531*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_broadcastw_epi16:
532*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
533*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
534*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
535*9880d681SAndroid Build Coastguard Worker;
536*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_broadcastw_epi16:
537*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
538*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
539*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
540*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
541*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
542*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i16> %shuf to <2 x i64>
543*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
544*9880d681SAndroid Build Coastguard Worker}
545*9880d681SAndroid Build Coastguard Worker
; Word splat, 256-bit: %a0 is viewed as <16 x i16> and element 0 is
; replicated into all sixteen lanes by an all-zero shuffle mask. The check
; lines expect vpbroadcastw from xmm0 into ymm0 on both targets.
546*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_broadcastw_epi16(<4 x i64> %a0) {
547*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_broadcastw_epi16:
548*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
549*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
550*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
551*9880d681SAndroid Build Coastguard Worker;
552*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_broadcastw_epi16:
553*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
554*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
555*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
556*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
557*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> zeroinitializer
558*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %shuf to <4 x i64>
559*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
560*9880d681SAndroid Build Coastguard Worker}
561*9880d681SAndroid Build Coastguard Worker
; Byte shift left by 3 within each 16-byte half, written as a two-input
; shuffle. The FIRST shuffle operand is the zero vector, so mask indices
; 13-15 and 29-31 produce zero bytes, while indices 32-44 and 48-60 select
; bytes 0-12 and 16-28 of %arg0 (the second operand). The check lines expect
; a single vpslldq whose shuffle comment shows three leading zeros per half.
562*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_bslli_epi128(<4 x i64> %a0) {
563*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_bslli_epi128:
564*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
565*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
566*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
567*9880d681SAndroid Build Coastguard Worker;
568*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_bslli_epi128:
569*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
570*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
571*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
572*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
573*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <32 x i8> zeroinitializer, <32 x i8> %arg0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
574*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %shuf to <4 x i64>
575*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
576*9880d681SAndroid Build Coastguard Worker}
577*9880d681SAndroid Build Coastguard Worker
; Byte shift right by 3 within each 16-byte half, written as a two-input
; shuffle. Here %arg0 is the FIRST operand: mask indices 3-15 and 19-31
; select its bytes, while indices 32-34 and 48-50 pull zero bytes from the
; zero vector (the second operand). The check lines expect a single vpsrldq
; whose shuffle comment shows three trailing zeros per half.
578*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_bsrli_epi128(<4 x i64> %a0) {
579*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_bsrli_epi128:
580*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
581*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
582*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
583*9880d681SAndroid Build Coastguard Worker;
584*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_bsrli_epi128:
585*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
586*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
587*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
588*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
589*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
590*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %shuf to <4 x i64>
591*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
592*9880d681SAndroid Build Coastguard Worker}
593*9880d681SAndroid Build Coastguard Worker
; Byte-wise equality: both inputs are viewed as <32 x i8>, compared with
; icmp eq, and the <32 x i1> result is sign-extended back to bytes before
; being returned as <4 x i64>. The check lines expect one vpcmpeqb on ymm
; registers for both targets.
594*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpeq_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
595*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpeq_epi8:
596*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
597*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
598*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
599*9880d681SAndroid Build Coastguard Worker;
600*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpeq_epi8:
601*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
602*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
603*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
604*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
605*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
606*9880d681SAndroid Build Coastguard Worker  %cmp = icmp eq <32 x i8> %arg0, %arg1
607*9880d681SAndroid Build Coastguard Worker  %res = sext <32 x i1> %cmp to <32 x i8>
608*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
609*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
610*9880d681SAndroid Build Coastguard Worker}
611*9880d681SAndroid Build Coastguard Worker
; Word-wise equality: both inputs are viewed as <16 x i16>, compared with
; icmp eq, and the <16 x i1> result is sign-extended back to words before
; being returned as <4 x i64>. The check lines expect one vpcmpeqw on ymm
; registers for both targets.
612*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpeq_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
613*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpeq_epi16:
614*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
615*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
616*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
617*9880d681SAndroid Build Coastguard Worker;
618*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpeq_epi16:
619*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
620*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
621*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
622*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
623*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
624*9880d681SAndroid Build Coastguard Worker  %cmp = icmp eq <16 x i16> %arg0, %arg1
625*9880d681SAndroid Build Coastguard Worker  %res = sext <16 x i1> %cmp to <16 x i16>
626*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
627*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
628*9880d681SAndroid Build Coastguard Worker}
629*9880d681SAndroid Build Coastguard Worker
; Dword-wise equality: both inputs are viewed as <8 x i32>, compared with
; icmp eq, and the <8 x i1> result is sign-extended back to dwords before
; being returned as <4 x i64>. The check lines expect one vpcmpeqd on ymm
; registers for both targets.
630*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpeq_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
631*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpeq_epi32:
632*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
633*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
634*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
635*9880d681SAndroid Build Coastguard Worker;
636*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpeq_epi32:
637*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
638*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
639*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
640*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
641*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
642*9880d681SAndroid Build Coastguard Worker  %cmp = icmp eq <8 x i32> %arg0, %arg1
643*9880d681SAndroid Build Coastguard Worker  %res = sext <8 x i1> %cmp to <8 x i32>
644*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
645*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
646*9880d681SAndroid Build Coastguard Worker}
647*9880d681SAndroid Build Coastguard Worker
; Qword-wise equality: the inputs are already <4 x i64>, so no bitcasts are
; needed; icmp eq produces <4 x i1>, which is sign-extended to <4 x i64>.
; The check lines expect one vpcmpeqq on ymm registers for both targets.
648*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpeq_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
649*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpeq_epi64:
650*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
651*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
652*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
653*9880d681SAndroid Build Coastguard Worker;
654*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpeq_epi64:
655*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
656*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
657*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
658*9880d681SAndroid Build Coastguard Worker  %cmp = icmp eq <4 x i64> %a0, %a1
659*9880d681SAndroid Build Coastguard Worker  %res = sext <4 x i1> %cmp to <4 x i64>
660*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
661*9880d681SAndroid Build Coastguard Worker}
662*9880d681SAndroid Build Coastguard Worker
; Byte-wise signed greater-than: both inputs are viewed as <32 x i8>,
; compared with icmp sgt (%arg0 > %arg1), and the <32 x i1> result is
; sign-extended back to bytes. The check lines expect one vpcmpgtb on ymm
; registers for both targets.
663*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpgt_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
664*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpgt_epi8:
665*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
666*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
667*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
668*9880d681SAndroid Build Coastguard Worker;
669*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpgt_epi8:
670*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
671*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %ymm0
672*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
673*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
674*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
675*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <32 x i8> %arg0, %arg1
676*9880d681SAndroid Build Coastguard Worker  %res = sext <32 x i1> %cmp to <32 x i8>
677*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
678*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
679*9880d681SAndroid Build Coastguard Worker}
680*9880d681SAndroid Build Coastguard Worker
; Word-wise signed greater-than: both inputs are viewed as <16 x i16>,
; compared with icmp sgt (%arg0 > %arg1), and the <16 x i1> result is
; sign-extended back to words. The check lines expect one vpcmpgtw on ymm
; registers for both targets.
681*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpgt_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
682*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpgt_epi16:
683*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
684*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
685*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
686*9880d681SAndroid Build Coastguard Worker;
687*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpgt_epi16:
688*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
689*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %ymm0
690*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
691*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
692*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
693*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <16 x i16> %arg0, %arg1
694*9880d681SAndroid Build Coastguard Worker  %res = sext <16 x i1> %cmp to <16 x i16>
695*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
696*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
697*9880d681SAndroid Build Coastguard Worker}
698*9880d681SAndroid Build Coastguard Worker
; Dword-wise signed greater-than: both inputs are viewed as <8 x i32>,
; compared with icmp sgt (%arg0 > %arg1), and the <8 x i1> result is
; sign-extended back to dwords. The check lines expect one vpcmpgtd on ymm
; registers for both targets.
699*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpgt_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
700*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpgt_epi32:
701*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
702*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
703*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
704*9880d681SAndroid Build Coastguard Worker;
705*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpgt_epi32:
706*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
707*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
708*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
709*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
710*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
711*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <8 x i32> %arg0, %arg1
712*9880d681SAndroid Build Coastguard Worker  %res = sext <8 x i1> %cmp to <8 x i32>
713*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
714*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
715*9880d681SAndroid Build Coastguard Worker}
716*9880d681SAndroid Build Coastguard Worker
; Qword-wise signed greater-than: the inputs are already <4 x i64>, so no
; bitcasts are needed; icmp sgt produces <4 x i1>, which is sign-extended to
; <4 x i64>. The check lines expect one vpcmpgtq on ymm registers for both
; targets.
717*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cmpgt_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
718*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cmpgt_epi64:
719*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
720*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
721*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
722*9880d681SAndroid Build Coastguard Worker;
723*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cmpgt_epi64:
724*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
725*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
726*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
727*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <4 x i64> %a0, %a1
728*9880d681SAndroid Build Coastguard Worker  %res = sext <4 x i1> %cmp to <4 x i64>
729*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
730*9880d681SAndroid Build Coastguard Worker}
731*9880d681SAndroid Build Coastguard Worker
; Sign-extend i8 -> i16: the 128-bit input is viewed as <16 x i8> and all
; sixteen bytes are sign-extended to <16 x i16> (no narrowing shuffle is
; needed). The check lines expect vpmovsxbw from xmm0 into ymm0.
732*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi8_epi16(<2 x i64> %a0) {
733*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi8_epi16:
734*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
735*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxbw %xmm0, %ymm0
736*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
737*9880d681SAndroid Build Coastguard Worker;
738*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi8_epi16:
739*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
740*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
741*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
742*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
743*9880d681SAndroid Build Coastguard Worker  %ext = sext <16 x i8> %arg0 to <16 x i16>
744*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %ext to <4 x i64>
745*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
746*9880d681SAndroid Build Coastguard Worker}
747*9880d681SAndroid Build Coastguard Worker
; Sign-extend i8 -> i32: the 128-bit input is viewed as <16 x i8>, a shuffle
; keeps only bytes 0-7, and those eight bytes are sign-extended to
; <8 x i32>. The check lines expect the shuffle and extend to fold into a
; single vpmovsxbd from xmm0 into ymm0.
748*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
749*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi8_epi32:
750*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
751*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxbd %xmm0, %ymm0
752*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
753*9880d681SAndroid Build Coastguard Worker;
754*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi8_epi32:
755*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
756*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxbd %xmm0, %ymm0
757*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
758*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
759*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
760*9880d681SAndroid Build Coastguard Worker  %ext = sext <8 x i8> %shuf to <8 x i32>
761*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %ext to <4 x i64>
762*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
763*9880d681SAndroid Build Coastguard Worker}
764*9880d681SAndroid Build Coastguard Worker
; Sign-extend i8 -> i64: the 128-bit input is viewed as <16 x i8>, a shuffle
; keeps only bytes 0-3, and those four bytes are sign-extended to <4 x i64>,
; which is returned directly. The check lines expect a single vpmovsxbq from
; xmm0 into ymm0.
765*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
766*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi8_epi64:
767*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
768*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxbq %xmm0, %ymm0
769*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
770*9880d681SAndroid Build Coastguard Worker;
771*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi8_epi64:
772*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
773*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxbq %xmm0, %ymm0
774*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
775*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
776*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
777*9880d681SAndroid Build Coastguard Worker  %ext = sext <4 x i8> %shuf to <4 x i64>
778*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
779*9880d681SAndroid Build Coastguard Worker}
780*9880d681SAndroid Build Coastguard Worker
; Sign-extend i16 -> i32: the 128-bit input is viewed as <8 x i16> and all
; eight words are sign-extended to <8 x i32> (no narrowing shuffle is
; needed). The check lines expect vpmovsxwd from xmm0 into ymm0.
781*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
782*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi16_epi32:
783*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
784*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxwd %xmm0, %ymm0
785*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
786*9880d681SAndroid Build Coastguard Worker;
787*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi16_epi32:
788*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
789*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
790*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
791*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
792*9880d681SAndroid Build Coastguard Worker  %ext = sext <8 x i16> %arg0 to <8 x i32>
793*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %ext to <4 x i64>
794*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
795*9880d681SAndroid Build Coastguard Worker}
796*9880d681SAndroid Build Coastguard Worker
; Sign-extend i16 -> i64: the 128-bit input is viewed as <8 x i16>, a shuffle
; keeps only words 0-3, and those four words are sign-extended to
; <4 x i64>, which is returned directly. The check lines expect a single
; vpmovsxwq from xmm0 into ymm0.
797*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
798*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi16_epi64:
799*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
800*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxwq %xmm0, %ymm0
801*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
802*9880d681SAndroid Build Coastguard Worker;
803*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi16_epi64:
804*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
805*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxwq %xmm0, %ymm0
806*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
807*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
808*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
809*9880d681SAndroid Build Coastguard Worker  %ext = sext <4 x i16> %shuf to <4 x i64>
810*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
811*9880d681SAndroid Build Coastguard Worker}
812*9880d681SAndroid Build Coastguard Worker
; Sign-extends all four i32 lanes of %a0 to i64 (no shuffle needed because the
; lane counts already match). Expected selection is a single vpmovsxdq.
813*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
814*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepi32_epi64:
815*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
816*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovsxdq %xmm0, %ymm0
817*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
818*9880d681SAndroid Build Coastguard Worker;
819*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepi32_epi64:
820*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
821*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
822*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
823*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
824*9880d681SAndroid Build Coastguard Worker  %ext = sext <4 x i32> %arg0 to <4 x i64>
825*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
826*9880d681SAndroid Build Coastguard Worker}
827*9880d681SAndroid Build Coastguard Worker
; Zero-extends all sixteen i8 lanes of %a0 to i16. Expected selection is a
; single vpmovzxbw; the {{.*#+}} regex skips ahead to the '#' asm comment so
; the assertion matches the printed shuffle decode (source lane, then zero).
828*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
829*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu8_epi16:
830*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
831*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
832*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
833*9880d681SAndroid Build Coastguard Worker;
834*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu8_epi16:
835*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
836*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
837*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
838*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
839*9880d681SAndroid Build Coastguard Worker  %ext = zext <16 x i8> %arg0 to <16 x i16>
840*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %ext to <4 x i64>
841*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
842*9880d681SAndroid Build Coastguard Worker}
843*9880d681SAndroid Build Coastguard Worker
; Zero-extends the eight low i8 lanes of %a0 (shufflevector indices 0-7) to
; i32. Expected selection is a single vpmovzxbd whose shuffle decode shows
; each source byte followed by three zero bytes.
844*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
845*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu8_epi32:
846*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
847*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
848*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
849*9880d681SAndroid Build Coastguard Worker;
850*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu8_epi32:
851*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
852*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
853*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
854*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
855*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
856*9880d681SAndroid Build Coastguard Worker  %ext = zext <8 x i8> %shuf to <8 x i32>
857*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %ext to <4 x i64>
858*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
859*9880d681SAndroid Build Coastguard Worker}
860*9880d681SAndroid Build Coastguard Worker
; Zero-extends the four low i8 lanes of %a0 (shufflevector indices 0-3) to
; i64. Expected selection is a single vpmovzxbq whose shuffle decode shows
; each source byte followed by seven zero bytes.
861*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
862*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu8_epi64:
863*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
864*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
865*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
866*9880d681SAndroid Build Coastguard Worker;
867*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu8_epi64:
868*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
869*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
870*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
871*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
872*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
873*9880d681SAndroid Build Coastguard Worker  %ext = zext <4 x i8> %shuf to <4 x i64>
874*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
875*9880d681SAndroid Build Coastguard Worker}
876*9880d681SAndroid Build Coastguard Worker
; Zero-extends all eight i16 lanes of %a0 to i32. Expected selection is a
; single vpmovzxwd; the decode comment lists each source word followed by one
; zero word.
877*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
878*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu16_epi32:
879*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
880*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
881*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
882*9880d681SAndroid Build Coastguard Worker;
883*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu16_epi32:
884*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
885*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
886*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
887*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
888*9880d681SAndroid Build Coastguard Worker  %ext = zext <8 x i16> %arg0 to <8 x i32>
889*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %ext to <4 x i64>
890*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
891*9880d681SAndroid Build Coastguard Worker}
892*9880d681SAndroid Build Coastguard Worker
; Zero-extends the four low i16 lanes of %a0 (shufflevector indices 0-3) to
; i64. Expected selection is a single vpmovzxwq; the decode comment lists each
; source word followed by three zero words.
893*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
894*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu16_epi64:
895*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
896*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
897*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
898*9880d681SAndroid Build Coastguard Worker;
899*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu16_epi64:
900*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
901*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
902*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
903*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
904*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
905*9880d681SAndroid Build Coastguard Worker  %ext = zext <4 x i16> %shuf to <4 x i64>
906*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
907*9880d681SAndroid Build Coastguard Worker}
908*9880d681SAndroid Build Coastguard Worker
; Zero-extends all four i32 lanes of %a0 to i64. Expected selection is a
; single vpmovzxdq; the decode comment lists each source dword followed by one
; zero dword.
909*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
910*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_cvtepu32_epi64:
911*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
912*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
913*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
914*9880d681SAndroid Build Coastguard Worker;
915*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_cvtepu32_epi64:
916*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
917*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
918*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
919*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
920*9880d681SAndroid Build Coastguard Worker  %ext = zext <4 x i32> %arg0 to <4 x i64>
921*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %ext
922*9880d681SAndroid Build Coastguard Worker}
923*9880d681SAndroid Build Coastguard Worker
; Extracts the upper two i64 lanes of %a0 (shufflevector indices 2 and 3).
; The recorded output uses vextractf128 with immediate 1 (not vextracti128),
; followed by vzeroupper before the return because only xmm0 is returned.
924*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm256_extracti128_si256(<4 x i64> %a0) nounwind {
925*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_extracti128_si256:
926*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
927*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
928*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
929*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
930*9880d681SAndroid Build Coastguard Worker;
931*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_extracti128_si256:
932*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
933*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
934*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
935*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
936*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 2, i32 3>
937*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
938*9880d681SAndroid Build Coastguard Worker}
939*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phadd.w intrinsic on both operands viewed as
; <16 x i16> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphaddw with %a1 (ymm1) as the first AT&T source operand.
940*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hadd_epi16(<4 x i64> %a0, <4 x i64> %a1) {
941*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hadd_epi16:
942*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
943*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
944*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
945*9880d681SAndroid Build Coastguard Worker;
946*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hadd_epi16:
947*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
948*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
949*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
950*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
951*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
952*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %arg0, <16 x i16> %arg1)
953*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
954*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
955*9880d681SAndroid Build Coastguard Worker}
956*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
957*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phadd.d intrinsic on both operands viewed as
; <8 x i32> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphaddd.
958*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hadd_epi32(<4 x i64> %a0, <4 x i64> %a1) {
959*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hadd_epi32:
960*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
961*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
962*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
963*9880d681SAndroid Build Coastguard Worker;
964*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hadd_epi32:
965*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
966*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
967*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
968*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
969*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
970*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %arg0, <8 x i32> %arg1)
971*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
972*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
973*9880d681SAndroid Build Coastguard Worker}
974*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
975*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phadd.sw intrinsic on both operands viewed as
; <16 x i16> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphaddsw.
976*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hadds_epi16(<4 x i64> %a0, <4 x i64> %a1) {
977*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hadds_epi16:
978*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
979*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0
980*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
981*9880d681SAndroid Build Coastguard Worker;
982*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hadds_epi16:
983*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
984*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0
985*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
986*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
987*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
988*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %arg0, <16 x i16> %arg1)
989*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
990*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
991*9880d681SAndroid Build Coastguard Worker}
992*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
993*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phsub.w intrinsic on both operands viewed as
; <16 x i16> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphsubw.
994*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hsub_epi16(<4 x i64> %a0, <4 x i64> %a1) {
995*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hsub_epi16:
996*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
997*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphsubw %ymm1, %ymm0, %ymm0
998*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
999*9880d681SAndroid Build Coastguard Worker;
1000*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hsub_epi16:
1001*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1002*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphsubw %ymm1, %ymm0, %ymm0
1003*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1004*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
1005*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
1006*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %arg0, <16 x i16> %arg1)
1007*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
1008*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1009*9880d681SAndroid Build Coastguard Worker}
1010*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
1011*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phsub.d intrinsic on both operands viewed as
; <8 x i32> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphsubd.
1012*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hsub_epi32(<4 x i64> %a0, <4 x i64> %a1) {
1013*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hsub_epi32:
1014*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1015*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphsubd %ymm1, %ymm0, %ymm0
1016*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1017*9880d681SAndroid Build Coastguard Worker;
1018*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hsub_epi32:
1019*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1020*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphsubd %ymm1, %ymm0, %ymm0
1021*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1022*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
1023*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1024*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %arg0, <8 x i32> %arg1)
1025*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
1026*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1027*9880d681SAndroid Build Coastguard Worker}
1028*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
1029*9880d681SAndroid Build Coastguard Worker
; Calls the llvm.x86.avx2.phsub.sw intrinsic on both operands viewed as
; <16 x i16> and bitcasts the result back to <4 x i64>. Expected selection is
; a single vphsubsw.
1030*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_hsubs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
1031*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_hsubs_epi16:
1032*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1033*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0
1034*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1035*9880d681SAndroid Build Coastguard Worker;
1036*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_hsubs_epi16:
1037*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1038*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0
1039*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1040*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
1041*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
1042*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %arg0, <16 x i16> %arg1)
1043*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
1044*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1045*9880d681SAndroid Build Coastguard Worker}
1046*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
1047*9880d681SAndroid Build Coastguard Worker
; 128-bit gather through llvm.x86.avx2.gather.d.d with an undef first operand,
; an all-ones constant mask and an immediate of 2 (which appears as the scale
; in the expected addressing mode). Expected code builds the all-ones mask
; with vpcmpeqd, gathers into xmm1 and copies the result to xmm0. The i386
; run additionally loads the pointer argument from the stack into eax.
1048*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_i32gather_epi32(i32 *%a0, <2 x i64> %a1) {
1049*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i32gather_epi32:
1050*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1051*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1052*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1053*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdd %xmm2, (%eax,%xmm0,2), %xmm1
1054*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %xmm1, %xmm0
1055*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1056*9880d681SAndroid Build Coastguard Worker;
1057*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i32gather_epi32:
1058*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1059*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1060*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm0,2), %xmm1
1061*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %xmm1, %xmm0
1062*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1063*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32 *%a0 to i8*
1064*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1065*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
1066*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> undef, i8* %arg0, <4 x i32> %arg1, <4 x i32> %mask, i8 2)
1067*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1068*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1069*9880d681SAndroid Build Coastguard Worker}
1070*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
1071*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather through llvm.x86.avx2.gather.d.d (declared elsewhere
; in this file): %a0 is passed as the first operand, %a1 as the pointer, %a2
; as the i32 index vector and %a3 as the mask, with an immediate of 2. Since
; every operand comes from an argument, the expected code is a lone
; vpgatherdd writing xmm0 (plus the stack load of the pointer on i386).
1072*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_mask_i32gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
1073*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i32gather_epi32:
1074*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1075*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1076*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0
1077*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1078*9880d681SAndroid Build Coastguard Worker;
1079*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i32gather_epi32:
1080*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1081*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
1082*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1083*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1084*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i32 *%a1 to i8*
1085*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1086*9880d681SAndroid Build Coastguard Worker  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
1087*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %arg0, i8* %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i8 2)
1088*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1089*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1090*9880d681SAndroid Build Coastguard Worker}
1091*9880d681SAndroid Build Coastguard Worker
; 256-bit gather through llvm.x86.avx2.gather.d.d.256 with an undef first
; operand, an all-ones constant mask and an immediate of 2. Expected code
; builds the mask with a ymm vpcmpeqd, gathers into ymm1 and copies the result
; to ymm0. The i386 run additionally loads the pointer argument from the
; stack into eax.
1092*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_i32gather_epi32(i32 *%a0, <4 x i64> %a1) {
1093*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i32gather_epi32:
1094*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1095*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1096*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1097*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdd %ymm2, (%eax,%ymm0,2), %ymm1
1098*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %ymm1, %ymm0
1099*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1100*9880d681SAndroid Build Coastguard Worker;
1101*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i32gather_epi32:
1102*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1103*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1104*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm0,2), %ymm1
1105*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %ymm1, %ymm0
1106*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1107*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32 *%a0 to i8*
1108*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1109*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i32>
1110*9880d681SAndroid Build Coastguard Worker  %call = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> undef, i8* %arg0, <8 x i32> %arg1, <8 x i32> %mask, i8 2)
1111*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %call to <4 x i64>
1112*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1113*9880d681SAndroid Build Coastguard Worker}
1114*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
1115*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather through llvm.x86.avx2.gather.d.d.256 (declared
; elsewhere in this file): %a0 is passed as the first operand, %a1 as the
; pointer, %a2 as the i32 index vector and %a3 as the mask, with an immediate
; of 2. Expected code is a lone ymm vpgatherdd writing ymm0 (plus the stack
; load of the pointer on i386).
1116*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mask_i32gather_epi32(<4 x i64> %a0, i32 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
1117*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i32gather_epi32:
1118*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1119*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1120*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0
1121*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1122*9880d681SAndroid Build Coastguard Worker;
1123*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i32gather_epi32:
1124*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1125*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
1126*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1127*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
1128*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i32 *%a1 to i8*
1129*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
1130*9880d681SAndroid Build Coastguard Worker  %arg3 = bitcast <4 x i64> %a3 to <8 x i32>
1131*9880d681SAndroid Build Coastguard Worker  %call = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %arg0, i8* %arg1, <8 x i32> %arg2, <8 x i32> %arg3, i8 2)
1132*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %call to <4 x i64>
1133*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1134*9880d681SAndroid Build Coastguard Worker}
1135*9880d681SAndroid Build Coastguard Worker
; 128-bit gather of i64 elements through llvm.x86.avx2.gather.d.q, using an
; i32 index vector, an undef first operand, an all-ones <2 x i64> mask and an
; immediate of 2. Expected code builds the mask with vpcmpeqd, gathers with
; vpgatherdq into xmm1 and copies the result to xmm0. The i386 run
; additionally loads the pointer argument from the stack into eax.
1136*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
1137*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i32gather_epi64:
1138*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1139*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1140*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1141*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdq %xmm2, (%eax,%xmm0,2), %xmm1
1142*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %xmm1, %xmm0
1143*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1144*9880d681SAndroid Build Coastguard Worker;
1145*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i32gather_epi64:
1146*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1147*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1148*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm0,2), %xmm1
1149*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %xmm1, %xmm0
1150*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1151*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64 *%a0 to i8*
1152*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1153*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> undef, i8* %arg0, <4 x i32> %arg1, <2 x i64> <i64 -1, i64 -1>, i8 2)
1154*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1155*9880d681SAndroid Build Coastguard Worker}
1156*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
1157*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of i64 elements through llvm.x86.avx2.gather.d.q
; (declared elsewhere in this file). %a0 and %a3 are already <2 x i64>, so
; only the pointer and the i32 index vector need bitcasts. Expected code is a
; lone vpgatherdq writing xmm0 (plus the stack load of the pointer on i386).
1158*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_mask_i32gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
1159*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i32gather_epi64:
1160*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1161*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1162*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0
1163*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1164*9880d681SAndroid Build Coastguard Worker;
1165*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i32gather_epi64:
1166*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1167*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
1168*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1169*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i64 *%a1 to i8*
1170*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1171*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <2 x i64> %a3, i8 2)
1172*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1173*9880d681SAndroid Build Coastguard Worker}
1174*9880d681SAndroid Build Coastguard Worker
; 256-bit gather of i64 elements through llvm.x86.avx2.gather.d.q.256. The
; index vector is a 128-bit <4 x i32> while the mask and result are 256-bit,
; so the expected vpgatherdq mixes an xmm index register with ymm mask and
; destination registers. The all-ones mask is built with vpcmpeqd and the
; result is copied from ymm1 to ymm0.
1175*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_i32gather_epi64(i64 *%a0, <2 x i64> %a1) {
1176*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i32gather_epi64:
1177*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1178*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1179*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1180*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdq %ymm2, (%eax,%xmm0,2), %ymm1
1181*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %ymm1, %ymm0
1182*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1183*9880d681SAndroid Build Coastguard Worker;
1184*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i32gather_epi64:
1185*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1186*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1187*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm0,2), %ymm1
1188*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %ymm1, %ymm0
1189*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1190*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64 *%a0 to i8*
1191*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1192*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> undef, i8* %arg0, <4 x i32> %arg1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
1193*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
1194*9880d681SAndroid Build Coastguard Worker}
1195*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
1196*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather of i64 elements addressed by 32-bit indices.
; Pass-through (%a0) and mask (%a3) are taken directly from the arguments, so
; the expected code is a single vpgatherdq; the i386 run additionally loads the
; base pointer from the stack first.
1197*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mask_i32gather_epi64(<4 x i64> %a0, i64 *%a1, <2 x i64> %a2, <4 x i64> %a3) {
1198*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i32gather_epi64:
1199*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1200*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1201*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0
1202*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1203*9880d681SAndroid Build Coastguard Worker;
1204*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i32gather_epi64:
1205*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1206*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
1207*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1208*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i64 *%a1 to i8*
1209*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1210*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %arg1, <4 x i32> %arg2, <4 x i64> %a3, i8 2)
1211*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
1212*9880d681SAndroid Build Coastguard Worker}
1213*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of doubles addressed by 32-bit indices.
; The mask is built in IR as (0.0 oeq 0.0) sign-extended to i64 lanes and
; bitcast to <2 x double>; the checks expect that sequence to collapse into a
; single vpcmpeqd feeding vgatherdpd. The pass-through operand is undef, and
; the gathered value is copied into xmm0 afterwards.
1214*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_i32gather_pd(double *%a0, <2 x i64> %a1) {
1215*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i32gather_pd:
1216*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1217*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1218*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1219*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdpd %xmm2, (%eax,%xmm0,2), %xmm1
1220*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovapd %xmm1, %xmm0
1221*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1222*9880d681SAndroid Build Coastguard Worker;
1223*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i32gather_pd:
1224*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1225*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1226*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm0,2), %xmm1
1227*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovapd %xmm1, %xmm0
1228*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1229*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast double *%a0 to i8*
1230*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1231*9880d681SAndroid Build Coastguard Worker  %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
1232*9880d681SAndroid Build Coastguard Worker  %sext = sext <2 x i1> %cmp to <2 x i64>
1233*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <2 x i64> %sext to <2 x double>
1234*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> undef, i8* %arg0, <4 x i32> %arg1, <2 x double> %mask, i8 2)
1235*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
1236*9880d681SAndroid Build Coastguard Worker}
1237*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
1238*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of doubles addressed by 32-bit indices.
; Pass-through (%a0) and mask (%a3) are forwarded unchanged from the
; arguments, so only the vgatherdpd itself (plus the i386 pointer load) is
; expected in the output.
1239*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_mask_i32gather_pd(<2 x double> %a0, double *%a1, <2 x i64> %a2, <2 x double> %a3) {
1240*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i32gather_pd:
1241*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1242*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1243*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0
1244*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1245*9880d681SAndroid Build Coastguard Worker;
1246*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i32gather_pd:
1247*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1248*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
1249*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1250*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast double *%a1 to i8*
1251*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1252*9880d681SAndroid Build Coastguard Worker  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %arg1, <4 x i32> %arg2, <2 x double> %a3, i8 2)
1253*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
1254*9880d681SAndroid Build Coastguard Worker}
1255*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit gather of doubles addressed by 32-bit indices.
; Unlike the 128-bit variant, the mask here comes from a call to
; @llvm.x86.avx.cmp.pd.256 on two zero vectors with predicate 0, and the
; checks expect that call to stay as real instructions (vxorpd + vcmpeqpd)
; ahead of vgatherdpd instead of a vpcmpeqd all-ones idiom.
1256*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_i32gather_pd(double *%a0, <2 x i64> %a1) {
1257*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i32gather_pd:
1258*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1259*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1260*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
1261*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
1262*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdpd %ymm2, (%eax,%xmm0,2), %ymm1
1263*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovapd %ymm1, %ymm0
1264*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1265*9880d681SAndroid Build Coastguard Worker;
1266*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i32gather_pd:
1267*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1268*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
1269*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
1270*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm0,2), %ymm1
1271*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovapd %ymm1, %ymm0
1272*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1273*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast double *%a0 to i8*
1274*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1275*9880d681SAndroid Build Coastguard Worker  %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
1276*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> undef, i8* %arg0, <4 x i32> %arg1, <4 x double> %mask, i8 2)
1277*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
1278*9880d681SAndroid Build Coastguard Worker}
1279*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
1280*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather of doubles addressed by 32-bit indices.
; Pass-through (%a0) and mask (%a3) are forwarded from the arguments; the
; index vector stays 128 bits wide, which is why the expected vgatherdpd uses
; an xmm index register with ymm mask and destination registers.
1281*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_mask_i32gather_pd(<4 x double> %a0, double *%a1, <2 x i64> %a2, <4 x double> %a3) {
1282*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i32gather_pd:
1283*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1284*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1285*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0
1286*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1287*9880d681SAndroid Build Coastguard Worker;
1288*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i32gather_pd:
1289*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1290*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0
1291*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1292*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast double *%a1 to i8*
1293*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1294*9880d681SAndroid Build Coastguard Worker  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %arg1, <4 x i32> %arg2, <4 x double> %a3, i8 2)
1295*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
1296*9880d681SAndroid Build Coastguard Worker}
1297*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of floats addressed by 32-bit indices.
; The mask is built in IR as (0.0 oeq 0.0) sign-extended to i32 lanes and
; bitcast to <4 x float>; the checks expect a single vpcmpeqd to produce it
; before vgatherdps. The pass-through operand is undef, and the gathered value
; is copied into xmm0 afterwards.
1298*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_i32gather_ps(float *%a0, <2 x i64> %a1) {
1299*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i32gather_ps:
1300*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1301*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1302*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1303*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdps %xmm2, (%eax,%xmm0,2), %xmm1
1304*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovaps %xmm1, %xmm0
1305*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1306*9880d681SAndroid Build Coastguard Worker;
1307*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i32gather_ps:
1308*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1309*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1310*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm0,2), %xmm1
1311*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovaps %xmm1, %xmm0
1312*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1313*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast float *%a0 to i8*
1314*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1315*9880d681SAndroid Build Coastguard Worker  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
1316*9880d681SAndroid Build Coastguard Worker  %sext = sext <4 x i1> %cmp to <4 x i32>
1317*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <4 x i32> %sext to <4 x float>
1318*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> undef, i8* %arg0, <4 x i32> %arg1, <4 x float> %mask, i8 2)
1319*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1320*9880d681SAndroid Build Coastguard Worker}
1321*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
1322*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of floats addressed by 32-bit indices.
; Pass-through (%a0) and mask (%a3) are forwarded unchanged from the
; arguments, so only the vgatherdps itself (plus the i386 pointer load) is
; expected in the output.
1323*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_mask_i32gather_ps(<4 x float> %a0, float *%a1, <2 x i64> %a2, <4 x float> %a3) {
1324*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i32gather_ps:
1325*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1326*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1327*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0
1328*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1329*9880d681SAndroid Build Coastguard Worker;
1330*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i32gather_ps:
1331*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1332*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
1333*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1334*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast float *%a1 to i8*
1335*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1336*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %arg1, <4 x i32> %arg2, <4 x float> %a3, i8 2)
1337*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1338*9880d681SAndroid Build Coastguard Worker}
1339*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit gather of floats addressed by eight 32-bit indices.
; The mask comes from a call to @llvm.x86.avx.cmp.ps.256 on two zero vectors
; with predicate 0, and the checks expect that call to remain as real
; instructions (vxorps + vcmpeqps) ahead of vgatherdps. The pass-through
; operand is undef.
1340*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_i32gather_ps(float *%a0, <4 x i64> %a1) {
1341*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i32gather_ps:
1342*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1343*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1344*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vxorps %ymm1, %ymm1, %ymm1
1345*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2
1346*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdps %ymm2, (%eax,%ymm0,2), %ymm1
1347*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovaps %ymm1, %ymm0
1348*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1349*9880d681SAndroid Build Coastguard Worker;
1350*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i32gather_ps:
1351*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1352*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vxorps %ymm1, %ymm1, %ymm1
1353*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vcmpeqps %ymm1, %ymm1, %ymm2
1354*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm0,2), %ymm1
1355*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovaps %ymm1, %ymm0
1356*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1357*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast float *%a0 to i8*
1358*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1359*9880d681SAndroid Build Coastguard Worker  %mask = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> zeroinitializer, i8 0)
1360*9880d681SAndroid Build Coastguard Worker  %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arg0, <8 x i32> %arg1, <8 x float> %mask, i8 2)
1361*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %call
1362*9880d681SAndroid Build Coastguard Worker}
1363*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
1364*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather of floats addressed by eight 32-bit indices.
; Pass-through (%a0) and mask (%a3) are forwarded from the arguments; index,
; mask and destination are all expected in ymm registers.
1365*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_mask_i32gather_ps(<8 x float> %a0, float *%a1, <4 x i64> %a2, <8 x float> %a3) {
1366*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i32gather_ps:
1367*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1368*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1369*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0
1370*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1371*9880d681SAndroid Build Coastguard Worker;
1372*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i32gather_ps:
1373*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1374*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0
1375*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1376*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast float *%a1 to i8*
1377*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
1378*9880d681SAndroid Build Coastguard Worker  %call = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %arg1, <8 x i32> %arg2, <8 x float> %a3, i8 2)
1379*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %call
1380*9880d681SAndroid Build Coastguard Worker}
1381*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of i32 elements addressed by two 64-bit indices.
; The all-ones mask is written as a <2 x i64> constant bitcast to <4 x i32>
; and is expected to be materialised with vpcmpeqd before vpgatherqd. The
; <4 x i32> result is bitcast back to <2 x i64> for the return value.
1382*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_i64gather_epi32(i32 *%a0, <2 x i64> %a1) {
1383*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i64gather_epi32:
1384*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1385*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1386*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1387*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqd %xmm2, (%eax,%xmm0,2), %xmm1
1388*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %xmm1, %xmm0
1389*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1390*9880d681SAndroid Build Coastguard Worker;
1391*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i64gather_epi32:
1392*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1393*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1394*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm0,2), %xmm1
1395*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %xmm1, %xmm0
1396*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1397*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32 *%a0 to i8*
1398*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
1399*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> undef, i8* %arg0, <2 x i64> %a1, <4 x i32> %mask, i8 2)
1400*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1401*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1402*9880d681SAndroid Build Coastguard Worker}
1403*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
1404*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of i32 elements addressed by two 64-bit indices.
; Pass-through (%a0) and mask (%a3) arrive as <2 x i64> and are bitcast to
; <4 x i32> for the intrinsic; the indices (%a2) are passed through as-is.
; Only the vpgatherqd itself (plus the i386 pointer load) is expected.
1405*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
1406*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i64gather_epi32:
1407*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1408*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1409*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0
1410*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1411*9880d681SAndroid Build Coastguard Worker;
1412*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i64gather_epi32:
1413*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1414*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
1415*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1416*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1417*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i32 *%a1 to i8*
1418*9880d681SAndroid Build Coastguard Worker  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
1419*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %arg0, i8* %arg1, <2 x i64> %a2, <4 x i32> %arg3, i8 2)
1420*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1421*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1422*9880d681SAndroid Build Coastguard Worker}
1423*9880d681SAndroid Build Coastguard Worker
; Unmasked gather of four i32 elements addressed by four 64-bit indices.
; The index vector is 256 bits wide but mask and result are 128 bits, so the
; expected vpgatherqd mixes a ymm index with xmm mask and destination. Because
; a ymm register is used while the return value is 128 bits, the checks also
; expect a vzeroupper before the return.
1424*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm256_i64gather_epi32(i32 *%a0, <4 x i64> %a1) {
1425*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i64gather_epi32:
1426*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1427*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1428*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1429*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqd %xmm2, (%eax,%ymm0,2), %xmm1
1430*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %xmm1, %xmm0
1431*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1432*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1433*9880d681SAndroid Build Coastguard Worker;
1434*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i64gather_epi32:
1435*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1436*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1437*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm0,2), %xmm1
1438*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %xmm1, %xmm0
1439*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1440*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1441*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32 *%a0 to i8*
1442*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
1443*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> undef, i8* %arg0, <4 x i64> %a1, <4 x i32> %mask, i8 2)
1444*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1445*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1446*9880d681SAndroid Build Coastguard Worker}
1447*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
1448*9880d681SAndroid Build Coastguard Worker
; Masked gather of four i32 elements addressed by four 64-bit indices.
; Pass-through (%a0) and mask (%a3) are 128-bit values bitcast to <4 x i32>;
; the 256-bit index vector (%a2) is passed as-is. The checks expect a
; vpgatherqd with a ymm index and xmm mask/destination, followed by
; vzeroupper before the return.
1449*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm256_mask_i64gather_epi32(<2 x i64> %a0, i32 *%a1, <4 x i64> %a2, <2 x i64> %a3) {
1450*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i64gather_epi32:
1451*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1452*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1453*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0
1454*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1455*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1456*9880d681SAndroid Build Coastguard Worker;
1457*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i64gather_epi32:
1458*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1459*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
1460*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1461*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1462*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
1463*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i32 *%a1 to i8*
1464*9880d681SAndroid Build Coastguard Worker  %arg3 = bitcast <2 x i64> %a3 to <4 x i32>
1465*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %arg0, i8* %arg1, <4 x i64> %a2, <4 x i32> %arg3, i8 2)
1466*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1467*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1468*9880d681SAndroid Build Coastguard Worker}
1469*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of i64 elements addressed by 64-bit indices.
; No vector bitcasts are needed because indices, mask and result are all
; <2 x i64>. The constant all-ones mask is expected to be materialised with
; vpcmpeqd before vpgatherqq; the pass-through operand is undef.
1470*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_i64gather_epi64(i64 *%a0, <2 x i64> %a1) {
1471*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i64gather_epi64:
1472*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1473*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1474*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1475*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqq %xmm2, (%eax,%xmm0,2), %xmm1
1476*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %xmm1, %xmm0
1477*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1478*9880d681SAndroid Build Coastguard Worker;
1479*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i64gather_epi64:
1480*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1481*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1482*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm0,2), %xmm1
1483*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %xmm1, %xmm0
1484*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1485*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64 *%a0 to i8*
1486*9880d681SAndroid Build Coastguard Worker  %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> undef, i8* %arg0, <2 x i64> %a1, <2 x i64> <i64 -1, i64 -1>, i8 2)
1487*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %call
1488*9880d681SAndroid Build Coastguard Worker}
1489*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
1490*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of i64 elements addressed by 64-bit indices.
; Pass-through (%a0), indices (%a2) and mask (%a3) are forwarded unchanged;
; only the base pointer needs a bitcast to i8*. The expected output is the
; vpgatherqq alone (plus the i386 pointer load).
1491*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_mask_i64gather_epi64(<2 x i64> %a0, i64 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
1492*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i64gather_epi64:
1493*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1494*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1495*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0
1496*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1497*9880d681SAndroid Build Coastguard Worker;
1498*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i64gather_epi64:
1499*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1500*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
1501*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1502*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i64 *%a1 to i8*
1503*9880d681SAndroid Build Coastguard Worker  %call = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %arg1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
1504*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %call
1505*9880d681SAndroid Build Coastguard Worker}
1506*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit gather of i64 elements addressed by 64-bit indices.
; The constant all-ones <4 x i64> mask is expected to be materialised with a
; ymm vpcmpeqd before vpgatherqq; the pass-through operand is undef and the
; gathered value is copied into ymm0 afterwards.
1507*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_i64gather_epi64(i64 *%a0, <4 x i64> %a1) {
1508*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i64gather_epi64:
1509*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1510*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1511*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1512*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqq %ymm2, (%eax,%ymm0,2), %ymm1
1513*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovdqa %ymm1, %ymm0
1514*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1515*9880d681SAndroid Build Coastguard Worker;
1516*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i64gather_epi64:
1517*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1518*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
1519*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm0,2), %ymm1
1520*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovdqa %ymm1, %ymm0
1521*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1522*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64 *%a0 to i8*
1523*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> undef, i8* %arg0, <4 x i64> %a1, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, i8 2)
1524*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %call
1525*9880d681SAndroid Build Coastguard Worker}
1526*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
1527*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather of i64 elements addressed by 64-bit indices.
; Pass-through (%a0), indices (%a2) and mask (%a3) are forwarded unchanged;
; only the base pointer is bitcast to i8*. The expected output is the ymm
; vpgatherqq alone (plus the i386 pointer load).
1528*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mask_i64gather_epi64(<4 x i64> %a0, i64 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
1529*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i64gather_epi64:
1530*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1531*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1532*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0
1533*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1534*9880d681SAndroid Build Coastguard Worker;
1535*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i64gather_epi64:
1536*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1537*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
1538*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1539*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i64 *%a1 to i8*
1540*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %arg1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
1541*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %call
1542*9880d681SAndroid Build Coastguard Worker}
1543*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of doubles addressed by 64-bit indices.
; The mask is built in IR as (0.0 oeq 0.0) sign-extended to i64 lanes and
; bitcast to <2 x double>; the checks expect a single vpcmpeqd to produce it
; before vgatherqpd. The pass-through operand is undef, and the gathered value
; is copied into xmm0 afterwards.
1544*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_i64gather_pd(double *%a0, <2 x i64> %a1) {
1545*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i64gather_pd:
1546*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1547*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1548*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1549*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqpd %xmm2, (%eax,%xmm0,2), %xmm1
1550*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovapd %xmm1, %xmm0
1551*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1552*9880d681SAndroid Build Coastguard Worker;
1553*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i64gather_pd:
1554*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1555*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1556*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm0,2), %xmm1
1557*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovapd %xmm1, %xmm0
1558*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1559*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast double *%a0 to i8*
1560*9880d681SAndroid Build Coastguard Worker  %cmp = fcmp oeq <2 x double> zeroinitializer, zeroinitializer
1561*9880d681SAndroid Build Coastguard Worker  %sext = sext <2 x i1> %cmp to <2 x i64>
1562*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <2 x i64> %sext to <2 x double>
1563*9880d681SAndroid Build Coastguard Worker  %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> undef, i8* %arg0, <2 x i64> %a1, <2 x double> %mask, i8 2)
1564*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %call
1565*9880d681SAndroid Build Coastguard Worker}
1566*9880d681SAndroid Build Coastguard Workerdeclare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
1567*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of doubles addressed by 64-bit indices.
; Pass-through (%a0), indices (%a2) and mask (%a3) are forwarded unchanged;
; only the base pointer is bitcast to i8*. The expected output is the
; vgatherqpd alone (plus the i386 pointer load).
1568*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_mask_i64gather_pd(<2 x double> %a0, double *%a1, <2 x i64> %a2, <2 x double> %a3) {
1569*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i64gather_pd:
1570*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1571*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1572*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0
1573*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1574*9880d681SAndroid Build Coastguard Worker;
1575*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i64gather_pd:
1576*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1577*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
1578*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1579*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast double *%a1 to i8*
1580*9880d681SAndroid Build Coastguard Worker  %call = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %arg1, <2 x i64> %a2, <2 x double> %a3, i8 2)
1581*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %call
1582*9880d681SAndroid Build Coastguard Worker}
1583*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit gather of doubles using <4 x i64> indices. The pass-through
; operand is undef and the mask is produced by @llvm.x86.avx.cmp.pd.256
; comparing a zero vector with itself (predicate immediate 0). That intrinsic
; is declared outside this section of the file.
; The expected assembly materialises the mask with vxorpd + vcmpeqpd, issues
; vgatherqpd on ymm registers, then copies the result into ymm0.
1584*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_i64gather_pd(double *%a0, <4 x i64> %a1) {
1585*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i64gather_pd:
1586*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1587*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1588*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
1589*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
1590*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqpd %ymm2, (%eax,%ymm0,2), %ymm1
1591*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovapd %ymm1, %ymm0
1592*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1593*9880d681SAndroid Build Coastguard Worker;
1594*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i64gather_pd:
1595*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1596*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
1597*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vcmpeqpd %ymm1, %ymm1, %ymm2
1598*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm0,2), %ymm1
1599*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovapd %ymm1, %ymm0
1600*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1601*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast double *%a0 to i8*
1602*9880d681SAndroid Build Coastguard Worker  %mask = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> zeroinitializer, i8 0)
1603*9880d681SAndroid Build Coastguard Worker  %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> undef, i8* %arg0, <4 x i64> %a1, <4 x double> %mask, i8 2)
1604*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %call
1605*9880d681SAndroid Build Coastguard Worker}
; 256-bit gather intrinsic: pass-through, i8* base, <4 x i64> indices, mask, i8 scale.
1606*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
1607*9880d681SAndroid Build Coastguard Worker
; Masked 256-bit gather of doubles using <4 x i64> indices. The caller's
; pass-through vector (%a0), indices (%a2) and mask (%a3) are forwarded
; unchanged to @llvm.x86.avx2.gather.q.pd.256 with scale 2. The expected
; assembly is a single vgatherqpd on ymm registers.
; The base pointer is typed double* to match the gathered element type and the
; sibling test_mm_mask_i64gather_pd test; it is bitcast to i8* before the call,
; so the pointee type has no effect on the generated code.
1608*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_mask_i64gather_pd(<4 x double> %a0, double *%a1, <4 x i64> %a2, <4 x double> %a3) {
1609*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i64gather_pd:
1610*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1611*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1612*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0
1613*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1614*9880d681SAndroid Build Coastguard Worker;
1615*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i64gather_pd:
1616*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1617*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0
1618*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1619*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast double *%a1 to i8*
1620*9880d681SAndroid Build Coastguard Worker  %call = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %arg1, <4 x i64> %a2, <4 x double> %a3, i8 2)
1621*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %call
1622*9880d681SAndroid Build Coastguard Worker}
1623*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit gather of floats using <2 x i64> indices. The pass-through
; operand is undef; the mask is built in IR by comparing a zero vector with
; itself (fcmp oeq), sign-extending the <4 x i1> result to <4 x i32> and
; bitcasting it to <4 x float>.
; The expected assembly materialises the mask with vpcmpeqd, issues vgatherqps
; with scale 2, then copies the result into xmm0.
1624*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_i64gather_ps(float *%a0, <2 x i64> %a1) {
1625*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_i64gather_ps:
1626*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1627*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1628*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1629*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqps %xmm2, (%eax,%xmm0,2), %xmm1
1630*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovaps %xmm1, %xmm0
1631*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1632*9880d681SAndroid Build Coastguard Worker;
1633*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_i64gather_ps:
1634*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1635*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1636*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm0,2), %xmm1
1637*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovaps %xmm1, %xmm0
1638*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1639*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast float *%a0 to i8*
1640*9880d681SAndroid Build Coastguard Worker  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
1641*9880d681SAndroid Build Coastguard Worker  %sext = sext <4 x i1> %cmp to <4 x i32>
1642*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <4 x i32> %sext to <4 x float>
1643*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> undef, i8* %arg0, <2 x i64> %a1, <4 x float> %mask, i8 2)
1644*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1645*9880d681SAndroid Build Coastguard Worker}
; 128-bit gather intrinsic: pass-through, i8* base, <2 x i64> indices, mask, i8 scale.
1646*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
1647*9880d681SAndroid Build Coastguard Worker
; Masked 128-bit gather of floats using <2 x i64> indices. The caller's
; pass-through vector (%a0), indices (%a2) and mask (%a3) are forwarded
; unchanged to @llvm.x86.avx2.gather.q.ps with scale 2; only the base pointer
; is bitcast to i8*. The expected assembly is a single vgatherqps.
1648*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_mask_i64gather_ps(<4 x float> %a0, float *%a1, <2 x i64> %a2, <4 x float> %a3) {
1649*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_i64gather_ps:
1650*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1651*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1652*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0
1653*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1654*9880d681SAndroid Build Coastguard Worker;
1655*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_i64gather_ps:
1656*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1657*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
1658*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1659*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast float *%a1 to i8*
1660*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %arg1, <2 x i64> %a2, <4 x float> %a3, i8 2)
1661*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1662*9880d681SAndroid Build Coastguard Worker}
1663*9880d681SAndroid Build Coastguard Worker
; Unmasked gather of four floats using <4 x i64> indices: the index vector is
; 256 bits wide (ymm in the addressing operand) while the data, mask and result
; are <4 x float> (xmm). The pass-through operand is undef and the mask is built
; in IR from fcmp oeq of a zero vector with itself, sign-extended and bitcast to
; <4 x float>.
; The expected assembly materialises the mask with vpcmpeqd, issues vgatherqps,
; copies the result into xmm0 and emits vzeroupper before returning.
1664*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm256_i64gather_ps(float *%a0, <4 x i64> %a1) {
1665*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_i64gather_ps:
1666*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1667*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1668*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1669*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqps %xmm2, (%eax,%ymm0,2), %xmm1
1670*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovaps %xmm1, %xmm0
1671*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1672*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1673*9880d681SAndroid Build Coastguard Worker;
1674*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_i64gather_ps:
1675*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1676*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
1677*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm0,2), %xmm1
1678*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovaps %xmm1, %xmm0
1679*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1680*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1681*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast float *%a0 to i8*
1682*9880d681SAndroid Build Coastguard Worker  %cmp = fcmp oeq <4 x float> zeroinitializer, zeroinitializer
1683*9880d681SAndroid Build Coastguard Worker  %sext = sext <4 x i1> %cmp to <4 x i32>
1684*9880d681SAndroid Build Coastguard Worker  %mask = bitcast <4 x i32> %sext to <4 x float>
1685*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> undef, i8* %arg0, <4 x i64> %a1, <4 x float> %mask, i8 2)
1686*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1687*9880d681SAndroid Build Coastguard Worker}
; Gather intrinsic with <4 x i64> indices and <4 x float> data: pass-through,
; i8* base, indices, mask, i8 scale.
1688*9880d681SAndroid Build Coastguard Workerdeclare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
1689*9880d681SAndroid Build Coastguard Worker
; Masked gather of four floats using <4 x i64> indices. The caller's
; pass-through vector (%a0), indices (%a2) and mask (%a3) are forwarded
; unchanged to @llvm.x86.avx2.gather.q.ps.256 with scale 2. The expected
; assembly is a single vgatherqps (ymm index, xmm data) followed by vzeroupper.
1690*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm256_mask_i64gather_ps(<4 x float> %a0, float *%a1, <4 x i64> %a2, <4 x float> %a3) {
1691*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_i64gather_ps:
1692*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1693*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1694*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0
1695*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1696*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1697*9880d681SAndroid Build Coastguard Worker;
1698*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_i64gather_ps:
1699*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1700*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0
1701*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1702*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1703*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast float *%a1 to i8*
1704*9880d681SAndroid Build Coastguard Worker  %call = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %arg1, <4 x i64> %a2, <4 x float> %a3, i8 2)
1705*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %call
1706*9880d681SAndroid Build Coastguard Worker}
1707*9880d681SAndroid Build Coastguard Worker
; Inserts the 128-bit vector %a1 into the low half of %a0 using two
; shufflevectors: %a1 is first widened to <4 x i64> (upper lanes undef), then
; the mask <4, 5, 2, 3> takes lanes 0-1 from the widened value and lanes 2-3
; from %a0. The expected assembly is a vpblendd rather than an insert
; instruction.
1708*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test0_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
1709*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test0_mm256_inserti128_si256:
1710*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1711*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
1712*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1713*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1714*9880d681SAndroid Build Coastguard Worker;
1715*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test0_mm256_inserti128_si256:
1716*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1717*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
1718*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
1719*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1720*9880d681SAndroid Build Coastguard Worker  %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1721*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1722*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
1723*9880d681SAndroid Build Coastguard Worker}
1724*9880d681SAndroid Build Coastguard Worker
; Inserts the 128-bit vector %a1 into the high half of %a0 using two
; shufflevectors: %a1 is widened to <4 x i64> (upper lanes undef), then the
; mask <0, 1, 4, 5> keeps lanes 0-1 of %a0 and takes lanes 2-3 from the low
; half of the widened value. The recorded assembly is vinsertf128 with
; immediate 1.
1725*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test1_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
1726*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test1_mm256_inserti128_si256:
1727*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1728*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1729*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1730*9880d681SAndroid Build Coastguard Worker;
1731*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test1_mm256_inserti128_si256:
1732*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1733*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1734*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1735*9880d681SAndroid Build Coastguard Worker  %ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
1736*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1737*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
1738*9880d681SAndroid Build Coastguard Worker}
1739*9880d681SAndroid Build Coastguard Worker
; Both <4 x i64> operands are reinterpreted as <16 x i16> and passed to
; @llvm.x86.avx2.pmadd.wd; the <8 x i32> result is bitcast back to <4 x i64>.
; The expected assembly for both triples is a single vpmaddwd.
1740*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_madd_epi16(<4 x i64> %a0, <4 x i64> %a1) {
1741*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_madd_epi16:
1742*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1743*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
1744*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1745*9880d681SAndroid Build Coastguard Worker;
1746*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_madd_epi16:
1747*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1748*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0
1749*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1750*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
1751*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
1752*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %arg0, <16 x i16> %arg1)
1753*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
1754*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1755*9880d681SAndroid Build Coastguard Worker}
; Intrinsic taking two <16 x i16> vectors and returning <8 x i32>.
1756*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
1757*9880d681SAndroid Build Coastguard Worker
; Both <4 x i64> operands are reinterpreted as <32 x i8> and passed to
; @llvm.x86.avx2.pmadd.ub.sw; the <16 x i16> result is bitcast back to
; <4 x i64>. The expected assembly for both triples is a single vpmaddubsw.
1758*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_maddubs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
1759*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maddubs_epi16:
1760*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1761*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0
1762*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1763*9880d681SAndroid Build Coastguard Worker;
1764*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maddubs_epi16:
1765*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1766*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0
1767*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1768*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
1769*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
1770*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %arg0, <32 x i8> %arg1)
1771*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
1772*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1773*9880d681SAndroid Build Coastguard Worker}
; Intrinsic taking two <32 x i8> vectors and returning <16 x i16>.
1774*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
1775*9880d681SAndroid Build Coastguard Worker
; 128-bit masked load of 32-bit elements: the i32* source is bitcast to i8*,
; the <2 x i64> mask is reinterpreted as <4 x i32>, and both are passed to
; @llvm.x86.avx2.maskload.d. The <4 x i32> result is bitcast back to <2 x i64>.
; The expected assembly is a single vpmaskmovd from memory into xmm0.
1776*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_maskload_epi32(i32* %a0, <2 x i64> %a1) nounwind {
1777*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskload_epi32:
1778*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1779*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1780*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0
1781*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1782*9880d681SAndroid Build Coastguard Worker;
1783*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskload_epi32:
1784*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1785*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0
1786*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1787*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32* %a0 to i8*
1788*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1789*9880d681SAndroid Build Coastguard Worker  %call = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %arg0, <4 x i32> %arg1)
1790*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %call to <2 x i64>
1791*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
1792*9880d681SAndroid Build Coastguard Worker}
; Masked-load intrinsic: i8* source address and <4 x i32> mask.
1793*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
1794*9880d681SAndroid Build Coastguard Worker
; 256-bit masked load of 32-bit elements: the i32* source is bitcast to i8*,
; the <4 x i64> mask is reinterpreted as <8 x i32>, and both are passed to
; @llvm.x86.avx2.maskload.d.256. The <8 x i32> result is bitcast back to
; <4 x i64>. The expected assembly is a single vpmaskmovd into ymm0.
1795*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_maskload_epi32(i32* %a0, <4 x i64> %a1) nounwind {
1796*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskload_epi32:
1797*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1798*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1799*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0
1800*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1801*9880d681SAndroid Build Coastguard Worker;
1802*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskload_epi32:
1803*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1804*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
1805*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1806*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32* %a0 to i8*
1807*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1808*9880d681SAndroid Build Coastguard Worker  %call = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %arg0, <8 x i32> %arg1)
1809*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %call to <4 x i64>
1810*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1811*9880d681SAndroid Build Coastguard Worker}
; Masked-load intrinsic: i8* source address and <8 x i32> mask.
1812*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
1813*9880d681SAndroid Build Coastguard Worker
; 128-bit masked load of 64-bit elements: the i64* source is bitcast to i8* and
; passed with the <2 x i64> mask straight to @llvm.x86.avx2.maskload.q (no
; vector bitcasts are needed). The expected assembly is a single vpmaskmovq
; from memory into xmm0.
1814*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_maskload_epi64(i64* %a0, <2 x i64> %a1) nounwind {
1815*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskload_epi64:
1816*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1817*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1818*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0
1819*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1820*9880d681SAndroid Build Coastguard Worker;
1821*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskload_epi64:
1822*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1823*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0
1824*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1825*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64* %a0 to i8*
1826*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %arg0, <2 x i64> %a1)
1827*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
1828*9880d681SAndroid Build Coastguard Worker}
; Masked-load intrinsic: i8* source address and <2 x i64> mask.
1829*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
1830*9880d681SAndroid Build Coastguard Worker
; 256-bit masked load of 64-bit elements: the i64* source is bitcast to i8* and
; passed with the <4 x i64> mask straight to @llvm.x86.avx2.maskload.q.256.
; The expected assembly is a single vpmaskmovq from memory into ymm0.
1831*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_maskload_epi64(i64* %a0, <4 x i64> %a1) nounwind {
1832*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskload_epi64:
1833*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1834*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1835*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0
1836*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1837*9880d681SAndroid Build Coastguard Worker;
1838*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskload_epi64:
1839*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1840*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
1841*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1842*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64* %a0 to i8*
1843*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %arg0, <4 x i64> %a1)
1844*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
1845*9880d681SAndroid Build Coastguard Worker}
; Masked-load intrinsic: i8* source address and <4 x i64> mask.
1846*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
1847*9880d681SAndroid Build Coastguard Worker
; 128-bit masked store of 32-bit elements: the destination pointer is bitcast
; to i8*, both <2 x i64> vector operands are reinterpreted as <4 x i32>, and all
; three are passed to @llvm.x86.avx2.maskstore.d. The expected assembly is a
; single vpmaskmovd to memory.
; The destination is typed i32* to match the 32-bit integer elements and the
; sibling test_mm_maskload_epi32 test; it is bitcast to i8* before the call, so
; the pointee type has no effect on the generated code.
1848*9880d681SAndroid Build Coastguard Workerdefine void @test_mm_maskstore_epi32(i32* %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
1849*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskstore_epi32:
1850*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1851*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1852*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax)
1853*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1854*9880d681SAndroid Build Coastguard Worker;
1855*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskstore_epi32:
1856*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1857*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
1858*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1859*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32* %a0 to i8*
1860*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
1861*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
1862*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx2.maskstore.d(i8* %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
1863*9880d681SAndroid Build Coastguard Worker  ret void
1864*9880d681SAndroid Build Coastguard Worker}
; The intrinsic writes through its pointer operand, so the declaration must not
; carry readnone (a void readnone nounwind call would be trivially removable).
1865*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
1866*9880d681SAndroid Build Coastguard Worker
; 256-bit masked store of 32-bit elements: the destination pointer is bitcast
; to i8*, both <4 x i64> vector operands are reinterpreted as <8 x i32>, and all
; three are passed to @llvm.x86.avx2.maskstore.d.256. The expected assembly is
; a single vpmaskmovd to memory followed by vzeroupper.
; The destination is typed i32* to match the 32-bit integer elements and the
; sibling test_mm256_maskload_epi32 test; it is bitcast to i8* before the call,
; so the pointee type has no effect on the generated code.
1867*9880d681SAndroid Build Coastguard Workerdefine void @test_mm256_maskstore_epi32(i32* %a0, <4 x i64> %a1, <4 x i64> %a2) nounwind {
1868*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskstore_epi32:
1869*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1870*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1871*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax)
1872*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1873*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1874*9880d681SAndroid Build Coastguard Worker;
1875*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskstore_epi32:
1876*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1877*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)
1878*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1879*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1880*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i32* %a0 to i8*
1881*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1882*9880d681SAndroid Build Coastguard Worker  %arg2 = bitcast <4 x i64> %a2 to <8 x i32>
1883*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx2.maskstore.d.256(i8* %arg0, <8 x i32> %arg1, <8 x i32> %arg2)
1884*9880d681SAndroid Build Coastguard Worker  ret void
1885*9880d681SAndroid Build Coastguard Worker}
; The intrinsic writes through its pointer operand, so the declaration must not
; carry readnone (a void readnone nounwind call would be trivially removable).
1886*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
1887*9880d681SAndroid Build Coastguard Worker
; 128-bit masked store of 64-bit elements: the i64* destination is bitcast to
; i8* and passed with the two <2 x i64> vector operands straight to
; @llvm.x86.avx2.maskstore.q. The expected assembly is a single vpmaskmovq to
; memory.
1888*9880d681SAndroid Build Coastguard Workerdefine void @test_mm_maskstore_epi64(i64* %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
1889*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskstore_epi64:
1890*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1891*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1892*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax)
1893*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1894*9880d681SAndroid Build Coastguard Worker;
1895*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskstore_epi64:
1896*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1897*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi)
1898*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1899*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64* %a0 to i8*
1900*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx2.maskstore.q(i8* %arg0, <2 x i64> %a1, <2 x i64> %a2)
1901*9880d681SAndroid Build Coastguard Worker  ret void
1902*9880d681SAndroid Build Coastguard Worker}
; The intrinsic writes through its pointer operand, so the declaration must not
; carry readnone (a void readnone nounwind call would be trivially removable).
1903*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
1904*9880d681SAndroid Build Coastguard Worker
; 256-bit masked store of 64-bit elements: the i64* destination is bitcast to
; i8* and passed with the two <4 x i64> vector operands straight to
; @llvm.x86.avx2.maskstore.q.256. The expected assembly is a single vpmaskmovq
; to memory followed by vzeroupper.
1905*9880d681SAndroid Build Coastguard Workerdefine void @test_mm256_maskstore_epi64(i64* %a0, <4 x i64> %a1, <4 x i64> %a2) nounwind {
1906*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskstore_epi64:
1907*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1908*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
1909*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax)
1910*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
1911*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1912*9880d681SAndroid Build Coastguard Worker;
1913*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskstore_epi64:
1914*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1915*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi)
1916*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
1917*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1918*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i64* %a0 to i8*
1919*9880d681SAndroid Build Coastguard Worker  call void @llvm.x86.avx2.maskstore.q.256(i8* %arg0, <4 x i64> %a1, <4 x i64> %a2)
1920*9880d681SAndroid Build Coastguard Worker  ret void
1921*9880d681SAndroid Build Coastguard Worker}
; The intrinsic writes through its pointer operand, so the declaration must not
; carry readnone (a void readnone nounwind call would be trivially removable).
1922*9880d681SAndroid Build Coastguard Workerdeclare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
1923*9880d681SAndroid Build Coastguard Worker
; Signed byte maximum written as generic IR instead of an intrinsic call: both
; operands are reinterpreted as <32 x i8>, compared with icmp sgt, and the
; larger lane is chosen with select. The expected assembly shows this pattern
; being selected as a single vpmaxsb.
1924*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epi8(<4 x i64> %a0, <4 x i64> %a1) {
1925*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epi8:
1926*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1927*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
1928*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1929*9880d681SAndroid Build Coastguard Worker;
1930*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epi8:
1931*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1932*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
1933*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1934*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
1935*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
1936*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <32 x i8> %arg0, %arg1
1937*9880d681SAndroid Build Coastguard Worker  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
1938*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %sel to <4 x i64>
1939*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1940*9880d681SAndroid Build Coastguard Worker}
1941*9880d681SAndroid Build Coastguard Worker
; Signed 16-bit maximum written as generic IR instead of an intrinsic call:
; both operands are reinterpreted as <16 x i16>, compared with icmp sgt, and
; the larger lane is chosen with select. The expected assembly shows this
; pattern being selected as a single vpmaxsw.
1942*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epi16(<4 x i64> %a0, <4 x i64> %a1) {
1943*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epi16:
1944*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1945*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
1946*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1947*9880d681SAndroid Build Coastguard Worker;
1948*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epi16:
1949*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1950*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
1951*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1952*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
1953*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
1954*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <16 x i16> %arg0, %arg1
1955*9880d681SAndroid Build Coastguard Worker  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
1956*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %sel to <4 x i64>
1957*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1958*9880d681SAndroid Build Coastguard Worker}
1959*9880d681SAndroid Build Coastguard Worker
; Signed element-wise maximum over the <8 x i32> view of %a0 and %a1, written
; as generic IR (icmp sgt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpmaxsd.
1960*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epi32(<4 x i64> %a0, <4 x i64> %a1) {
1961*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epi32:
1962*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1963*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
1964*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1965*9880d681SAndroid Build Coastguard Worker;
1966*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epi32:
1967*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1968*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
1969*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1970*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
1971*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
1972*9880d681SAndroid Build Coastguard Worker  %cmp = icmp sgt <8 x i32> %arg0, %arg1
1973*9880d681SAndroid Build Coastguard Worker  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
1974*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %sel to <4 x i64>
1975*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1976*9880d681SAndroid Build Coastguard Worker}
1977*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise maximum over the <32 x i8> view of %a0 and %a1, written
; as generic IR (icmp ugt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpmaxub.
1978*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epu8(<4 x i64> %a0, <4 x i64> %a1) {
1979*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epu8:
1980*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1981*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
1982*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1983*9880d681SAndroid Build Coastguard Worker;
1984*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epu8:
1985*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1986*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
1987*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1988*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
1989*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
1990*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ugt <32 x i8> %arg0, %arg1
1991*9880d681SAndroid Build Coastguard Worker  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
1992*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %sel to <4 x i64>
1993*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
1994*9880d681SAndroid Build Coastguard Worker}
1995*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise maximum over the <16 x i16> view of %a0 and %a1, written
; as generic IR (icmp ugt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpmaxuw.
1996*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epu16(<4 x i64> %a0, <4 x i64> %a1) {
1997*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epu16:
1998*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1999*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
2000*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2001*9880d681SAndroid Build Coastguard Worker;
2002*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epu16:
2003*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2004*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
2005*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2006*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2007*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2008*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ugt <16 x i16> %arg0, %arg1
2009*9880d681SAndroid Build Coastguard Worker  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
2010*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %sel to <4 x i64>
2011*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2012*9880d681SAndroid Build Coastguard Worker}
2013*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise maximum over the <8 x i32> view of %a0 and %a1, written
; as generic IR (icmp ugt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpmaxud.
2014*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_max_epu32(<4 x i64> %a0, <4 x i64> %a1) {
2015*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_max_epu32:
2016*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2017*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
2018*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2019*9880d681SAndroid Build Coastguard Worker;
2020*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_max_epu32:
2021*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2022*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
2023*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2024*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2025*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2026*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ugt <8 x i32> %arg0, %arg1
2027*9880d681SAndroid Build Coastguard Worker  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
2028*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %sel to <4 x i64>
2029*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2030*9880d681SAndroid Build Coastguard Worker}
2031*9880d681SAndroid Build Coastguard Worker
; Signed element-wise minimum over the <32 x i8> view of %a0 and %a1, written
; as generic IR (icmp slt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminsb.
2032*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epi8(<4 x i64> %a0, <4 x i64> %a1) {
2033*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epi8:
2034*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2035*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
2036*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2037*9880d681SAndroid Build Coastguard Worker;
2038*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epi8:
2039*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2040*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
2041*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2042*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2043*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2044*9880d681SAndroid Build Coastguard Worker  %cmp = icmp slt <32 x i8> %arg0, %arg1
2045*9880d681SAndroid Build Coastguard Worker  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
2046*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %sel to <4 x i64>
2047*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2048*9880d681SAndroid Build Coastguard Worker}
2049*9880d681SAndroid Build Coastguard Worker
; Signed element-wise minimum over the <16 x i16> view of %a0 and %a1, written
; as generic IR (icmp slt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminsw.
2050*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2051*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epi16:
2052*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2053*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
2054*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2055*9880d681SAndroid Build Coastguard Worker;
2056*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epi16:
2057*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2058*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
2059*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2060*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2061*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2062*9880d681SAndroid Build Coastguard Worker  %cmp = icmp slt <16 x i16> %arg0, %arg1
2063*9880d681SAndroid Build Coastguard Worker  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
2064*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %sel to <4 x i64>
2065*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2066*9880d681SAndroid Build Coastguard Worker}
2067*9880d681SAndroid Build Coastguard Worker
; Signed element-wise minimum over the <8 x i32> view of %a0 and %a1, written
; as generic IR (icmp slt + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminsd.
2068*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2069*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epi32:
2070*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2071*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2072*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2073*9880d681SAndroid Build Coastguard Worker;
2074*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epi32:
2075*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2076*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
2077*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2078*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2079*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2080*9880d681SAndroid Build Coastguard Worker  %cmp = icmp slt <8 x i32> %arg0, %arg1
2081*9880d681SAndroid Build Coastguard Worker  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
2082*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %sel to <4 x i64>
2083*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2084*9880d681SAndroid Build Coastguard Worker}
2085*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise minimum over the <32 x i8> view of %a0 and %a1, written
; as generic IR (icmp ult + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminub.
2086*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epu8(<4 x i64> %a0, <4 x i64> %a1) {
2087*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epu8:
2088*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2089*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminub %ymm1, %ymm0, %ymm0
2090*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2091*9880d681SAndroid Build Coastguard Worker;
2092*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epu8:
2093*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2094*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm0
2095*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2096*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2097*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2098*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ult <32 x i8> %arg0, %arg1
2099*9880d681SAndroid Build Coastguard Worker  %sel = select <32 x i1> %cmp, <32 x i8> %arg0, <32 x i8> %arg1
2100*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %sel to <4 x i64>
2101*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2102*9880d681SAndroid Build Coastguard Worker}
2103*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise minimum over the <16 x i16> view of %a0 and %a1, written
; as generic IR (icmp ult + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminuw.
2104*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epu16(<4 x i64> %a0, <4 x i64> %a1) {
2105*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epu16:
2106*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2107*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
2108*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2109*9880d681SAndroid Build Coastguard Worker;
2110*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epu16:
2111*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2112*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
2113*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2114*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2115*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2116*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ult <16 x i16> %arg0, %arg1
2117*9880d681SAndroid Build Coastguard Worker  %sel = select <16 x i1> %cmp, <16 x i16> %arg0, <16 x i16> %arg1
2118*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %sel to <4 x i64>
2119*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2120*9880d681SAndroid Build Coastguard Worker}
2121*9880d681SAndroid Build Coastguard Worker
; Unsigned element-wise minimum over the <8 x i32> view of %a0 and %a1, written
; as generic IR (icmp ult + select) rather than a target intrinsic call.
; Both the i386 and x86_64 runs are expected to emit a single vpminud.
2122*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_min_epu32(<4 x i64> %a0, <4 x i64> %a1) {
2123*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_min_epu32:
2124*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2125*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpminud %ymm1, %ymm0, %ymm0
2126*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2127*9880d681SAndroid Build Coastguard Worker;
2128*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_min_epu32:
2129*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2130*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm0
2131*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2132*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2133*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2134*9880d681SAndroid Build Coastguard Worker  %cmp = icmp ult <8 x i32> %arg0, %arg1
2135*9880d681SAndroid Build Coastguard Worker  %sel = select <8 x i1> %cmp, <8 x i32> %arg0, <8 x i32> %arg1
2136*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %sel to <4 x i64>
2137*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2138*9880d681SAndroid Build Coastguard Worker}
2139*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmovmskb on the <32 x i8> view of %a0 and returns the
; resulting i32. Both runs are expected to emit vpmovmskb into %eax, followed
; by vzeroupper before the return.
2140*9880d681SAndroid Build Coastguard Workerdefine i32 @test_mm256_movemask_epi8(<4 x i64> %a0) nounwind {
2141*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_movemask_epi8:
2142*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2143*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmovmskb %ymm0, %eax
2144*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vzeroupper
2145*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2146*9880d681SAndroid Build Coastguard Worker;
2147*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_movemask_epi8:
2148*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2149*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmovmskb %ymm0, %eax
2150*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vzeroupper
2151*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2152*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2153*9880d681SAndroid Build Coastguard Worker  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %arg0)
2154*9880d681SAndroid Build Coastguard Worker  ret i32 %res
2155*9880d681SAndroid Build Coastguard Worker}
2156*9880d681SAndroid Build Coastguard Workerdeclare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
2157*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.mpsadbw on the <32 x i8> views of %a0 and %a1 with the
; constant i8 operand 3; the <16 x i16> result is bitcast back to <4 x i64>.
; Both runs are expected to emit vmpsadbw carrying that constant as the $3
; immediate.
2158*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mpsadbw_epu8(<4 x i64> %a0, <4 x i64> %a1) {
2159*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mpsadbw_epu8:
2160*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2161*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmpsadbw $3, %ymm1, %ymm0, %ymm0
2162*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2163*9880d681SAndroid Build Coastguard Worker;
2164*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mpsadbw_epu8:
2165*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2166*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmpsadbw $3, %ymm1, %ymm0, %ymm0
2167*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2168*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2169*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2170*9880d681SAndroid Build Coastguard Worker  %call = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %arg0, <32 x i8> %arg1, i8 3)
2171*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16>  %call to <4 x i64>
2172*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2173*9880d681SAndroid Build Coastguard Worker}
2174*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
2175*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmul.dq on the <8 x i32> views of %a0 and %a1. The
; intrinsic already returns <4 x i64>, so no trailing bitcast is needed.
; Both the i386 and x86_64 runs are expected to emit a single vpmuldq.
2176*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mul_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2177*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mul_epi32:
2178*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2179*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
2180*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2181*9880d681SAndroid Build Coastguard Worker;
2182*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mul_epi32:
2183*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2184*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
2185*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2186*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2187*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2188*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %arg0, <8 x i32> %arg1)
2189*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2190*9880d681SAndroid Build Coastguard Worker}
2191*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
2192*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmulu.dq on the <8 x i32> views of %a0 and %a1. The
; intrinsic already returns <4 x i64>, so no trailing bitcast is needed.
; Both the i386 and x86_64 runs are expected to emit a single vpmuludq.
2193*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mul_epu32(<4 x i64> %a0, <4 x i64> %a1) {
2194*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mul_epu32:
2195*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2196*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
2197*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2198*9880d681SAndroid Build Coastguard Worker;
2199*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mul_epu32:
2200*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2201*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
2202*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2203*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2204*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2205*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %arg0, <8 x i32> %arg1)
2206*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2207*9880d681SAndroid Build Coastguard Worker}
2208*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
2209*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmulh.w on the <16 x i16> views of %a0 and %a1 and
; bitcasts the <16 x i16> result back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpmulhw.
2210*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mulhi_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2211*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mulhi_epi16:
2212*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2213*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0
2214*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2215*9880d681SAndroid Build Coastguard Worker;
2216*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mulhi_epi16:
2217*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2218*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0
2219*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2220*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2221*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2222*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %arg0, <16 x i16> %arg1)
2223*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2224*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2225*9880d681SAndroid Build Coastguard Worker}
2226*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
2227*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmulhu.w on the <16 x i16> views of %a0 and %a1 and
; bitcasts the <16 x i16> result back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpmulhuw.
2228*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mulhi_epu16(<4 x i64> %a0, <4 x i64> %a1) {
2229*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mulhi_epu16:
2230*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2231*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0
2232*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2233*9880d681SAndroid Build Coastguard Worker;
2234*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mulhi_epu16:
2235*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2236*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0
2237*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2238*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2239*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2240*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %arg0, <16 x i16> %arg1)
2241*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2242*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2243*9880d681SAndroid Build Coastguard Worker}
2244*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
2245*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.pmul.hr.sw on the <16 x i16> views of %a0 and %a1 and
; bitcasts the <16 x i16> result back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpmulhrsw.
2246*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mulhrs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2247*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mulhrs_epi16:
2248*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2249*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0
2250*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2251*9880d681SAndroid Build Coastguard Worker;
2252*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mulhrs_epi16:
2253*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2254*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0
2255*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2256*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2257*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2258*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %arg0, <16 x i16> %arg1)
2259*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2260*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2261*9880d681SAndroid Build Coastguard Worker}
2262*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
2263*9880d681SAndroid Build Coastguard Worker
; Element-wise multiply of the <16 x i16> views of %a0 and %a1 using the
; generic IR `mul` instruction (no intrinsic call); the product is bitcast
; back to <4 x i64>. Both runs are expected to emit a single vpmullw.
2264*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mullo_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2265*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mullo_epi16:
2266*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2267*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
2268*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2269*9880d681SAndroid Build Coastguard Worker;
2270*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mullo_epi16:
2271*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2272*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
2273*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2274*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2275*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2276*9880d681SAndroid Build Coastguard Worker  %res = mul <16 x i16> %arg0, %arg1
2277*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2278*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2279*9880d681SAndroid Build Coastguard Worker}
2280*9880d681SAndroid Build Coastguard Worker
; Element-wise multiply of the <8 x i32> views of %a0 and %a1 using the
; generic IR `mul` instruction (no intrinsic call); the product is bitcast
; back to <4 x i64>. Both runs are expected to emit a single vpmulld.
2281*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mullo_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2282*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mullo_epi32:
2283*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2284*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
2285*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2286*9880d681SAndroid Build Coastguard Worker;
2287*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mullo_epi32:
2288*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2289*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
2290*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2291*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2292*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2293*9880d681SAndroid Build Coastguard Worker  %res = mul <8 x i32> %arg0, %arg1
2294*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2295*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2296*9880d681SAndroid Build Coastguard Worker}
2297*9880d681SAndroid Build Coastguard Worker
; Bitwise OR applied directly to the <4 x i64> operands with the generic IR
; `or` instruction; no bitcasts or intrinsic calls are involved.
; Both the i386 and x86_64 runs are expected to emit vorps for it.
2298*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_or_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
2299*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_or_si256:
2300*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2301*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vorps %ymm1, %ymm0, %ymm0
2302*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2303*9880d681SAndroid Build Coastguard Worker;
2304*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_or_si256:
2305*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2306*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vorps %ymm1, %ymm0, %ymm0
2307*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2308*9880d681SAndroid Build Coastguard Worker  %res = or <4 x i64> %a0, %a1
2309*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2310*9880d681SAndroid Build Coastguard Worker}
2311*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.packsswb on the <16 x i16> views of %a0 and %a1; the
; <32 x i8> result is bitcast back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpacksswb.
2312*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_packs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2313*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_packs_epi16:
2314*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2315*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
2316*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2317*9880d681SAndroid Build Coastguard Worker;
2318*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_packs_epi16:
2319*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2320*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
2321*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2322*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2323*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2324*9880d681SAndroid Build Coastguard Worker  %call = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %arg0, <16 x i16> %arg1)
2325*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %call to <4 x i64>
2326*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2327*9880d681SAndroid Build Coastguard Worker}
2328*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
2329*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.packssdw on the <8 x i32> views of %a0 and %a1; the
; <16 x i16> result is bitcast back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpackssdw.
2330*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_packs_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2331*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_packs_epi32:
2332*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2333*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2334*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2335*9880d681SAndroid Build Coastguard Worker;
2336*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_packs_epi32:
2337*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2338*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
2339*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2340*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2341*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2342*9880d681SAndroid Build Coastguard Worker  %call = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %arg0, <8 x i32> %arg1)
2343*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %call to <4 x i64>
2344*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2345*9880d681SAndroid Build Coastguard Worker}
2346*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
2347*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.packuswb on the <16 x i16> views of %a0 and %a1; the
; <32 x i8> result is bitcast back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpackuswb.
2348*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_packus_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2349*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_packus_epi16:
2350*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2351*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
2352*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2353*9880d681SAndroid Build Coastguard Worker;
2354*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_packus_epi16:
2355*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2356*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
2357*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2358*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2359*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2360*9880d681SAndroid Build Coastguard Worker  %call = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %arg0, <16 x i16> %arg1)
2361*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %call to <4 x i64>
2362*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2363*9880d681SAndroid Build Coastguard Worker}
2364*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
2365*9880d681SAndroid Build Coastguard Worker
; Calls @llvm.x86.avx2.packusdw on the <8 x i32> views of %a0 and %a1; the
; <16 x i16> result is bitcast back to <4 x i64>.
; Both the i386 and x86_64 runs are expected to emit a single vpackusdw.
2366*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_packus_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2367*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_packus_epi32:
2368*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2369*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
2370*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2371*9880d681SAndroid Build Coastguard Worker;
2372*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_packus_epi32:
2373*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2374*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0
2375*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2376*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2377*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2378*9880d681SAndroid Build Coastguard Worker  %call = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %arg0, <8 x i32> %arg1)
2379*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %call to <4 x i64>
2380*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2381*9880d681SAndroid Build Coastguard Worker}
2382*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
2383*9880d681SAndroid Build Coastguard Worker
; test_mm256_permute2x128_si256: calls llvm.x86.avx2.vperm2i128 directly on the
; <4 x i64> arguments with the constant selector i8 49. The autogenerated
; assertions expect one instruction whose decoded shuffle comment is
; ymm0[2,3],ymm1[2,3].
; NOTE(review): the expected mnemonic is vperm2f128 although the intrinsic is
; named vperm2i128 - presumably an instruction-domain choice made by llc;
; confirm against current llc output before regenerating.
; NOTE(review): the declaration below is marked readonly, while most sibling
; intrinsic declarations in this file use readnone - confirm this is intended.
2384*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_permute2x128_si256(<4 x i64> %a0, <4 x i64> %a1) {
2385*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permute2x128_si256:
2386*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2387*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2388*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2389*9880d681SAndroid Build Coastguard Worker;
2390*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permute2x128_si256:
2391*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2392*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
2393*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2394*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 49)
2395*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2396*9880d681SAndroid Build Coastguard Worker}
2397*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
2398*9880d681SAndroid Build Coastguard Worker
; test_mm256_permute4x64_epi64: expressed as a plain shufflevector of %a0 with
; an undef second operand and mask <3, 0, 2, 0>; no intrinsic is called. The
; autogenerated assertions expect a single vpermq whose decoded element list
; is the same [3,0,2,0].
2399*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_permute4x64_epi64(<4 x i64> %a0) {
2400*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permute4x64_epi64:
2401*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2402*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,2,0]
2403*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2404*9880d681SAndroid Build Coastguard Worker;
2405*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permute4x64_epi64:
2406*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2407*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,2,0]
2408*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2409*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 0>
2410*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2411*9880d681SAndroid Build Coastguard Worker}
2412*9880d681SAndroid Build Coastguard Worker
; test_mm256_permute4x64_pd: floating-point counterpart of the 4x64 permute
; test, written as a shufflevector of the <4 x double> argument with an undef
; second operand and mask <1, 2, 1, 0>. The autogenerated assertions expect a
; single vpermpd whose decoded element list is [1,2,1,0].
2413*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_permute4x64_pd(<4 x double> %a0) {
2414*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permute4x64_pd:
2415*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2416*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,1,0]
2417*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2418*9880d681SAndroid Build Coastguard Worker;
2419*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permute4x64_pd:
2420*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2421*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,2,1,0]
2422*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2423*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 2, i32 1, i32 0>
2424*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
2425*9880d681SAndroid Build Coastguard Worker}
2426*9880d681SAndroid Build Coastguard Worker
; test_mm256_permutevar8x32_epi32: bitcasts both <4 x i64> arguments to
; <8 x i32>, calls llvm.x86.avx2.permd with %arg0 first and %arg1 second, and
; bitcasts the result back to <4 x i64>. The autogenerated assertions expect a
; single vpermd with source operands written as %ymm0, %ymm1.
; NOTE(review): the declaration below is marked readonly, while most sibling
; intrinsic declarations in this file use readnone - confirm this is intended.
2427*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_permutevar8x32_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2428*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permutevar8x32_epi32:
2429*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2430*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermd %ymm0, %ymm1, %ymm0
2431*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2432*9880d681SAndroid Build Coastguard Worker;
2433*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permutevar8x32_epi32:
2434*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2435*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm0
2436*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2437*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2438*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2439*9880d681SAndroid Build Coastguard Worker  %call = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %arg0, <8 x i32> %arg1)
2440*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %call to <4 x i64>
2441*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2442*9880d681SAndroid Build Coastguard Worker}
2443*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
2444*9880d681SAndroid Build Coastguard Worker
; test_mm256_permutevar8x32_ps: passes the <8 x float> argument through
; unchanged, bitcasts the <4 x i64> second argument to <8 x i32>, and calls
; llvm.x86.avx2.permps. The autogenerated assertions expect a single vpermps
; with source operands written as %ymm0, %ymm1.
; NOTE(review): the declaration below is marked readonly, while most sibling
; intrinsic declarations in this file use readnone - confirm this is intended.
2445*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_permutevar8x32_ps(<8 x float> %a0, <4 x i64> %a1) {
2446*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permutevar8x32_ps:
2447*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2448*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermps %ymm0, %ymm1, %ymm0
2449*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2450*9880d681SAndroid Build Coastguard Worker;
2451*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permutevar8x32_ps:
2452*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2453*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm0
2454*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2455*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2456*9880d681SAndroid Build Coastguard Worker  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %arg1)
2457*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
2458*9880d681SAndroid Build Coastguard Worker}
2459*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
2460*9880d681SAndroid Build Coastguard Worker
; test_mm256_sad_epu8: bitcasts both <4 x i64> arguments to <32 x i8> and calls
; llvm.x86.avx2.psad.bw, which already returns <4 x i64>, so no result bitcast
; is needed. The autogenerated assertions expect a single vpsadbw on ymm
; registers followed by the return.
2461*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sad_epu8(<4 x i64> %a0, <4 x i64> %a1) {
2462*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sad_epu8:
2463*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2464*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
2465*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2466*9880d681SAndroid Build Coastguard Worker;
2467*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sad_epu8:
2468*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2469*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
2470*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2471*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2472*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2473*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %arg0, <32 x i8> %arg1)
2474*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2475*9880d681SAndroid Build Coastguard Worker}
2476*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
2477*9880d681SAndroid Build Coastguard Worker
; test_mm256_shuffle_epi32: bitcasts the argument to <8 x i32> and applies a
; shufflevector with mask <3, 3, 0, 0, 7, 7, 4, 4> (the same pattern in the
; low and high four elements), then bitcasts back to <4 x i64>. The
; autogenerated assertions expect a single vpshufd with that element list.
2478*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_shuffle_epi32(<4 x i64> %a0) {
2479*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shuffle_epi32:
2480*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2481*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
2482*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2483*9880d681SAndroid Build Coastguard Worker;
2484*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shuffle_epi32:
2485*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2486*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,0,0,7,7,4,4]
2487*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2488*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2489*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <8 x i32> %arg0, <8 x i32> undef, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
2490*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %shuf to <4 x i64>
2491*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2492*9880d681SAndroid Build Coastguard Worker}
2493*9880d681SAndroid Build Coastguard Worker
; test_mm256_shuffle_epi8: bitcasts both <4 x i64> arguments to <32 x i8>,
; calls llvm.x86.avx2.pshuf.b, and bitcasts the result back to <4 x i64>. The
; autogenerated assertions expect a single vpshufb on ymm registers followed
; by the return.
2494*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_shuffle_epi8(<4 x i64> %a0, <4 x i64> %a1) {
2495*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shuffle_epi8:
2496*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2497*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
2498*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2499*9880d681SAndroid Build Coastguard Worker;
2500*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shuffle_epi8:
2501*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2502*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
2503*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2504*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2505*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2506*9880d681SAndroid Build Coastguard Worker  %shuf = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %arg0, <32 x i8> %arg1)
2507*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %shuf to <4 x i64>
2508*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2509*9880d681SAndroid Build Coastguard Worker}
2510*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
2511*9880d681SAndroid Build Coastguard Worker
; test_mm256_shufflehi_epi16: bitcasts the argument to <16 x i16> and applies a
; shufflevector whose mask keeps elements 0-3 and 8-11 in place and reorders
; elements 4-7 as 7,6,6,5 and 12-15 as 15,14,14,13. The autogenerated
; assertions expect a single vpshufhw with the same element list.
2512*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_shufflehi_epi16(<4 x i64> %a0) {
2513*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shufflehi_epi16:
2514*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2515*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,5,8,9,10,11,15,14,14,13]
2516*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2517*9880d681SAndroid Build Coastguard Worker;
2518*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shufflehi_epi16:
2519*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2520*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,6,5,8,9,10,11,15,14,14,13]
2521*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2522*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2523*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
2524*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %shuf to <4 x i64>
2525*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2526*9880d681SAndroid Build Coastguard Worker}
2527*9880d681SAndroid Build Coastguard Worker
; test_mm256_shufflelo_epi16: bitcasts the argument to <16 x i16> and applies a
; shufflevector whose mask reorders elements 0-3 as 3,0,1,1 and 8-11 as
; 11,8,9,9 while keeping elements 4-7 and 12-15 in place. The autogenerated
; assertions expect a single vpshuflw with the same element list.
2528*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_shufflelo_epi16(<4 x i64> %a0) {
2529*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shufflelo_epi16:
2530*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2531*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,1,4,5,6,7,11,8,9,9,12,13,14,15]
2532*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2533*9880d681SAndroid Build Coastguard Worker;
2534*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shufflelo_epi16:
2535*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2536*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,1,4,5,6,7,11,8,9,9,12,13,14,15]
2537*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2538*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2539*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <16 x i16> %arg0, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
2540*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %shuf to <4 x i64>
2541*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2542*9880d681SAndroid Build Coastguard Worker}
2543*9880d681SAndroid Build Coastguard Worker
; test_mm256_sign_epi8: bitcasts both <4 x i64> arguments to <32 x i8>, calls
; llvm.x86.avx2.psign.b, and bitcasts the result back to <4 x i64>. The
; autogenerated assertions expect a single vpsignb on ymm registers followed
; by the return.
2544*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sign_epi8(<4 x i64> %a0, <4 x i64> %a1) {
2545*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sign_epi8:
2546*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2547*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsignb %ymm1, %ymm0, %ymm0
2548*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2549*9880d681SAndroid Build Coastguard Worker;
2550*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sign_epi8:
2551*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2552*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsignb %ymm1, %ymm0, %ymm0
2553*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2554*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2555*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
2556*9880d681SAndroid Build Coastguard Worker  %call = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %arg0, <32 x i8> %arg1)
2557*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %call to <4 x i64>
2558*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2559*9880d681SAndroid Build Coastguard Worker}
2560*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
2561*9880d681SAndroid Build Coastguard Worker
; test_mm256_sign_epi16: bitcasts both <4 x i64> arguments to <16 x i16>, calls
; llvm.x86.avx2.psign.w, and bitcasts the result back to <4 x i64>. The
; autogenerated assertions expect a single vpsignw on ymm registers followed
; by the return.
2562*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sign_epi16(<4 x i64> %a0, <4 x i64> %a1) {
2563*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sign_epi16:
2564*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2565*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsignw %ymm1, %ymm0, %ymm0
2566*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2567*9880d681SAndroid Build Coastguard Worker;
2568*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sign_epi16:
2569*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2570*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsignw %ymm1, %ymm0, %ymm0
2571*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2572*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2573*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
2574*9880d681SAndroid Build Coastguard Worker  %call = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %arg0, <16 x i16> %arg1)
2575*9880d681SAndroid Build Coastguard Worker  %res = bitcast <16 x i16> %call to <4 x i64>
2576*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2577*9880d681SAndroid Build Coastguard Worker}
2578*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
2579*9880d681SAndroid Build Coastguard Worker
; test_mm256_sign_epi32: bitcasts both <4 x i64> arguments to <8 x i32>, calls
; llvm.x86.avx2.psign.d, and bitcasts the result back to <4 x i64>. The
; autogenerated assertions expect a single vpsignd on ymm registers followed
; by the return.
2580*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sign_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2581*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sign_epi32:
2582*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2583*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsignd %ymm1, %ymm0, %ymm0
2584*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2585*9880d681SAndroid Build Coastguard Worker;
2586*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sign_epi32:
2587*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2588*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsignd %ymm1, %ymm0, %ymm0
2589*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2590*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2591*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2592*9880d681SAndroid Build Coastguard Worker  %call = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %arg0, <8 x i32> %arg1)
2593*9880d681SAndroid Build Coastguard Worker  %res = bitcast <8 x i32> %call to <4 x i64>
2594*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2595*9880d681SAndroid Build Coastguard Worker}
2596*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
2597*9880d681SAndroid Build Coastguard Worker
; test_mm256_sll_epi16: the data argument is <4 x i64> and the shift-count
; argument is the narrower <2 x i64>; they are bitcast to <16 x i16> and
; <8 x i16> respectively before calling llvm.x86.avx2.psll.w. The autogenerated
; assertions expect a single vpsllw taking the count in %xmm1 and the data in
; %ymm0.
2598*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sll_epi16(<4 x i64> %a0, <2 x i64> %a1) {
2599*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sll_epi16:
2600*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2601*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
2602*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2603*9880d681SAndroid Build Coastguard Worker;
2604*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sll_epi16:
2605*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2606*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
2607*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2608*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2609*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2610*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %arg0, <8 x i16> %arg1)
2611*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2612*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2613*9880d681SAndroid Build Coastguard Worker}
2614*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
2615*9880d681SAndroid Build Coastguard Worker
; test_mm256_sll_epi32: the data argument is <4 x i64> and the shift-count
; argument is the narrower <2 x i64>; they are bitcast to <8 x i32> and
; <4 x i32> respectively before calling llvm.x86.avx2.psll.d. The autogenerated
; assertions expect a single vpslld taking the count in %xmm1 and the data in
; %ymm0.
2616*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sll_epi32(<4 x i64> %a0, <2 x i64> %a1) {
2617*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sll_epi32:
2618*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2619*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpslld %xmm1, %ymm0, %ymm0
2620*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2621*9880d681SAndroid Build Coastguard Worker;
2622*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sll_epi32:
2623*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2624*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm0
2625*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2626*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2627*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2628*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %arg0, <4 x i32> %arg1)
2629*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2630*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2631*9880d681SAndroid Build Coastguard Worker}
2632*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
2633*9880d681SAndroid Build Coastguard Worker
; test_mm256_sll_epi64: calls llvm.x86.avx2.psll.q directly on the <4 x i64>
; data argument and <2 x i64> count argument; the argument types already match
; the intrinsic signature, so no bitcasts are needed. The autogenerated
; assertions expect a single vpsllq taking the count in %xmm1 and the data in
; %ymm0.
2634*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sll_epi64(<4 x i64> %a0, <2 x i64> %a1) {
2635*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sll_epi64:
2636*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2637*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
2638*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2639*9880d681SAndroid Build Coastguard Worker;
2640*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sll_epi64:
2641*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2642*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
2643*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2644*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
2645*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2646*9880d681SAndroid Build Coastguard Worker}
2647*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
2648*9880d681SAndroid Build Coastguard Worker
; test_mm256_slli_epi16: bitcasts the argument to <16 x i16> and calls
; llvm.x86.avx2.pslli.w with the constant count i32 3, then bitcasts back to
; <4 x i64>. The autogenerated assertions expect a single vpsllw with the
; immediate $3.
2649*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_slli_epi16(<4 x i64> %a0) {
2650*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_slli_epi16:
2651*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2652*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllw $3, %ymm0, %ymm0
2653*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2654*9880d681SAndroid Build Coastguard Worker;
2655*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_slli_epi16:
2656*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2657*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
2658*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2659*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2660*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %arg0, i32 3)
2661*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2662*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2663*9880d681SAndroid Build Coastguard Worker}
2664*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
2665*9880d681SAndroid Build Coastguard Worker
; test_mm256_slli_epi32: bitcasts the argument to <8 x i32> and calls
; llvm.x86.avx2.pslli.d with the constant count i32 3, then bitcasts back to
; <4 x i64>. The autogenerated assertions expect a single vpslld with the
; immediate $3.
2666*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_slli_epi32(<4 x i64> %a0) {
2667*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_slli_epi32:
2668*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2669*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpslld $3, %ymm0, %ymm0
2670*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2671*9880d681SAndroid Build Coastguard Worker;
2672*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_slli_epi32:
2673*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2674*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpslld $3, %ymm0, %ymm0
2675*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2676*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2677*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %arg0, i32 3)
2678*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2679*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2680*9880d681SAndroid Build Coastguard Worker}
2681*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
2682*9880d681SAndroid Build Coastguard Worker
; test_mm256_slli_epi64: calls llvm.x86.avx2.pslli.q directly on the <4 x i64>
; argument with the constant count i32 3; no bitcasts are needed. The
; autogenerated assertions expect a single vpsllq with the immediate $3.
2683*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_slli_epi64(<4 x i64> %a0) {
2684*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_slli_epi64:
2685*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2686*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllq $3, %ymm0, %ymm0
2687*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2688*9880d681SAndroid Build Coastguard Worker;
2689*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_slli_epi64:
2690*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2691*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllq $3, %ymm0, %ymm0
2692*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2693*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 3)
2694*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2695*9880d681SAndroid Build Coastguard Worker}
2696*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
2697*9880d681SAndroid Build Coastguard Worker
; test_mm256_slli_si256: expressed as a shufflevector rather than an intrinsic
; call. The first shuffle operand is zeroinitializer and the second is the
; argument bitcast to <32 x i8>, so mask indices 13-15 and 29-31 select zero
; bytes while indices 32-44 and 48-60 select bytes 0-12 and 16-28 of %arg0.
; Each 16-byte half therefore holds three zero bytes followed by thirteen
; source bytes. The autogenerated assertions expect a single vpslldq whose
; decoded comment shows exactly that layout.
2698*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_slli_si256(<4 x i64> %a0) {
2699*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_slli_si256:
2700*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2701*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
2702*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2703*9880d681SAndroid Build Coastguard Worker;
2704*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_slli_si256:
2705*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2706*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28]
2707*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2708*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2709*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <32 x i8> zeroinitializer, <32 x i8> %arg0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
2710*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %shuf to <4 x i64>
2711*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2712*9880d681SAndroid Build Coastguard Worker}
2713*9880d681SAndroid Build Coastguard Worker
; test_mm_sllv_epi32: 128-bit variant. Bitcasts both <2 x i64> arguments to
; <4 x i32>, calls llvm.x86.avx2.psllv.d, and bitcasts the result back to
; <2 x i64>. The autogenerated assertions expect a single vpsllvd on xmm
; registers followed by the return.
2714*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_sllv_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2715*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_sllv_epi32:
2716*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2717*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
2718*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2719*9880d681SAndroid Build Coastguard Worker;
2720*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_sllv_epi32:
2721*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2722*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
2723*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2724*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2725*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2726*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %arg0, <4 x i32> %arg1)
2727*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %res to <2 x i64>
2728*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
2729*9880d681SAndroid Build Coastguard Worker}
2730*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
2731*9880d681SAndroid Build Coastguard Worker
; test_mm256_sllv_epi32: 256-bit variant. Bitcasts both <4 x i64> arguments to
; <8 x i32>, calls llvm.x86.avx2.psllv.d.256, and bitcasts the result back to
; <4 x i64>. The autogenerated assertions expect a single vpsllvd on ymm
; registers followed by the return.
2732*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sllv_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2733*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sllv_epi32:
2734*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2735*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
2736*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2737*9880d681SAndroid Build Coastguard Worker;
2738*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sllv_epi32:
2739*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2740*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
2741*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2742*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2743*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2744*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
2745*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2746*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2747*9880d681SAndroid Build Coastguard Worker}
2748*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
2749*9880d681SAndroid Build Coastguard Worker
; test_mm_sllv_epi64: 128-bit variant. Calls llvm.x86.avx2.psllv.q directly on
; the two <2 x i64> arguments; no bitcasts are needed. The autogenerated
; assertions expect a single vpsllvq on xmm registers followed by the return.
2750*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_sllv_epi64(<2 x i64> %a0, <2 x i64> %a1) {
2751*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_sllv_epi64:
2752*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2753*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
2754*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2755*9880d681SAndroid Build Coastguard Worker;
2756*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_sllv_epi64:
2757*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2758*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
2759*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2760*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
2761*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
2762*9880d681SAndroid Build Coastguard Worker}
2763*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
2764*9880d681SAndroid Build Coastguard Worker
; test_mm256_sllv_epi64: passes both <4 x i64> arguments straight to
; @llvm.x86.avx2.psllv.q.256. The i386 and x86_64 runs both expect a single
; vpsllvq on ymm registers followed by the return.
2765*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sllv_epi64(<4 x i64> %a0, <4 x i64> %a1) {
2766*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sllv_epi64:
2767*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2768*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
2769*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2770*9880d681SAndroid Build Coastguard Worker;
2771*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sllv_epi64:
2772*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2773*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
2774*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2775*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
2776*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2777*9880d681SAndroid Build Coastguard Worker}
2778*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
2779*9880d681SAndroid Build Coastguard Worker
; test_mm256_sra_epi16: reinterprets the <4 x i64> first argument as
; <16 x i16> and the <2 x i64> second argument as <8 x i16>, calls
; @llvm.x86.avx2.psra.w, and bitcasts the result back to <4 x i64>.
; Both runs expect one vpsraw taking an xmm second operand and a ymm
; source/destination.
2780*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sra_epi16(<4 x i64> %a0, <2 x i64> %a1) {
2781*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sra_epi16:
2782*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2783*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
2784*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2785*9880d681SAndroid Build Coastguard Worker;
2786*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sra_epi16:
2787*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2788*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
2789*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2790*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2791*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2792*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %arg0, <8 x i16> %arg1)
2793*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2794*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2795*9880d681SAndroid Build Coastguard Worker}
2796*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
2797*9880d681SAndroid Build Coastguard Worker
; test_mm256_sra_epi32: reinterprets the <4 x i64> first argument as
; <8 x i32> and the <2 x i64> second argument as <4 x i32>, calls
; @llvm.x86.avx2.psra.d, and bitcasts the result back to <4 x i64>.
; Both runs expect one vpsrad taking an xmm second operand and a ymm
; source/destination.
2798*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sra_epi32(<4 x i64> %a0, <2 x i64> %a1) {
2799*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sra_epi32:
2800*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2801*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
2802*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2803*9880d681SAndroid Build Coastguard Worker;
2804*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sra_epi32:
2805*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2806*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
2807*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2808*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2809*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2810*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %arg0, <4 x i32> %arg1)
2811*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2812*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2813*9880d681SAndroid Build Coastguard Worker}
2814*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
2815*9880d681SAndroid Build Coastguard Worker
; test_mm256_srai_epi16: reinterprets the <4 x i64> argument as <16 x i16>,
; calls @llvm.x86.avx2.psrai.w with the constant i32 3, and bitcasts the
; result back to <4 x i64>. Both runs expect the constant to appear as the
; immediate of a single vpsraw on ymm0.
2816*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srai_epi16(<4 x i64> %a0) {
2817*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srai_epi16:
2818*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2819*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsraw $3, %ymm0, %ymm0
2820*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2821*9880d681SAndroid Build Coastguard Worker;
2822*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srai_epi16:
2823*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2824*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsraw $3, %ymm0, %ymm0
2825*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2826*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2827*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %arg0, i32 3)
2828*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2829*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2830*9880d681SAndroid Build Coastguard Worker}
2831*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
2832*9880d681SAndroid Build Coastguard Worker
; test_mm256_srai_epi32: reinterprets the <4 x i64> argument as <8 x i32>,
; calls @llvm.x86.avx2.psrai.d with the constant i32 3, and bitcasts the
; result back to <4 x i64>. Both runs expect the constant to appear as the
; immediate of a single vpsrad on ymm0.
2833*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srai_epi32(<4 x i64> %a0) {
2834*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srai_epi32:
2835*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2836*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrad $3, %ymm0, %ymm0
2837*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2838*9880d681SAndroid Build Coastguard Worker;
2839*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srai_epi32:
2840*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2841*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrad $3, %ymm0, %ymm0
2842*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2843*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2844*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %arg0, i32 3)
2845*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2846*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2847*9880d681SAndroid Build Coastguard Worker}
2848*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
2849*9880d681SAndroid Build Coastguard Worker
; test_mm_srav_epi32: reinterprets both <2 x i64> arguments as <4 x i32>,
; calls @llvm.x86.avx2.psrav.d, and bitcasts the result back to <2 x i64>.
; Both runs expect a single vpsravd on xmm registers followed by the return.
2850*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_srav_epi32(<2 x i64> %a0, <2 x i64> %a1) {
2851*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_srav_epi32:
2852*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2853*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
2854*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2855*9880d681SAndroid Build Coastguard Worker;
2856*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_srav_epi32:
2857*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2858*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
2859*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2860*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
2861*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2862*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %arg0, <4 x i32> %arg1)
2863*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %res to <2 x i64>
2864*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
2865*9880d681SAndroid Build Coastguard Worker}
2866*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
2867*9880d681SAndroid Build Coastguard Worker
; test_mm256_srav_epi32: reinterprets both <4 x i64> arguments as <8 x i32>,
; calls @llvm.x86.avx2.psrav.d.256, and bitcasts the result back to
; <4 x i64>. Both runs expect a single vpsravd on ymm registers followed by
; the return.
2868*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srav_epi32(<4 x i64> %a0, <4 x i64> %a1) {
2869*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srav_epi32:
2870*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2871*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
2872*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2873*9880d681SAndroid Build Coastguard Worker;
2874*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srav_epi32:
2875*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2876*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
2877*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2878*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2879*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
2880*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
2881*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2882*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2883*9880d681SAndroid Build Coastguard Worker}
2884*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
2885*9880d681SAndroid Build Coastguard Worker
; test_mm256_srl_epi16: reinterprets the <4 x i64> first argument as
; <16 x i16> and the <2 x i64> second argument as <8 x i16>, calls
; @llvm.x86.avx2.psrl.w, and bitcasts the result back to <4 x i64>.
; Both runs expect one vpsrlw taking an xmm second operand and a ymm
; source/destination.
2886*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srl_epi16(<4 x i64> %a0, <2 x i64> %a1) {
2887*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srl_epi16:
2888*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2889*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
2890*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2891*9880d681SAndroid Build Coastguard Worker;
2892*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srl_epi16:
2893*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2894*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
2895*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2896*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2897*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
2898*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %arg0, <8 x i16> %arg1)
2899*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2900*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2901*9880d681SAndroid Build Coastguard Worker}
2902*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
2903*9880d681SAndroid Build Coastguard Worker
; test_mm256_srl_epi32: reinterprets the <4 x i64> first argument as
; <8 x i32> and the <2 x i64> second argument as <4 x i32>, calls
; @llvm.x86.avx2.psrl.d, and bitcasts the result back to <4 x i64>.
; Both runs expect one vpsrld taking an xmm second operand and a ymm
; source/destination.
2904*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srl_epi32(<4 x i64> %a0, <2 x i64> %a1) {
2905*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srl_epi32:
2906*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2907*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
2908*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2909*9880d681SAndroid Build Coastguard Worker;
2910*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srl_epi32:
2911*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2912*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
2913*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2914*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2915*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
2916*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %arg0, <4 x i32> %arg1)
2917*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2918*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2919*9880d681SAndroid Build Coastguard Worker}
2920*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
2921*9880d681SAndroid Build Coastguard Worker
; test_mm256_srl_epi64: passes the <4 x i64> and <2 x i64> arguments
; straight to @llvm.x86.avx2.psrl.q (the element type already matches, so no
; bitcasts). Both runs expect one vpsrlq taking an xmm second operand and a
; ymm source/destination.
2922*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srl_epi64(<4 x i64> %a0, <2 x i64> %a1) {
2923*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srl_epi64:
2924*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2925*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
2926*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2927*9880d681SAndroid Build Coastguard Worker;
2928*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srl_epi64:
2929*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2930*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
2931*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2932*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
2933*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2934*9880d681SAndroid Build Coastguard Worker}
2935*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
2936*9880d681SAndroid Build Coastguard Worker
; test_mm256_srli_epi16: reinterprets the <4 x i64> argument as <16 x i16>,
; calls @llvm.x86.avx2.psrli.w with the constant i32 3, and bitcasts the
; result back to <4 x i64>. Both runs expect the constant to appear as the
; immediate of a single vpsrlw on ymm0.
2937*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srli_epi16(<4 x i64> %a0) {
2938*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srli_epi16:
2939*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2940*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
2941*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2942*9880d681SAndroid Build Coastguard Worker;
2943*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srli_epi16:
2944*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2945*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
2946*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2947*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
2948*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %arg0, i32 3)
2949*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
2950*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2951*9880d681SAndroid Build Coastguard Worker}
2952*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
2953*9880d681SAndroid Build Coastguard Worker
; test_mm256_srli_epi32: reinterprets the <4 x i64> argument as <8 x i32>,
; calls @llvm.x86.avx2.psrli.d with the constant i32 3, and bitcasts the
; result back to <4 x i64>. Both runs expect the constant to appear as the
; immediate of a single vpsrld on ymm0.
2954*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srli_epi32(<4 x i64> %a0) {
2955*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srli_epi32:
2956*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2957*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrld $3, %ymm0, %ymm0
2958*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2959*9880d681SAndroid Build Coastguard Worker;
2960*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srli_epi32:
2961*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2962*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrld $3, %ymm0, %ymm0
2963*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2964*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
2965*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %arg0, i32 3)
2966*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
2967*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
2968*9880d681SAndroid Build Coastguard Worker}
2969*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
2970*9880d681SAndroid Build Coastguard Worker
; test_mm256_srli_epi64: calls @llvm.x86.avx2.psrli.q directly on the
; <4 x i64> argument with the constant i32 3 (no bitcasts needed). Both runs
; expect the constant to appear as the immediate of a single vpsrlq on ymm0.
2971*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srli_epi64(<4 x i64> %a0) {
2972*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srli_epi64:
2973*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2974*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlq $3, %ymm0, %ymm0
2975*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2976*9880d681SAndroid Build Coastguard Worker;
2977*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srli_epi64:
2978*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2979*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlq $3, %ymm0, %ymm0
2980*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2981*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 3)
2982*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
2983*9880d681SAndroid Build Coastguard Worker}
2984*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
2985*9880d681SAndroid Build Coastguard Worker
; test_mm256_srli_si256: written as a generic shufflevector rather than an
; intrinsic call. The input is viewed as <32 x i8> and shuffled against
; zeroinitializer; mask indices 3-15 and 19-31 pick bytes of the input, while
; indices 32-34 and 48-50 pick elements of the all-zero second operand.
; Both runs expect this to be matched as a single vpsrldq whose decoded
; shuffle comment shows ymm0[3..15], three zeros, ymm0[19..31], three zeros.
2986*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srli_si256(<4 x i64> %a0) {
2987*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srli_si256:
2988*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
2989*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
2990*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
2991*9880d681SAndroid Build Coastguard Worker;
2992*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srli_si256:
2993*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
2994*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero
2995*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
2996*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
2997*9880d681SAndroid Build Coastguard Worker  %shuf = shufflevector <32 x i8> %arg0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
2998*9880d681SAndroid Build Coastguard Worker  %res = bitcast <32 x i8> %shuf to <4 x i64>
2999*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3000*9880d681SAndroid Build Coastguard Worker}
3001*9880d681SAndroid Build Coastguard Worker
; test_mm_srlv_epi32: reinterprets both <2 x i64> arguments as <4 x i32>,
; calls @llvm.x86.avx2.psrlv.d, and bitcasts the result back to <2 x i64>.
; Both runs expect a single vpsrlvd on xmm registers followed by the return.
3002*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_srlv_epi32(<2 x i64> %a0, <2 x i64> %a1) {
3003*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_srlv_epi32:
3004*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3005*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
3006*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3007*9880d681SAndroid Build Coastguard Worker;
3008*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_srlv_epi32:
3009*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3010*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
3011*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3012*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
3013*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
3014*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %arg0, <4 x i32> %arg1)
3015*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <4 x i32> %res to <2 x i64>
3016*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %bc
3017*9880d681SAndroid Build Coastguard Worker}
3018*9880d681SAndroid Build Coastguard Workerdeclare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
3019*9880d681SAndroid Build Coastguard Worker
; test_mm256_srlv_epi32: reinterprets both <4 x i64> arguments as <8 x i32>,
; calls @llvm.x86.avx2.psrlv.d.256, and bitcasts the result back to
; <4 x i64>. Both runs expect a single vpsrlvd on ymm registers followed by
; the return.
3020*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srlv_epi32(<4 x i64> %a0, <4 x i64> %a1) {
3021*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srlv_epi32:
3022*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3023*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
3024*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3025*9880d681SAndroid Build Coastguard Worker;
3026*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srlv_epi32:
3027*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3028*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
3029*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3030*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
3031*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
3032*9880d681SAndroid Build Coastguard Worker  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %arg0, <8 x i32> %arg1)
3033*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
3034*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3035*9880d681SAndroid Build Coastguard Worker}
3036*9880d681SAndroid Build Coastguard Workerdeclare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
3037*9880d681SAndroid Build Coastguard Worker
; test_mm_srlv_epi64: passes both <2 x i64> arguments straight to
; @llvm.x86.avx2.psrlv.q (no bitcasts needed). Both runs expect a single
; vpsrlvq on xmm registers followed by the return.
3038*9880d681SAndroid Build Coastguard Workerdefine <2 x i64> @test_mm_srlv_epi64(<2 x i64> %a0, <2 x i64> %a1) {
3039*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_srlv_epi64:
3040*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3041*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
3042*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3043*9880d681SAndroid Build Coastguard Worker;
3044*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_srlv_epi64:
3045*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3046*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
3047*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3048*9880d681SAndroid Build Coastguard Worker  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
3049*9880d681SAndroid Build Coastguard Worker  ret <2 x i64> %res
3050*9880d681SAndroid Build Coastguard Worker}
3051*9880d681SAndroid Build Coastguard Workerdeclare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
3052*9880d681SAndroid Build Coastguard Worker
; test_mm256_srlv_epi64: passes both <4 x i64> arguments straight to
; @llvm.x86.avx2.psrlv.q.256 (no bitcasts needed). Both runs expect a single
; vpsrlvq on ymm registers followed by the return.
3053*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_srlv_epi64(<4 x i64> %a0, <4 x i64> %a1) {
3054*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_srlv_epi64:
3055*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3056*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
3057*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3058*9880d681SAndroid Build Coastguard Worker;
3059*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_srlv_epi64:
3060*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3061*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
3062*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3063*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
3064*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3065*9880d681SAndroid Build Coastguard Worker}
3066*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
3067*9880d681SAndroid Build Coastguard Worker
; test_mm256_stream_load_si256: casts the <4 x i64>* argument to i8* and
; calls @llvm.x86.avx2.movntdqa (declared readonly, unlike the readnone
; arithmetic intrinsics in this file). Both runs expect a vmovntdqa load into
; ymm0: the i386 run first loads the pointer from the stack into %eax, while
; the x86_64 run addresses through %rdi directly.
3068*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_stream_load_si256(<4 x i64> *%a0) {
3069*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_stream_load_si256:
3070*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3071*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
3072*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovntdqa (%eax), %ymm0
3073*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3074*9880d681SAndroid Build Coastguard Worker;
3075*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_stream_load_si256:
3076*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3077*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovntdqa (%rdi), %ymm0
3078*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3079*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> *%a0 to i8*
3080*9880d681SAndroid Build Coastguard Worker  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %arg0)
3081*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3082*9880d681SAndroid Build Coastguard Worker}
3083*9880d681SAndroid Build Coastguard Workerdeclare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
3084*9880d681SAndroid Build Coastguard Worker
; test_mm256_sub_epi8: uses the generic IR `sub` on <32 x i8> views of the
; two <4 x i64> arguments (no target intrinsic), then bitcasts the result
; back to <4 x i64>. Both runs expect a single vpsubb on ymm registers.
3085*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sub_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3086*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sub_epi8:
3087*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3088*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
3089*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3090*9880d681SAndroid Build Coastguard Worker;
3091*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sub_epi8:
3092*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3093*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
3094*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3095*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
3096*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
3097*9880d681SAndroid Build Coastguard Worker  %res = sub <32 x i8> %arg0, %arg1
3098*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
3099*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3100*9880d681SAndroid Build Coastguard Worker}
3101*9880d681SAndroid Build Coastguard Worker
; test_mm256_sub_epi16: uses the generic IR `sub` on <16 x i16> views of the
; two <4 x i64> arguments (no target intrinsic), then bitcasts the result
; back to <4 x i64>. Both runs expect a single vpsubw on ymm registers.
3102*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sub_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3103*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sub_epi16:
3104*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3105*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
3106*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3107*9880d681SAndroid Build Coastguard Worker;
3108*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sub_epi16:
3109*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3110*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
3111*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3112*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
3113*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
3114*9880d681SAndroid Build Coastguard Worker  %res = sub <16 x i16> %arg0, %arg1
3115*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
3116*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3117*9880d681SAndroid Build Coastguard Worker}
3118*9880d681SAndroid Build Coastguard Worker
; test_mm256_sub_epi32: uses the generic IR `sub` on <8 x i32> views of the
; two <4 x i64> arguments (no target intrinsic), then bitcasts the result
; back to <4 x i64>. Both runs expect a single vpsubd on ymm registers.
3119*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sub_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3120*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sub_epi32:
3121*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3122*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
3123*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3124*9880d681SAndroid Build Coastguard Worker;
3125*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sub_epi32:
3126*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3127*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
3128*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3129*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
3130*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
3131*9880d681SAndroid Build Coastguard Worker  %res = sub <8 x i32> %arg0, %arg1
3132*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
3133*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3134*9880d681SAndroid Build Coastguard Worker}
3135*9880d681SAndroid Build Coastguard Worker
; test_mm256_sub_epi64: uses the generic IR `sub` directly on the two
; <4 x i64> arguments (no bitcasts, no target intrinsic). Both runs expect a
; single vpsubq on ymm registers followed by the return.
3136*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_sub_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3137*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_sub_epi64:
3138*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3139*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
3140*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3141*9880d681SAndroid Build Coastguard Worker;
3142*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_sub_epi64:
3143*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3144*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
3145*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3146*9880d681SAndroid Build Coastguard Worker  %res = sub <4 x i64> %a0, %a1
3147*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3148*9880d681SAndroid Build Coastguard Worker}
3149*9880d681SAndroid Build Coastguard Worker
; test_mm256_subs_epi8: reinterprets both <4 x i64> arguments as <32 x i8>,
; calls @llvm.x86.avx2.psubs.b, and bitcasts the result back to <4 x i64>.
; Both runs expect a single vpsubsb on ymm registers followed by the return.
3150*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_subs_epi8(<4 x i64> %a0, <4 x i64> %a1) {
3151*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_subs_epi8:
3152*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3153*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
3154*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3155*9880d681SAndroid Build Coastguard Worker;
3156*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_subs_epi8:
3157*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3158*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0
3159*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3160*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
3161*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
3162*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %arg0, <32 x i8> %arg1)
3163*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
3164*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3165*9880d681SAndroid Build Coastguard Worker}
3166*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
3167*9880d681SAndroid Build Coastguard Worker
; _mm256_subs_epi16: same shape as the byte variant, but the operands are viewed
; as <16 x i16> and routed through the psubs.w intrinsic. Both target runs are
; expected to produce one vpsubsw followed directly by the return.
3168*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_subs_epi16(<4 x i64> %a0, <4 x i64> %a1) {
3169*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_subs_epi16:
3170*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3171*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
3172*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3173*9880d681SAndroid Build Coastguard Worker;
3174*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_subs_epi16:
3175*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3176*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0
3177*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3178*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
3179*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
3180*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %arg0, <16 x i16> %arg1)
3181*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
3182*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3183*9880d681SAndroid Build Coastguard Worker}
3184*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
3185*9880d681SAndroid Build Coastguard Worker
; _mm256_subs_epu8: operands are viewed as <32 x i8> and passed to the psubus.b
; intrinsic (packed byte subtract with unsigned saturation). Both target runs
; are expected to produce one vpsubusb followed directly by the return.
3186*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_subs_epu8(<4 x i64> %a0, <4 x i64> %a1) {
3187*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_subs_epu8:
3188*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3189*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
3190*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3191*9880d681SAndroid Build Coastguard Worker;
3192*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_subs_epu8:
3193*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3194*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0
3195*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3196*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
3197*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
3198*9880d681SAndroid Build Coastguard Worker  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %arg0, <32 x i8> %arg1)
3199*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
3200*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3201*9880d681SAndroid Build Coastguard Worker}
3202*9880d681SAndroid Build Coastguard Workerdeclare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
3203*9880d681SAndroid Build Coastguard Worker
; _mm256_subs_epu16: operands are viewed as <16 x i16> and passed to the
; psubus.w intrinsic (unsigned-saturating word subtract). Both target runs are
; expected to produce one vpsubusw followed directly by the return.
3204*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_subs_epu16(<4 x i64> %a0, <4 x i64> %a1) {
3205*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_subs_epu16:
3206*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3207*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
3208*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3209*9880d681SAndroid Build Coastguard Worker;
3210*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_subs_epu16:
3211*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3212*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0
3213*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3214*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
3215*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
3216*9880d681SAndroid Build Coastguard Worker  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %arg0, <16 x i16> %arg1)
3217*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
3218*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3219*9880d681SAndroid Build Coastguard Worker}
3220*9880d681SAndroid Build Coastguard Workerdeclare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
3221*9880d681SAndroid Build Coastguard Worker
; _mm256_unpackhi_epi8: expressed as a generic shufflevector rather than an
; intrinsic. The mask interleaves bytes 8-15 and 24-31 of %arg0 with the same
; positions of %arg1 (mask indices 32 and above select from the second operand),
; i.e. the upper eight bytes of each 128-bit half. Both runs expect this to be
; matched to a single vpunpckhbw; the {{.*#+}} regex skips the operand text up
; to the shuffle-decode comment that lists the selected elements.
3222*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpackhi_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3223*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpackhi_epi8:
3224*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3225*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
3226*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3227*9880d681SAndroid Build Coastguard Worker;
3228*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpackhi_epi8:
3229*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3230*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
3231*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3232*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
3233*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
3234*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
3235*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
3236*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3237*9880d681SAndroid Build Coastguard Worker}
3238*9880d681SAndroid Build Coastguard Worker
; _mm256_unpackhi_epi16: shufflevector on <16 x i16> views of the operands. The
; mask interleaves elements 4-7 and 12-15 of %arg0 with the matching elements of
; %arg1 (indices 16 and above select from the second operand), i.e. the upper
; four words of each 128-bit half. Both runs expect a single vpunpckhwd.
3239*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpackhi_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3240*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpackhi_epi16:
3241*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3242*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
3243*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3244*9880d681SAndroid Build Coastguard Worker;
3245*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpackhi_epi16:
3246*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3247*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
3248*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3249*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
3250*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
3251*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
3252*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
3253*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3254*9880d681SAndroid Build Coastguard Worker}
3255*9880d681SAndroid Build Coastguard Worker
; _mm256_unpackhi_epi32: shufflevector on <8 x i32> views of the operands. The
; mask <2,10,3,11,6,14,7,15> interleaves elements 2-3 and 6-7 of %arg0 with the
; matching elements of %arg1 (indices 8 and above select from the second
; operand). Both runs expect a single vpunpckhdq.
3256*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpackhi_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3257*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpackhi_epi32:
3258*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3259*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
3260*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3261*9880d681SAndroid Build Coastguard Worker;
3262*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpackhi_epi32:
3263*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3264*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
3265*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3266*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
3267*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
3268*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
3269*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
3270*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3271*9880d681SAndroid Build Coastguard Worker}
3272*9880d681SAndroid Build Coastguard Worker
; _mm256_unpackhi_epi64: the operands are already <4 x i64>, so no bitcasts are
; needed. The mask <1,5,3,7> takes the odd-indexed element of each 128-bit half
; from %a0 and %a1 alternately (indices 4 and above select from %a1). Both runs
; expect a single vpunpckhqdq.
3273*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpackhi_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3274*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpackhi_epi64:
3275*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3276*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
3277*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3278*9880d681SAndroid Build Coastguard Worker;
3279*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpackhi_epi64:
3280*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3281*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
3282*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3283*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
3284*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3285*9880d681SAndroid Build Coastguard Worker}
3286*9880d681SAndroid Build Coastguard Worker
; _mm256_unpacklo_epi8: low-half counterpart of the unpackhi byte test. The mask
; interleaves bytes 0-7 and 16-23 of %arg0 with the same positions of %arg1
; (indices 32 and above select from the second operand), i.e. the lower eight
; bytes of each 128-bit half. Both runs expect a single vpunpcklbw.
3287*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpacklo_epi8(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3288*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpacklo_epi8:
3289*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3290*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
3291*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3292*9880d681SAndroid Build Coastguard Worker;
3293*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpacklo_epi8:
3294*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3295*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
3296*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3297*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
3298*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <32 x i8>
3299*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <32 x i8> %arg0, <32 x i8> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
3300*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <32 x i8> %res to <4 x i64>
3301*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3302*9880d681SAndroid Build Coastguard Worker}
3303*9880d681SAndroid Build Coastguard Worker
; _mm256_unpacklo_epi16: shufflevector on <16 x i16> views of the operands. The
; mask interleaves elements 0-3 and 8-11 of %arg0 with the matching elements of
; %arg1 (indices 16 and above select from the second operand), i.e. the lower
; four words of each 128-bit half. Both runs expect a single vpunpcklwd.
3304*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpacklo_epi16(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3305*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpacklo_epi16:
3306*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3307*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
3308*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3309*9880d681SAndroid Build Coastguard Worker;
3310*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpacklo_epi16:
3311*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3312*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
3313*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3314*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
3315*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <16 x i16>
3316*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <16 x i16> %arg0, <16 x i16> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
3317*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <16 x i16> %res to <4 x i64>
3318*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3319*9880d681SAndroid Build Coastguard Worker}
3320*9880d681SAndroid Build Coastguard Worker
; _mm256_unpacklo_epi32: shufflevector on <8 x i32> views of the operands. The
; mask <0,8,1,9,4,12,5,13> interleaves elements 0-1 and 4-5 of %arg0 with the
; matching elements of %arg1 (indices 8 and above select from the second
; operand). Both runs expect a single vpunpckldq.
3321*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpacklo_epi32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3322*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpacklo_epi32:
3323*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3324*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
3325*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3326*9880d681SAndroid Build Coastguard Worker;
3327*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpacklo_epi32:
3328*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3329*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
3330*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3331*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
3332*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast <4 x i64> %a1 to <8 x i32>
3333*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
3334*9880d681SAndroid Build Coastguard Worker  %bc = bitcast <8 x i32> %res to <4 x i64>
3335*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %bc
3336*9880d681SAndroid Build Coastguard Worker}
3337*9880d681SAndroid Build Coastguard Worker
; _mm256_unpacklo_epi64: shuffles the <4 x i64> operands directly, with no
; bitcasts. The mask <0,4,2,6> takes the even-indexed element of each 128-bit
; half from %a0 and %a1 alternately (indices 4 and above select from %a1). Both
; runs expect a single vpunpcklqdq.
3338*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_unpacklo_epi64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3339*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_unpacklo_epi64:
3340*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3341*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
3342*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3343*9880d681SAndroid Build Coastguard Worker;
3344*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_unpacklo_epi64:
3345*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3346*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
3347*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3348*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
3349*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3350*9880d681SAndroid Build Coastguard Worker}
3351*9880d681SAndroid Build Coastguard Worker
; _mm256_xor_si256: a plain IR `xor` on <4 x i64>, with no intrinsic call. Note
; that the recorded expectation for both targets is vxorps (the packed-single
; form) rather than an integer-domain vpxor; that is what the autogenerated
; assertions captured, so regenerate them with update_llc_test_checks.py
; instead of hand-editing if codegen changes.
3352*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_xor_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
3353*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_xor_si256:
3354*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
3355*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vxorps %ymm1, %ymm0, %ymm0
3356*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
3357*9880d681SAndroid Build Coastguard Worker;
3358*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_xor_si256:
3359*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
3360*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vxorps %ymm1, %ymm0, %ymm0
3361*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
3362*9880d681SAndroid Build Coastguard Worker  %res = xor <4 x i64> %a0, %a1
3363*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
3364*9880d681SAndroid Build Coastguard Worker}
3365*9880d681SAndroid Build Coastguard Worker
; Declarations of the 256-bit AVX floating-point compare intrinsics (two vector
; operands plus an i8 operand). No call to either is visible in this part of
; the file; presumably they are used by tests earlier in the file -- confirm
; before removing.
3366*9880d681SAndroid Build Coastguard Workerdeclare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
3367*9880d681SAndroid Build Coastguard Worker
3368*9880d681SAndroid Build Coastguard Workerdeclare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
3369