xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vl-builtins.c
6*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit dword broadcast (function name mirrors _mm_broadcastd_epi32).
; The argument is reinterpreted as <4 x i32>, lane 0 is splatted with an
; all-zero shuffle mask, and the result is cast back to <2 x i64>. Both runs
; are expected to select a single vpbroadcastd %xmm0, %xmm0.
define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  ; zeroinitializer shuffle mask: every result lane reads source lane 0
  %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer
  %res1 = bitcast <4 x i32> %res0 to <2 x i64>
  ret <2 x i64> %res1
}
22*9880d681SAndroid Build Coastguard Worker
; Merge-masked 128-bit dword broadcast (function name mirrors
; _mm_mask_broadcastd_epi32).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask; only the low 4 bits are used (trunc to i4)
;   %a2 - source vector whose lane 0 is broadcast
; The checked output ANDs the mask with 15, round-trips it through a stack
; slot, moves it into %k1 and emits vpbroadcastd with a {%k1} merge mask.
define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp0:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm1, %xmm0 {%k1}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per dword lane
  %trn1 = trunc i8 %a1 to i4
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <4 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <4 x i1> %arg1, <4 x i32> %res0, <4 x i32> %arg0
  %res2 = bitcast <4 x i32> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
55*9880d681SAndroid Build Coastguard Worker
; Zero-masked 128-bit dword broadcast (function name mirrors
; _mm_maskz_broadcastd_epi32).
;   %a0 - i8 mask; only the low 4 bits are used (trunc to i4)
;   %a1 - source vector whose lane 0 is broadcast
; Lanes whose mask bit is clear are zeroed. The checked output masks the
; i8 with 15, round-trips it through a stack slot into %k1 and emits
; vpbroadcastd with {%k1} {z}.
define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp1:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per dword lane
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <4 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> zero
  %res1 = select <4 x i1> %arg0, <4 x i32> %res0, <4 x i32> zeroinitializer
  %res2 = bitcast <4 x i32> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
87*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit dword broadcast (function name mirrors
; _mm256_broadcastd_epi32). Lane 0 of the 128-bit source is splatted into an
; 8-lane result by a widening shuffle with an all-zero mask. Both runs are
; expected to select a single vpbroadcastd %xmm0, %ymm0.
define <4 x i64> @test_mm256_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  ; 8-element all-zero mask: widens 4 lanes to 8, each reading source lane 0
  %res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <8 x i32> zeroinitializer
  %res1 = bitcast <8 x i32> %res0 to <4 x i64>
  ret <4 x i64> %res1
}
103*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit dword broadcast (function name mirrors
; _mm256_mask_broadcastd_epi32).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask, one bit per dword lane (bitcast straight to <8 x i1>)
;   %a2 - 128-bit source vector whose lane 0 is broadcast
; All 8 mask bits are used, so the checked output has no AND or stack
; round-trip: the mask goes directly into %k1 via kmovw.
define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm1, %ymm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  ; 8-element all-zero mask: widens 4 lanes to 8, each reading source lane 0
  %res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <8 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <8 x i1> %arg1, <8 x i32> %res0, <8 x i32> %arg0
  %res2 = bitcast <8 x i32> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
125*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit dword broadcast (function name mirrors
; _mm256_maskz_broadcastd_epi32).
;   %a0 - i8 mask, one bit per dword lane (bitcast straight to <8 x i1>)
;   %a1 - 128-bit source vector whose lane 0 is broadcast
; Lanes whose mask bit is clear are zeroed. The checked output moves the
; mask directly into %k1 and emits vpbroadcastd with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastd_epi32:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastd_epi32:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  ; 8-element all-zero mask: widens 4 lanes to 8, each reading source lane 0
  %res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <8 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> zero
  %res1 = select <8 x i1> %arg0, <8 x i32> %res0, <8 x i32> zeroinitializer
  %res2 = bitcast <8 x i32> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
146*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit qword broadcast (function name mirrors
; _mm_broadcastq_epi64). Lane 0 of %a0 is splatted with an all-zero shuffle
; mask; no bitcasts are needed because the argument is already <2 x i64>.
; Both runs are expected to select a single vpbroadcastq %xmm0, %xmm0.
define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  ; zeroinitializer shuffle mask: both result lanes read source lane 0
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %res
}
160*9880d681SAndroid Build Coastguard Worker
; Merge-masked 128-bit qword broadcast (function name mirrors
; _mm_mask_broadcastq_epi64).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask; only the low 2 bits are used (trunc to i2)
;   %a2 - source vector whose lane 0 is broadcast
; The checked output ANDs the mask with 3, round-trips it through a stack
; slot, moves it into %k1 and emits vpbroadcastq with a {%k1} merge mask.
define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp2:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per qword lane
  %trn1 = trunc i8 %a1 to i2
  %arg1 = bitcast i2 %trn1 to <2 x i1>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <2 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <2 x i1> %arg1, <2 x i64> %res0, <2 x i64> %a0
  ret <2 x i64> %res1
}
190*9880d681SAndroid Build Coastguard Worker
; Zero-masked 128-bit qword broadcast (function name mirrors
; _mm_maskz_broadcastq_epi64).
;   %a0 - i8 mask; only the low 2 bits are used (trunc to i2)
;   %a1 - source vector whose lane 0 is broadcast
; Lanes whose mask bit is clear are zeroed. The checked output masks the
; i8 with 3, round-trips it through a stack slot into %k1 and emits
; vpbroadcastq with {%k1} {z}.
define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp3:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per qword lane
  %trn0 = trunc i8 %a0 to i2
  %arg0 = bitcast i2 %trn0 to <2 x i1>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> zero
  %res1 = select <2 x i1> %arg0, <2 x i64> %res0, <2 x i64> zeroinitializer
  ret <2 x i64> %res1
}
220*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit qword broadcast (function name mirrors
; _mm256_broadcastq_epi64). Lane 0 of the 128-bit source is splatted into a
; 4-lane result by a widening shuffle with an all-zero mask. Both runs are
; expected to select a single vpbroadcastq %xmm0, %ymm0.
define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0
; X64-NEXT:    retq
  ; 4-element all-zero mask: widens 2 lanes to 4, each reading source lane 0
  %res = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %res
}
234*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit qword broadcast (function name mirrors
; _mm256_mask_broadcastq_epi64).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask; only the low 4 bits are used (trunc to i4)
;   %a2 - 128-bit source vector whose lane 0 is broadcast
; The checked output ANDs the mask with 15, round-trips it through a stack
; slot, moves it into %k1 and emits vpbroadcastq with a {%k1} merge mask.
define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp4:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per qword lane
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; 4-element all-zero mask: widens 2 lanes to 4, each reading source lane 0
  %res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0
  ret <4 x i64> %res1
}
264*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit qword broadcast (function name mirrors
; _mm256_maskz_broadcastq_epi64).
;   %a0 - i8 mask; only the low 4 bits are used (trunc to i4)
;   %a1 - 128-bit source vector whose lane 0 is broadcast
; Lanes whose mask bit is clear are zeroed. The checked output masks the
; i8 with 15, round-trips it through a stack slot into %k1 and emits
; vpbroadcastq with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastq_epi64:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp5:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastq_epi64:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per qword lane
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  ; 4-element all-zero mask: widens 2 lanes to 4, each reading source lane 0
  %res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <4 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> zero
  %res1 = select <4 x i1> %arg0, <4 x i64> %res0, <4 x i64> zeroinitializer
  ret <4 x i64> %res1
}
294*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit double broadcast (function name mirrors
; _mm_broadcastsd_pd). Lane 0 of %a0 is splatted with an all-zero shuffle
; mask. Both runs are expected to select vmovddup, with the shuffle comment
; xmm0 = xmm0[0,0], rather than a vbroadcastsd.
define <2 x double> @test_mm_broadcastsd_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  ; zeroinitializer shuffle mask: both result lanes read source lane 0
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %res
}
308*9880d681SAndroid Build Coastguard Worker
; Merge-masked 128-bit double broadcast (function name mirrors
; _mm_mask_broadcastsd_pd).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask; only the low 2 bits are used (trunc to i2)
;   %a2 - source vector whose lane 0 is broadcast
; The checked output ANDs the mask with 3, round-trips it through a stack
; slot, moves it into %k1 and emits a merge-masked vmovddup from xmm1.
define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_mask_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp6:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per double lane
  %trn1 = trunc i8 %a1 to i2
  %arg1 = bitcast i2 %trn1 to <2 x i1>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0
  ret <2 x double> %res1
}
338*9880d681SAndroid Build Coastguard Worker
; Zero-masked 128-bit double broadcast (function name mirrors
; _mm_maskz_broadcastsd_pd).
;   %a0 - i8 mask; only the low 2 bits are used (trunc to i2)
;   %a1 - source vector whose lane 0 is broadcast
; Lanes whose mask bit is clear are zeroed. The checked output masks the
; i8 with 3, round-trips it through a stack slot into %k1 and emits a
; zero-masked vmovddup ({%k1} {z}).
define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp7:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per double lane
  %trn0 = trunc i8 %a0 to i2
  %arg0 = bitcast i2 %trn0 to <2 x i1>
  ; all-zero shuffle mask splats lane 0 of the source
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> zero
  %res1 = select <2 x i1> %arg0, <2 x double> %res0, <2 x double> zeroinitializer
  ret <2 x double> %res1
}
368*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit double broadcast (function name mirrors
; _mm256_broadcastsd_pd). Lane 0 of the 128-bit source is splatted into a
; 4-lane result by a widening shuffle with an all-zero mask. Both runs are
; expected to select a single vbroadcastsd %xmm0, %ymm0.
define <4 x double> @test_mm256_broadcastsd_pd(<2 x double> %a0) {
; X32-LABEL: test_mm256_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  ; 4-element all-zero mask: widens 2 lanes to 4, each reading source lane 0
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %res
}
382*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit double broadcast (function name mirrors
; _mm256_mask_broadcastsd_pd).
;   %a0 - pass-through vector, kept in lanes whose mask bit is clear
;   %a1 - i8 mask; only the low 4 bits are used (trunc to i4)
;   %a2 - 128-bit source vector whose lane 0 is broadcast
; The checked output ANDs the mask with 15, round-trips it through a stack
; slot, moves it into %k1 and emits vbroadcastsd with a {%k1} merge mask.
define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm256_mask_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp8:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  ; narrow the i8 mask to one bit per double lane
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; 4-element all-zero mask: widens 2 lanes to 4, each reading source lane 0
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <4 x i32> zeroinitializer
  ; set mask bit -> broadcast value, clear mask bit -> pass-through lane
  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
  ret <4 x double> %res1
}
412*9880d681SAndroid Build Coastguard Worker
; Zero-masked broadcast of element 0 of %a1 into four double lanes.
; A lane takes the broadcast value when its bit in the low 4 bits of %a0 is
; set and is zero otherwise. Both runs expect a single vbroadcastsd with
; {%k1} {z} masking.
define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastsd_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp9:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastsd_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  ; An all-zero shuffle mask replicates element 0 of %a1 across the result.
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <4 x i32> zeroinitializer
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
  ret <4 x double> %res1
}
442*9880d681SAndroid Build Coastguard Worker
; Unmasked broadcast of element 0 of %a0 into all four float lanes.
; Both runs expect a single xmm-to-xmm vbroadcastss.
define <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  ; An all-zero shuffle mask replicates element 0 across the result.
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %res
}
456*9880d681SAndroid Build Coastguard Worker
; Merge-masked broadcast of element 0 of %a2 into four float lanes.
; A lane takes the broadcast value when its bit in the low 4 bits of %a1 is
; set and keeps the matching lane of %a0 otherwise. Both runs expect the
; select to fold into a single masked vbroadcastss.
define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp10:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; An all-zero shuffle mask replicates element 0 of %a2 across the result.
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> zeroinitializer
  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
  ret <4 x float> %res1
}
486*9880d681SAndroid Build Coastguard Worker
; Zero-masked broadcast of element 0 of %a1 into four float lanes.
; A lane takes the broadcast value when its bit in the low 4 bits of %a0 is
; set and is zero otherwise. Both runs expect a single vbroadcastss with
; {%k1} {z} masking.
define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp11:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  ; An all-zero shuffle mask replicates element 0 of %a1 across the result.
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
  ret <4 x float> %res1
}
516*9880d681SAndroid Build Coastguard Worker
; Unmasked broadcast of element 0 of the 4-float %a0 into eight float lanes.
; Both runs expect a single xmm-to-ymm vbroadcastss.
define <8 x float> @test_mm256_broadcastss_ps(<4 x float> %a0) {
; X32-LABEL: test_mm256_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  ; The 8-wide all-zero shuffle mask widens the result while replicating
  ; element 0 of the source.
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %res
}
530*9880d681SAndroid Build Coastguard Worker
; Merge-masked broadcast of element 0 of %a2 into eight float lanes.
; All eight bits of %a1 are used as the lane mask, so no truncation is
; needed. A lane takes the broadcast value when its bit is set and keeps the
; matching lane of %a0 otherwise. Both runs expect the mask to be moved
; straight into %k1 and a single masked vbroadcastss.
define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm256_mask_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  ; One mask bit per result lane.
  %arg1 = bitcast i8 %a1 to <8 x i1>
  ; An all-zero shuffle mask replicates element 0 of %a2 across the result.
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
  ret <8 x float> %res1
}
549*9880d681SAndroid Build Coastguard Worker
; Zero-masked broadcast of element 0 of %a1 into eight float lanes.
; All eight bits of %a0 are used as the lane mask. A lane takes the
; broadcast value when its bit is set and is zero otherwise. Both runs expect
; a single vbroadcastss with {%k1} {z} masking.
define <8 x float> @test_mm256_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastss_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastss_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  ; One mask bit per result lane.
  %arg0 = bitcast i8 %a0 to <8 x i1>
  ; An all-zero shuffle mask replicates element 0 of %a1 across the result.
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> zeroinitializer
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
  ret <8 x float> %res1
}
568*9880d681SAndroid Build Coastguard Worker
; Unmasked duplication of element 0 of %a0 into both double lanes.
; Both runs expect a single vmovddup producing xmm0[0,0].
define <2 x double> @test_mm_movddup_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  ; An all-zero shuffle mask selects element 0 for both result lanes.
  %res = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %res
}
582*9880d681SAndroid Build Coastguard Worker
; Merge-masked duplication of element 0 of %a2 into two double lanes.
; A lane takes the duplicated value when its bit in the low 2 bits of %a1 is
; set and keeps the matching lane of %a0 otherwise. Both runs expect the
; select to fold into a single masked vmovddup.
define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp12:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X64-NEXT:    retq
  ; Only the low 2 bits of the i8 mask are used, one bit per result lane.
  %trn1 = trunc i8 %a1 to i2
  %arg1 = bitcast i2 %trn1 to <2 x i1>
  ; An all-zero shuffle mask selects element 0 of %a2 for both lanes.
  %res0 = shufflevector <2 x double> %a2, <2 x double> undef, <2 x i32> zeroinitializer
  %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0
  ret <2 x double> %res1
}
612*9880d681SAndroid Build Coastguard Worker
; Zero-masked duplication of element 0 of %a1 into two double lanes.
; A lane takes the duplicated value when its bit in the low 2 bits of %a0 is
; set and is zero otherwise. Both runs expect a single vmovddup with
; {%k1} {z} masking.
define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp13:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $3, %al
; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $3, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    retq
  ; Only the low 2 bits of the i8 mask are used, one bit per result lane.
  ; The temporaries carry the index of the argument they derive from (%a0).
  %trn0 = trunc i8 %a0 to i2
  %arg0 = bitcast i2 %trn0 to <2 x i1>
  ; An all-zero shuffle mask selects element 0 of %a1 for both lanes.
  %res0 = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <2 x i1> %arg0, <2 x double> %res0, <2 x double> zeroinitializer
  ret <2 x double> %res1
}
642*9880d681SAndroid Build Coastguard Worker
; Unmasked duplication of the even-indexed doubles of %a0: result lanes are
; elements 0, 0, 2, 2. Both runs expect a single ymm vmovddup.
define <4 x double> @test_mm256_movddup_pd(<4 x double> %a0) {
; X32-LABEL: test_mm256_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; X64-NEXT:    retq
  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %res
}
656*9880d681SAndroid Build Coastguard Worker
; Merge-masked duplication of the even-indexed doubles of %a2 (elements
; 0, 0, 2, 2). A lane takes the shuffled value when its bit in the low 4 bits
; of %a1 is set and keeps the matching lane of %a0 otherwise. Both runs
; expect the select to fold into a single masked vmovddup.
define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_mask_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp14:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
  ret <4 x double> %res1
}
686*9880d681SAndroid Build Coastguard Worker
; Zero-masked duplication of the even-indexed doubles of %a1 (elements
; 0, 0, 2, 2). A lane takes the shuffled value when its bit in the low 4 bits
; of %a0 is set and is zero otherwise. Both runs expect a single vmovddup
; with {%k1} {z} masking.
define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_movddup_pd:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp15:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_movddup_pd:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  ; The temporaries carry the index of the argument they derive from (%a0).
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  %res0 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
  ret <4 x double> %res1
}
716*9880d681SAndroid Build Coastguard Worker
; Unmasked duplication of the odd-indexed floats of %a0: result lanes are
; elements 1, 1, 3, 3. Both runs expect a single xmm vmovshdup.
define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %res
}
730*9880d681SAndroid Build Coastguard Worker
; Merge-masked duplication of the odd-indexed floats of %a2 (elements
; 1, 1, 3, 3). A lane takes the shuffled value when its bit in the low 4 bits
; of %a1 is set and keeps the matching lane of %a0 otherwise. Both runs
; expect the select to fold into a single masked vmovshdup.
define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp16:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn1 = trunc i8 %a1 to i4
  %arg1 = bitcast i4 %trn1 to <4 x i1>
  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
  ret <4 x float> %res1
}
760*9880d681SAndroid Build Coastguard Worker
; Zero-masked duplication of the odd-indexed floats of %a1 (elements
; 1, 1, 3, 3). A lane takes the shuffled value when its bit in the low 4 bits
; of %a0 is set and is zero otherwise. Both runs expect a single vmovshdup
; with {%k1} {z} masking.
define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    pushl %eax
; X32-NEXT:  .Ltmp17:
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    andb $15, %al
; X32-NEXT:    movb %al, (%esp)
; X32-NEXT:    movzbl (%esp), %eax
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X32-NEXT:    popl %eax
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    andb $15, %dil
; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT:    kmovw %eax, %k1
; X64-NEXT:    vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT:    retq
  ; Only the low 4 bits of the i8 mask are used, one bit per result lane.
  %trn0 = trunc i8 %a0 to i4
  %arg0 = bitcast i4 %trn0 to <4 x i1>
  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ; Unselected lanes are zeroed rather than merged with a passthrough value.
  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
  ret <4 x float> %res1
}
790*9880d681SAndroid Build Coastguard Worker
; Unmasked duplication of the odd-indexed floats of %a0: result lanes are
; elements 1, 1, 3, 3, 5, 5, 7, 7. Both runs expect a single ymm vmovshdup.
define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) {
; X32-LABEL: test_mm256_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq
  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %res
}
804*9880d681SAndroid Build Coastguard Worker
; Merge-masked duplication of the odd-indexed floats of %a2 (elements
; 1, 1, 3, 3, 5, 5, 7, 7). All eight bits of %a1 are used as the lane mask,
; so no truncation is needed. A lane takes the shuffled value when its bit is
; set and keeps the matching lane of %a0 otherwise. Both runs expect the mask
; to be moved straight into %k1 and a single masked vmovshdup.
define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
; X32-LABEL: test_mm256_mask_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
; X32-NEXT:    kmovw %eax, %k1
; X32-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7]
; X64-NEXT:    retq
  ; One mask bit per result lane.
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
  ret <8 x float> %res1
}
823*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit movehdup pattern.
;   %a0 - 8-bit lane mask, one bit per float lane
;   %a1 - shuffle source
; Expected codegen moves the mask into %k1 and emits one vmovshdup with
; zeroing ({z}) on ymm0.
824*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_maskz_movehdup_ps(i8 %a0, <8 x float> %a1) {
825*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_movehdup_ps:
826*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
827*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
828*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
829*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
830*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
831*9880d681SAndroid Build Coastguard Worker;
832*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_movehdup_ps:
833*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
834*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %edi, %k1
835*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
836*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; The i8 mask is reinterpreted directly as eight i1 lane predicates.
837*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i8 %a0 to <8 x i1>
838*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ; Set mask bit -> shuffled lane; clear mask bit -> 0.0.
839*9880d681SAndroid Build Coastguard Worker  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
840*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res1
841*9880d681SAndroid Build Coastguard Worker}
842*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit moveldup pattern: the shuffle duplicates the even-indexed
; float lanes (0,0,2,2). Both targets are expected to emit a single vmovsldup
; on xmm0.
843*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
844*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_moveldup_ps:
845*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
846*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
847*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
848*9880d681SAndroid Build Coastguard Worker;
849*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_moveldup_ps:
850*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
851*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
852*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
853*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
854*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
855*9880d681SAndroid Build Coastguard Worker}
856*9880d681SAndroid Build Coastguard Worker
; Merge-masked 128-bit moveldup pattern.
;   %a0 - pass-through vector (kept in lanes whose mask bit is clear)
;   %a1 - i8 mask; only its low 4 bits are used (one per float lane)
;   %a2 - shuffle source
; The expected assembly shows how fast-isel currently materializes the 4-bit
; mask: it ANDs with 15, round-trips the byte through a stack slot, and then
; loads it into %k1 for the masked vmovsldup.
857*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
858*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_moveldup_ps:
859*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
860*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
861*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp18:
862*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
863*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
864*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
865*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
866*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
867*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
868*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
869*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
870*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
871*9880d681SAndroid Build Coastguard Worker;
872*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_moveldup_ps:
873*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
874*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
875*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
876*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
877*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
878*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
879*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
880*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i4
881*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i4 %trn1 to <4 x i1>
882*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x float> %a2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ; Set mask bit -> shuffled lane; clear mask bit -> lane from %a0.
883*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
884*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
885*9880d681SAndroid Build Coastguard Worker}
886*9880d681SAndroid Build Coastguard Worker
; Zero-masked 128-bit moveldup pattern.
;   %a0 - i8 mask; only its low 4 bits are used (one per float lane)
;   %a1 - shuffle source
; Expected codegen masks the byte with 15, round-trips it through a stack
; slot into %k1, and emits one vmovsldup with zeroing ({z}) on xmm0.
887*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
888*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskz_moveldup_ps:
889*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
890*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
891*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp19:
892*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
893*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
894*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
895*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
896*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
897*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
898*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
899*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
900*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
901*9880d681SAndroid Build Coastguard Worker;
902*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskz_moveldup_ps:
903*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
904*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
905*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
906*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
907*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
908*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
909*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
910*9880d681SAndroid Build Coastguard Worker  %trn0 = trunc i8 %a0 to i4
911*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i4 %trn0 to <4 x i1>
912*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ; Set mask bit -> shuffled lane; clear mask bit -> 0.0.
913*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
914*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
915*9880d681SAndroid Build Coastguard Worker}
916*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit moveldup pattern: the shuffle duplicates the even-indexed
; float lanes (0,0,2,2,4,4,6,6). Both targets are expected to emit a single
; vmovsldup on ymm0.
917*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) {
918*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_moveldup_ps:
919*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
920*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
921*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
922*9880d681SAndroid Build Coastguard Worker;
923*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_moveldup_ps:
924*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
925*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
926*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
927*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
928*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
929*9880d681SAndroid Build Coastguard Worker}
930*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit moveldup pattern.
;   %a0 - pass-through vector (kept in lanes whose mask bit is clear)
;   %a1 - 8-bit lane mask, one bit per float lane
;   %a2 - shuffle source
; Expected codegen moves the mask into %k1 and emits one masked vmovsldup
; that writes ymm0 from ymm1.
931*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) {
932*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_moveldup_ps:
933*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
934*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
935*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
936*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
937*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
938*9880d681SAndroid Build Coastguard Worker;
939*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_moveldup_ps:
940*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
941*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %edi, %k1
942*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6]
943*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; The i8 mask is reinterpreted directly as eight i1 lane predicates.
944*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i8 %a1 to <8 x i1>
945*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <8 x float> %a2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ; Set mask bit -> shuffled lane; clear mask bit -> lane from %a0.
946*9880d681SAndroid Build Coastguard Worker  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
947*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res1
948*9880d681SAndroid Build Coastguard Worker}
949*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit moveldup pattern.
;   %a0 - 8-bit lane mask, one bit per float lane
;   %a1 - shuffle source
; Expected codegen moves the mask into %k1 and emits one vmovsldup with
; zeroing ({z}) on ymm0.
950*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_maskz_moveldup_ps(i8 %a0, <8 x float> %a1) {
951*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_moveldup_ps:
952*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
953*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
954*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
955*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
956*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
957*9880d681SAndroid Build Coastguard Worker;
958*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_moveldup_ps:
959*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
960*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %edi, %k1
961*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
962*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; The i8 mask is reinterpreted directly as eight i1 lane predicates.
963*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i8 %a0 to <8 x i1>
964*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <8 x float> %a1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ; Set mask bit -> shuffled lane; clear mask bit -> 0.0.
965*9880d681SAndroid Build Coastguard Worker  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
966*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res1
967*9880d681SAndroid Build Coastguard Worker}
968*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit 64-bit-integer permute: result lanes are taken from source
; lanes (3,0,0,0). Both targets are expected to emit a single vpermq on ymm0.
969*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_permutex_epi64(<4 x i64> %a0) {
970*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permutex_epi64:
971*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
972*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
973*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
974*9880d681SAndroid Build Coastguard Worker;
975*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permutex_epi64:
976*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
977*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
978*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
979*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
980*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res
981*9880d681SAndroid Build Coastguard Worker}
982*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit 64-bit-integer permute with lane order (1,0,0,0).
;   %a0 - pass-through vector (kept in lanes whose mask bit is clear)
;   %a1 - i8 mask; only its low 4 bits are used (one per i64 lane)
;   %a2 - permute source
; Expected codegen masks the byte with 15, round-trips it through a stack
; slot into %k1, and emits one masked vpermq that writes ymm0 from ymm1.
983*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) {
984*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_permutex_epi64:
985*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
986*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
987*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp20:
988*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
989*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
990*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
991*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
992*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
993*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
994*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
995*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
996*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
997*9880d681SAndroid Build Coastguard Worker;
998*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_permutex_epi64:
999*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1000*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1001*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1002*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1003*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1004*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1005*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
1006*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i4
1007*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i4 %trn1 to <4 x i1>
1008*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ; Set mask bit -> permuted lane; clear mask bit -> lane from %a0.
1009*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg1, <4 x i64> %res0, <4 x i64> %a0
1010*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res1
1011*9880d681SAndroid Build Coastguard Worker}
1012*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit 64-bit-integer permute with lane order (1,0,0,0).
;   %a0 - i8 mask; only its low 4 bits are used (one per i64 lane)
;   %a1 - permute source
; Expected codegen masks the byte with 15, round-trips it through a stack
; slot into %k1, and emits one vpermq with zeroing ({z}) on ymm0.
1013*9880d681SAndroid Build Coastguard Workerdefine <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
1014*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_permutex_epi64:
1015*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1016*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1017*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp21:
1018*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1019*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1020*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1021*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1022*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1023*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1024*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1025*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1026*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1027*9880d681SAndroid Build Coastguard Worker;
1028*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_permutex_epi64:
1029*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1030*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1031*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1032*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1033*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1034*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1035*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
  ; The temporary carries the index of the operand it derives from (%a0).
1036*9880d681SAndroid Build Coastguard Worker  %trn0 = trunc i8 %a0 to i4
1037*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i4 %trn0 to <4 x i1>
1038*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ; Set mask bit -> permuted lane; clear mask bit -> 0.
1039*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg0, <4 x i64> %res0, <4 x i64> zeroinitializer
1040*9880d681SAndroid Build Coastguard Worker  ret <4 x i64> %res1
1041*9880d681SAndroid Build Coastguard Worker}
1042*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit double permute: result lanes are taken from source lanes
; (3,0,0,0). Both targets are expected to emit a single vpermpd on ymm0.
1043*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_permutex_pd(<4 x double> %a0) {
1044*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_permutex_pd:
1045*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1046*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
1047*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1048*9880d681SAndroid Build Coastguard Worker;
1049*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_permutex_pd:
1050*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1051*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
1052*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1053*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1054*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
1055*9880d681SAndroid Build Coastguard Worker}
1056*9880d681SAndroid Build Coastguard Worker
; Merge-masked 256-bit double permute with lane order (1,0,0,0).
;   %a0 - pass-through vector (kept in lanes whose mask bit is clear)
;   %a1 - i8 mask; only its low 4 bits are used (one per double lane)
;   %a2 - permute source
; Expected codegen masks the byte with 15, round-trips it through a stack
; slot into %k1, and emits one masked vpermpd that writes ymm0 from ymm1.
1057*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
1058*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_permutex_pd:
1059*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1060*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1061*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp22:
1062*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1063*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1064*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1065*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1066*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1067*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1068*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1069*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1070*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1071*9880d681SAndroid Build Coastguard Worker;
1072*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_permutex_pd:
1073*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1074*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1075*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1076*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1077*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1078*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
1079*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
1080*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i4
1081*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i4 %trn1 to <4 x i1>
1082*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x double> %a2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ; Set mask bit -> permuted lane; clear mask bit -> lane from %a0.
1083*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
1084*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res1
1085*9880d681SAndroid Build Coastguard Worker}
1086*9880d681SAndroid Build Coastguard Worker
; Zero-masked 256-bit double permute with lane order (1,0,0,0).
;   %a0 - i8 mask; only its low 4 bits are used (one per double lane)
;   %a1 - permute source
; Expected codegen masks the byte with 15, round-trips it through a stack
; slot into %k1, and emits one vpermpd with zeroing ({z}) on ymm0.
1087*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
1088*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_permutex_pd:
1089*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1090*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1091*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp23:
1092*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1093*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1094*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1095*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1096*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1097*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1098*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1099*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1100*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1101*9880d681SAndroid Build Coastguard Worker;
1102*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_permutex_pd:
1103*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1104*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1105*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1106*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1107*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1108*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
1109*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 4 mask bits, then view the i4 as four i1 lane predicates.
  ; The temporary carries the index of the operand it derives from (%a0).
1110*9880d681SAndroid Build Coastguard Worker  %trn0 = trunc i8 %a0 to i4
1111*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i4 %trn0 to <4 x i1>
1112*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ; Set mask bit -> permuted lane; clear mask bit -> 0.0.
1113*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
1114*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res1
1115*9880d681SAndroid Build Coastguard Worker}
1116*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit two-source double shuffle: indices <1, 3> select element 1
; of %a0 and element 1 of %a1. Both targets are expected to emit a single
; vunpckhpd.
1117*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
1118*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_shuffle_pd:
1119*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1120*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1121*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1122*9880d681SAndroid Build Coastguard Worker;
1123*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_shuffle_pd:
1124*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1125*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
1126*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1127*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
1128*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res
1129*9880d681SAndroid Build Coastguard Worker}
1130*9880d681SAndroid Build Coastguard Worker
; Merge-masked 128-bit two-source double shuffle (indices <1, 3>).
;   %a0 - pass-through vector (kept in lanes whose mask bit is clear)
;   %a1 - i8 mask; only its low 2 bits are used (one per double lane)
;   %a2, %a3 - shuffle sources
; Expected codegen masks the byte with 3, round-trips it through a stack
; slot into %k1, and emits one masked vunpckhpd that writes xmm0 from
; xmm1/xmm2.
1131*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2, <2 x double> %a3) {
1132*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_shuffle_pd:
1133*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1134*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1135*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp24:
1136*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1137*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1138*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $3, %al
1139*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
1140*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1141*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1142*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
1143*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1144*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1145*9880d681SAndroid Build Coastguard Worker;
1146*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_shuffle_pd:
1147*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1148*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $3, %dil
1149*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1150*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1151*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1152*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
1153*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 6 mask bits, then view the i2 as two i1 lane predicates.
1154*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i2
1155*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i2 %trn1 to <2 x i1>
1156*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <2 x double> %a2, <2 x double> %a3, <2 x i32> <i32 1, i32 3>
  ; Set mask bit -> shuffled lane; clear mask bit -> lane from %a0.
1157*9880d681SAndroid Build Coastguard Worker  %res1 = select <2 x i1> %arg1, <2 x double> %res0, <2 x double> %a0
1158*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res1
1159*9880d681SAndroid Build Coastguard Worker}
1160*9880d681SAndroid Build Coastguard Worker
; Zero-masked 128-bit two-source double shuffle (indices <1, 3>).
;   %a0 - i8 mask; only its low 2 bits are used (one per double lane)
;   %a1, %a2 - shuffle sources
; Expected codegen masks the byte with 3, round-trips it through a stack
; slot into %k1, and emits one vunpckhpd with zeroing ({z}) on xmm0.
1161*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
1162*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskz_shuffle_pd:
1163*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1164*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1165*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp25:
1166*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1167*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1168*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $3, %al
1169*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, {{[0-9]+}}(%esp)
1170*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
1171*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1172*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
1173*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1174*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1175*9880d681SAndroid Build Coastguard Worker;
1176*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskz_shuffle_pd:
1177*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1178*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $3, %dil
1179*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1180*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1181*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1182*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
1183*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Drop the upper 6 mask bits, then view the i2 as two i1 lane predicates.
  ; The temporary carries the index of the operand it derives from (%a0).
1184*9880d681SAndroid Build Coastguard Worker  %trn0 = trunc i8 %a0 to i2
1185*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i2 %trn0 to <2 x i1>
1186*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <2 x double> %a1, <2 x double> %a2, <2 x i32> <i32 1, i32 3>
  ; Set mask bit -> shuffled lane; clear mask bit -> 0.0.
1187*9880d681SAndroid Build Coastguard Worker  %res1 = select <2 x i1> %arg0, <2 x double> %res0, <2 x double> zeroinitializer
1188*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %res1
1189*9880d681SAndroid Build Coastguard Worker}
1190*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit two-source double shuffle: indices <1, 5, 2, 6> select
; %a0[1], %a1[1], %a0[2], %a1[2]. Both targets are expected to emit a single
; vshufpd on ymm0/ymm1.
1191*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) {
1192*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shuffle_pd:
1193*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1194*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1195*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1196*9880d681SAndroid Build Coastguard Worker;
1197*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shuffle_pd:
1198*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1199*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1200*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1201*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
1202*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res
1203*9880d681SAndroid Build Coastguard Worker}
1204*9880d681SAndroid Build Coastguard Worker
; Merge-masked variant of the 256-bit double shuffle.
;   %a0      - pass-through vector; supplies lanes whose mask bit is clear
;   %a1      - i8 mask; only its low 4 bits are used (one per lane)
;   %a2, %a3 - the two shuffle sources
; Both runs expect the mask to be ANDed with 15, stored to the stack, reloaded
; zero-extended and moved into %k1, after which a single vshufpd writes ymm0
; under {%k1}. The i386 expectations additionally wrap this in pushl/popl of
; %eax, with the mask byte stored at (%esp).
1205*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2, <4 x double> %a3) {
1206*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_shuffle_pd:
1207*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1208*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1209*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp26:
1210*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1211*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1212*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1213*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1214*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1215*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1216*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
1217*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1218*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1219*9880d681SAndroid Build Coastguard Worker;
1220*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_shuffle_pd:
1221*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1222*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1223*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1224*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1225*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1226*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
1227*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Narrow the i8 mask to 4 bits and reinterpret it as one i1 per lane.
1228*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i4
1229*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i4 %trn1 to <4 x i1>
  ; Same <1, 5, 2, 6> pattern as the unmasked test, applied to %a2/%a3.
1230*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x double> %a2, <4 x double> %a3, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
  ; Lanes with a set mask bit take the shuffled value; the rest keep %a0.
1231*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg1, <4 x double> %res0, <4 x double> %a0
1232*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res1
1233*9880d681SAndroid Build Coastguard Worker}
1234*9880d681SAndroid Build Coastguard Worker
; Zero-masked variant of the 256-bit double shuffle.
;   %a0      - i8 mask; only its low 4 bits are used (one per lane)
;   %a1, %a2 - the two shuffle sources
; Lanes whose mask bit is clear become zero. The expected assembly prepares
; %k1 the same way as the merge-masked test (AND with 15, store, zero-extending
; reload, kmovw) and then issues a single vshufpd with {%k1} {z}.
1235*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
1236*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_shuffle_pd:
1237*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1238*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1239*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp27:
1240*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1241*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1242*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1243*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1244*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1245*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1246*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1247*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1248*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1249*9880d681SAndroid Build Coastguard Worker;
1250*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_shuffle_pd:
1251*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1252*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1253*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1254*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1255*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1256*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
1257*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Narrow the i8 mask to 4 bits and reinterpret it as one i1 per lane.
1258*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a0 to i4
1259*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i4 %trn1 to <4 x i1>
1260*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x double> %a1, <4 x double> %a2, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
  ; Selecting against zeroinitializer zeroes every lane whose mask bit is clear.
1261*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg0, <4 x double> %res0, <4 x double> zeroinitializer
1262*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %res1
1263*9880d681SAndroid Build Coastguard Worker}
1264*9880d681SAndroid Build Coastguard Worker
; Unmasked 128-bit single-precision shuffle. The mask <0, 1, 4, 4> takes
; elements 0 and 1 from %a0 and element 0 of %a1 twice (index 4 is the first
; element of the second operand). Both runs expect a single vshufps followed
; by the return.
1265*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) {
1266*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_shuffle_ps:
1267*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1268*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1269*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1270*9880d681SAndroid Build Coastguard Worker;
1271*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_shuffle_ps:
1272*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1273*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
1274*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1275*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
1276*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res
1277*9880d681SAndroid Build Coastguard Worker}
1278*9880d681SAndroid Build Coastguard Worker
; Merge-masked variant of the 128-bit single-precision shuffle.
;   %a0      - pass-through vector; supplies lanes whose mask bit is clear
;   %a1      - i8 mask; only its low 4 bits are used (one per lane)
;   %a2, %a3 - the two shuffle sources
; The expected assembly ANDs the mask with 15, stores it to the stack, reloads
; it zero-extended, moves it into %k1 and issues one vshufps under {%k1}.
1279*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2, <4 x float> %a3) {
1280*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_mask_shuffle_ps:
1281*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1282*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1283*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp28:
1284*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1285*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1286*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1287*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1288*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1289*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1290*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
1291*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1292*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1293*9880d681SAndroid Build Coastguard Worker;
1294*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_mask_shuffle_ps:
1295*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1296*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1297*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1298*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1299*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1300*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
1301*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Narrow the i8 mask to 4 bits and reinterpret it as one i1 per lane.
1302*9880d681SAndroid Build Coastguard Worker  %trn1 = trunc i8 %a1 to i4
1303*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i4 %trn1 to <4 x i1>
1304*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x float> %a2, <4 x float> %a3, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  ; Lanes with a set mask bit take the shuffled value; the rest keep %a0.
1305*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg1, <4 x float> %res0, <4 x float> %a0
1306*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
1307*9880d681SAndroid Build Coastguard Worker}
1308*9880d681SAndroid Build Coastguard Worker
; Zero-masked variant of the 128-bit single-precision shuffle.
;   %a0      - i8 mask; only its low 4 bits are used (one per lane)
;   %a1, %a2 - the two shuffle sources
; Lanes whose mask bit is clear become zero. The expected assembly builds %k1
; from the mask (AND with 15, store, zero-extending reload, kmovw) and issues
; one vshufps with {%k1} {z}.
1309*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
1310*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm_maskz_shuffle_ps:
1311*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1312*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    pushl %eax
1313*9880d681SAndroid Build Coastguard Worker; X32-NEXT:  .Ltmp29:
1314*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    .cfi_def_cfa_offset 8
1315*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1316*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    andb $15, %al
1317*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb %al, (%esp)
1318*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movzbl (%esp), %eax
1319*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1320*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
1321*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    popl %eax
1322*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1323*9880d681SAndroid Build Coastguard Worker;
1324*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm_maskz_shuffle_ps:
1325*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1326*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    andb $15, %dil
1327*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
1328*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
1329*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %eax, %k1
1330*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
1331*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Narrow the i8 mask to 4 bits and reinterpret it as one i1 per lane.
1332*9880d681SAndroid Build Coastguard Worker  %trn0 = trunc i8 %a0 to i4
1333*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i4 %trn0 to <4 x i1>
1334*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <4 x float> %a1, <4 x float> %a2, <4 x i32> <i32 0, i32 1, i32 4, i32 4>
  ; Selecting against zeroinitializer zeroes every lane whose mask bit is clear.
1335*9880d681SAndroid Build Coastguard Worker  %res1 = select <4 x i1> %arg0, <4 x float> %res0, <4 x float> zeroinitializer
1336*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %res1
1337*9880d681SAndroid Build Coastguard Worker}
1338*9880d681SAndroid Build Coastguard Worker
; Unmasked 256-bit single-precision shuffle. The mask
; <0, 1, 8, 8, 4, 5, 12, 12> applies the same pattern to each half of the
; result: two consecutive elements of %a0 followed by one element of %a1
; repeated twice (indices 8 and 12 are elements 0 and 4 of the second
; operand). Both runs expect a single vshufps followed by the return.
1339*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_shuffle_ps(<8 x float> %a0, <8 x float> %a1) {
1340*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_shuffle_ps:
1341*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1342*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1343*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1344*9880d681SAndroid Build Coastguard Worker;
1345*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_shuffle_ps:
1346*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1347*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1348*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
1349*9880d681SAndroid Build Coastguard Worker  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
1350*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res
1351*9880d681SAndroid Build Coastguard Worker}
1352*9880d681SAndroid Build Coastguard Worker
; Merge-masked variant of the 256-bit single-precision shuffle.
;   %a0      - pass-through vector; supplies lanes whose mask bit is clear
;   %a1      - i8 mask, one bit per lane
;   %a2, %a3 - the two shuffle sources
; All 8 mask bits are used, so the IR has no trunc and the expected assembly
; has no AND/store/reload sequence: the mask is moved straight into %k1 with
; kmovw (after a byte load from the stack on i386) before the vshufps.
1353*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2, <8 x float> %a3) {
1354*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_mask_shuffle_ps:
1355*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1356*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1357*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1358*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
1359*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1360*9880d681SAndroid Build Coastguard Worker;
1361*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_mask_shuffle_ps:
1362*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1363*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %edi, %k1
1364*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4]
1365*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Reinterpret the i8 mask directly as one i1 per lane.
1366*9880d681SAndroid Build Coastguard Worker  %arg1 = bitcast i8 %a1 to <8 x i1>
1367*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <8 x float> %a2, <8 x float> %a3, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
  ; Lanes with a set mask bit take the shuffled value; the rest keep %a0.
1368*9880d681SAndroid Build Coastguard Worker  %res1 = select <8 x i1> %arg1, <8 x float> %res0, <8 x float> %a0
1369*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res1
1370*9880d681SAndroid Build Coastguard Worker}
1371*9880d681SAndroid Build Coastguard Worker
; Zero-masked variant of the 256-bit single-precision shuffle.
;   %a0      - i8 mask, one bit per lane
;   %a1, %a2 - the two shuffle sources
; Lanes whose mask bit is clear become zero. As in the merge-masked 256-bit
; test, the mask goes straight into %k1 via kmovw; the vshufps is then
; expected with {%k1} {z}.
1372*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
1373*9880d681SAndroid Build Coastguard Worker; X32-LABEL: test_mm256_maskz_shuffle_ps:
1374*9880d681SAndroid Build Coastguard Worker; X32:       # BB#0:
1375*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
1376*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    kmovw %eax, %k1
1377*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1378*9880d681SAndroid Build Coastguard Worker; X32-NEXT:    retl
1379*9880d681SAndroid Build Coastguard Worker;
1380*9880d681SAndroid Build Coastguard Worker; X64-LABEL: test_mm256_maskz_shuffle_ps:
1381*9880d681SAndroid Build Coastguard Worker; X64:       # BB#0:
1382*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    kmovw %edi, %k1
1383*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4]
1384*9880d681SAndroid Build Coastguard Worker; X64-NEXT:    retq
  ; Reinterpret the i8 mask directly as one i1 per lane.
1385*9880d681SAndroid Build Coastguard Worker  %arg0 = bitcast i8 %a0 to <8 x i1>
1386*9880d681SAndroid Build Coastguard Worker  %res0 = shufflevector <8 x float> %a1, <8 x float> %a2, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
  ; Selecting against zeroinitializer zeroes every lane whose mask bit is clear.
1387*9880d681SAndroid Build Coastguard Worker  %res1 = select <8 x i1> %arg0, <8 x float> %res0, <8 x float> zeroinitializer
1388*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %res1
1389*9880d681SAndroid Build Coastguard Worker}
1390*9880d681SAndroid Build Coastguard Worker
; Metadata node !0: a tuple holding the single value i32 1.
; NOTE(review): none of the functions in this part of the file reference !0;
; presumably it is attached to an instruction in an earlier test -- verify
; before removing or renumbering it.
1391*9880d681SAndroid Build Coastguard Worker!0 = !{i32 1}
1392