; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx < %s | FileCheck %s --check-prefix=AVX  --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f,avx512bw,avx512vl < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=SKX

; To test the case where masked load/store is not legal, we should add a run with a target
; that does not have AVX, but that case should probably be a separate test file with fewer tests,
; because it takes over 1.2 seconds to codegen these tests on a 4 GHz Haswell if there's no maskmov.
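; As a sketch only (not enabled here, for the codegen-time reason above), such a separate
; file could use a pre-AVX run line along these lines, with SSE4.2 as an example target:
;   llc -mtriple=x86_64-apple-darwin -mattr=sse4.2 < %s | FileCheck %s --check-prefix=SSE42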

define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test1:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test1:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test1:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
  ret <16 x i32> %res
}

define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
; AVX1-LABEL: test2:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test2:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test2:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
  ret <16 x i32> %res
}

define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
; AVX1-LABEL: test3:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test3:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpmaskmovd %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT:    vpmaskmovd %ymm2, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test3:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
; AVX1-LABEL: test4:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm4
; AVX1-NEXT:    vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test4:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm4
; AVX2-NEXT:    vblendvps %ymm0, %ymm4, %ymm2, %ymm0
; AVX2-NEXT:    vmaskmovps 32(%rdi), %ymm1, %ymm2
; AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm3, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test4:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovups (%rdi), %zmm1 {%k1}
; AVX512-NEXT:    vmovaps %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
  ret <16 x float> %res
}

define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
; AVX1-LABEL: test5:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm5
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm5, %ymm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm4
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX1-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX1-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test5:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm3
; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm3, %xmm3
; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm4
; AVX2-NEXT:    vblendvpd %ymm0, %ymm4, %ymm1, %ymm0
; AVX2-NEXT:    vmaskmovpd 32(%rdi), %ymm3, %ymm1
; AVX2-NEXT:    vblendvpd %ymm3, %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test5:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
  ret <8 x double> %res
}

define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
; AVX-LABEL: test6:
; AVX:       ## BB#0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test6:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovupd (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
  ret <2 x double> %res
}

define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: test7:
; AVX:       ## BB#0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test7:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
  ret <4 x float> %res
}

define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: test8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
  ret <4 x i32> %res
}

define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test9:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test9:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test9:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
  ret void
}

define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test10:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test10:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm0, %ymm2, %ymm1, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpcmpeqd %xmm2, %xmm0, %k1
; SKX-NEXT:    vmovapd (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)
  ret <4 x double> %res
}

define <4 x double> @test10b(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
; AVX1-LABEL: test10b:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test10b:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test10b:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test10b:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>zeroinitializer)
  ret <4 x double> %res
}

define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
; AVX1-LABEL: test11a:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11a:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11a:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11a:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovaps (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)
  ret <8 x float> %res
}

define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: test11b:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2
; AVX1-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11b:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm2
; AVX2-NEXT:    vblendvps %ymm0, %ymm2, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11b:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11b:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
  ret <8 x i32> %res
}

define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
; AVX1-LABEL: test11c:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11c:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11c:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11c:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1> %mask, <8 x float> zeroinitializer)
  ret <8 x float> %res
}

define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
; AVX1-LABEL: test11d:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test11d:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test11d:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test11d:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> %mask, <8 x i32> zeroinitializer)
  ret <8 x i32> %res
}

define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
; AVX1-LABEL: test12:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test12:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test12:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
; AVX512F-NEXT:    vmovdqu32 %zmm1, (%rdi) {%k1}
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %ymm2, %ymm2, %ymm2
; SKX-NEXT:    vpcmpeqd %ymm2, %ymm0, %k1
; SKX-NEXT:    vmovdqu32 %ymm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
  ret void
}

define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
; AVX1-LABEL: test13:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX1-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test13:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm1, %ymm1
; AVX2-NEXT:    vmaskmovps %ymm3, %ymm1, 32(%rdi)
; AVX2-NEXT:    vmaskmovps %ymm2, %ymm0, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test13:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT:    vmovups %zmm1, (%rdi) {%k1}
; AVX512-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v16f32.p0v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
  ret void
}

define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
; AVX1-LABEL: test14:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test14:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test14:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
; AVX1-LABEL: test15:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test15:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test15:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k1
; SKX-NEXT:    vpmovqd %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
; AVX1-LABEL: test16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
  ret <2 x float> %res
}

define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
; AVX1-LABEL: test17:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX1-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test17:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX2-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test17:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX512F-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm2
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512F-NEXT:    vblendvps %xmm0, %xmm2, %xmm1, %xmm0
; AVX512F-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SKX-NEXT:    vpcmpeqq %xmm2, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT:    vpmovsxdq %xmm0, %xmm0
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
  ret <2 x i32> %res
}

define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
; AVX1-LABEL: test18:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test18:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX2-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test18:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX512F-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test18:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
; SKX-NEXT:    kshiftlw $14, %k0, %k0
; SKX-NEXT:    kshiftrw $14, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
  ret <2 x float> %res
}

define <4 x float> @load_all(<4 x i32> %trigger, <4 x float>* %addr) {
; AVX-LABEL: load_all:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: load_all:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: load_all:
; SKX:       ## BB#0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>, <4 x float>undef)
  ret <4 x float> %res
}

;;; Loads with Constant Masks - these should be optimized to use something other than a variable blend.

; 128-bit FP vectors are supported with AVX.

define <4 x float> @mload_constmask_v4f32(<4 x float>* %addr, <4 x float> %dst) {
; AVX-LABEL: mload_constmask_v4f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0],xmm0[1],mem[2,3]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovaps {{.*#+}} xmm1 = [4294967295,0,4294967295,4294967295]
; AVX512F-NEXT:    vmaskmovps (%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $13, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovups (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x float> %dst)
  ret <4 x float> %res
}

; 128-bit integer vectors are supported with AVX2.

define <4 x i32> @mload_constmask_v4i32(<4 x i32>* %addr, <4 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v4i32:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vmovaps {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX1-NEXT:    vmaskmovps (%rdi), %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i32:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX2-NEXT:    vpmaskmovd (%rdi), %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,4294967295]
; AVX512F-NEXT:    vpmaskmovd (%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $14, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x i32> %dst)
  ret <4 x i32> %res
}

; 256-bit FP vectors are supported with AVX.

define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst) {
; AVX-LABEL: mload_constmask_v8f32:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,0,0,0,0,0]
; AVX-NEXT:    vmaskmovps (%rdi), %ymm1, %ymm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v8f32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    movw $7, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vmovups (%rdi), %zmm0 {%k1}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v8f32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovups (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %dst)
  ret <8 x float> %res
}

define <4 x double> @mload_constmask_v4f64(<4 x double>* %addr, <4 x double> %dst) {
; AVX-LABEL: mload_constmask_v4f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm1
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovapd {{.*#+}} ymm1 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f64:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovupd (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> %dst)
  ret <4 x double> %res
}

; 256-bit integer vectors are supported with AVX2.

define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
; AVX1-LABEL: mload_constmask_v8i32:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v8i32:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3,4,5,6],mem[7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v8i32:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    movw $135, %ax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1}
; AVX512F-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v8i32:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $-121, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x i32> %dst)
  ret <8 x i32> %res
}

define <4 x i64> @mload_constmask_v4i64(<4 x i64>* %addr, <4 x i64> %dst) {
; AVX1-LABEL: mload_constmask_v4i64:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0],ymm0[1,2],mem[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i64:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1],ymm0[2,3,4,5],mem[6,7]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i64:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [18446744073709551615,0,0,18446744073709551615]
; AVX512F-NEXT:    vpmaskmovq (%rdi), %ymm1, %ymm2
; AVX512F-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i64:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $9, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1}
; SKX-NEXT:    retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i64> %dst)
  ret <4 x i64> %res
}

; 512-bit FP vectors are supported with AVX512.

define <8 x double> @mload_constmask_v8f64(<8 x double>* %addr, <8 x double> %dst) {
; AVX-LABEL: mload_constmask_v8f64:
; AVX:       ## BB#0:
; AVX-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0,1,2],mem[3]
; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = mem[0,1,2],ymm0[3]
; AVX-NEXT:    retq
;
; AVX512-LABEL: mload_constmask_v8f64:
; AVX512:       ## BB#0:
; AVX512-NEXT:    movb $-121, %al
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vmovupd (%rdi), %zmm0 {%k1}
; AVX512-NEXT:    retq
  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1> <i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %dst)
  ret <8 x double> %res
}

; If the pass-through operand is undef, no blend is needed.

define <4 x double> @mload_constmask_v4f64_undef_passthrough(<4 x double>* %addr) {
; AVX-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4f64_undef_passthrough:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovapd {{.*#+}} ymm0 = [18446744073709551615,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4f64_undef_passthrough:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1> <i1 1, i1 1, i1 1, i1 0>, <4 x double> undef)
  ret <4 x double> %res
}

define <4 x i64> @mload_constmask_v4i64_undef_passthrough(<4 x i64>* %addr) {
; AVX1-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vmovapd {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: mload_constmask_v4i64_undef_passthrough:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,18446744073709551615,18446744073709551615,0]
; AVX512F-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: mload_constmask_v4i64_undef_passthrough:
; SKX:       ## BB#0:
; SKX-NEXT:    movb $6, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1> <i1 0, i1 1, i1 1, i1 0>, <4 x i64> undef)
  ret <4 x i64> %res
}

define void @test21(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
; AVX1-LABEL: test21:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test21:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test21:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test21:
; SKX:       ## BB#0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vmovdqu32 %xmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 true, i1 true, i1 true>)
  ret void
}

;  When only one element of the mask is set, reduce to a scalar store.
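; Conceptually, the transform for the stores below amounts to this scalar IR (a
; sketch only; %ptr0 is a hypothetical pointer to element 0 of %addr):
;   %elt = extractelement <4 x i32> %val, i64 0
;   store i32 %elt, i32* %ptr0, align 4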

define void @one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
; AVX-LABEL: one_mask_bit_set1:
; AVX:       ## BB#0:
; AVX-NEXT:    vmovd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set1:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vmovd %xmm0, (%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>)
  ret void
}

; Choose a different element to show that the correct address offset is produced.

define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
; AVX-LABEL: one_mask_bit_set2:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractps $2, %xmm0, 8(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set2:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vextractps $2, %xmm0, 8(%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX-LABEL: one_mask_bit_set3:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vmovlps %xmm0, 16(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512F-LABEL: one_mask_bit_set3:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovq %xmm0, 16(%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: one_mask_bit_set3:
; SKX:       ## BB#0:
; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vmovq %xmm0, 16(%rdi)
; SKX-NEXT:    retq
  call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
  ret void
}

; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.

define void @one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
; AVX-LABEL: one_mask_bit_set4:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vmovhpd %xmm0, 24(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512F-LABEL: one_mask_bit_set4:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovhpd %xmm0, 24(%rdi)
; AVX512F-NEXT:    retq
;
; SKX-LABEL: one_mask_bit_set4:
; SKX:       ## BB#0:
; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0
; SKX-NEXT:    vmovhpd %xmm0, 24(%rdi)
; SKX-NEXT:    retq
  call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>)
  ret void
}

; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.

define void @one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
; AVX-LABEL: one_mask_bit_set5:
; AVX:       ## BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX-NEXT:    vmovlps %xmm0, 48(%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: one_mask_bit_set5:
; AVX512:       ## BB#0:
; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; AVX512-NEXT:    vmovlpd %xmm0, 48(%rdi)
; AVX512-NEXT:    retq
  call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false>)
  ret void
}

;  When only one element of the mask is set, reduce to a scalar load.
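; Conceptually (again a sketch only; %ptr0 is a hypothetical pointer to element 0
; of %addr), the loads below reduce to:
;   %elt = load i32, i32* %ptr0, align 4
;   %res = insertelement <4 x i32> %val, i32 %elt, i64 0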
1324
1325define <4 x i32> @load_one_mask_bit_set1(<4 x i32>* %addr, <4 x i32> %val) {
1326; AVX-LABEL: load_one_mask_bit_set1:
1327; AVX:       ## BB#0:
1328; AVX-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
1329; AVX-NEXT:    retq
1330;
1331; AVX512-LABEL: load_one_mask_bit_set1:
1332; AVX512:       ## BB#0:
1333; AVX512-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
1334; AVX512-NEXT:    retq
1335  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1><i1 true, i1 false, i1 false, i1 false>, <4 x i32> %val)
1336  ret <4 x i32> %res
1337}
1338
1339; Choose a different element to show that the correct address offset is produced.
1340
1341define <4 x float> @load_one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
1342; AVX-LABEL: load_one_mask_bit_set2:
1343; AVX:       ## BB#0:
1344; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1345; AVX-NEXT:    retq
1346;
1347; AVX512-LABEL: load_one_mask_bit_set2:
1348; AVX512:       ## BB#0:
1349; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
1350; AVX512-NEXT:    retq
1351  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x float> %val)
1352  ret <4 x float> %res
1353}
1354
1355; Choose a different scalar type and a high element of a 256-bit vector because AVX doesn't support those evenly.
1356
1357define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
1358; AVX1-LABEL: load_one_mask_bit_set3:
1359; AVX1:       ## BB#0:
1360; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
1361; AVX1-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
1362; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1363; AVX1-NEXT:    retq
1364;
1365; AVX2-LABEL: load_one_mask_bit_set3:
1366; AVX2:       ## BB#0:
1367; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
1368; AVX2-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
1369; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1370; AVX2-NEXT:    retq
1371;
1372; AVX512F-LABEL: load_one_mask_bit_set3:
1373; AVX512F:       ## BB#0:
1374; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
1375; AVX512F-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
1376; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
1377; AVX512F-NEXT:    retq
1378;
1379; SKX-LABEL: load_one_mask_bit_set3:
1380; SKX:       ## BB#0:
1381; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm1
1382; SKX-NEXT:    vpinsrq $0, 16(%rdi), %xmm1, %xmm1
1383; SKX-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
1384; SKX-NEXT:    retq
1385  %res = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>, <4 x i64> %val)
1386  ret <4 x i64> %res
1387}
1388
1389; Same again with double elements: the high-lane element is merged with a vmovhpd load
1389; instead of an integer insert.
1390
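; Scalar equivalent (a sketch; hypothetical names): element 3 of <4 x double>
; is the upper half of the high lane, at byte offset 24, which vmovhpd can
; merge straight from memory:
;   %p = getelementptr inbounds <4 x double>, <4 x double>* %addr, i64 0, i64 3
;   %e = load double, double* %p, align 4
;   %res = insertelement <4 x double> %val, double %e, i32 3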
1391define <4 x double> @load_one_mask_bit_set4(<4 x double>* %addr, <4 x double> %val) {
1392; AVX-LABEL: load_one_mask_bit_set4:
1393; AVX:       ## BB#0:
1394; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
1395; AVX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1396; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1397; AVX-NEXT:    retq
1398;
1399; AVX512F-LABEL: load_one_mask_bit_set4:
1400; AVX512F:       ## BB#0:
1401; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
1402; AVX512F-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1403; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
1404; AVX512F-NEXT:    retq
1405;
1406; SKX-LABEL: load_one_mask_bit_set4:
1407; SKX:       ## BB#0:
1408; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm1
1409; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1410; SKX-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
1411; SKX-NEXT:    retq
1412  %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 false, i1 true>, <4 x double> %val)
1413  ret <4 x double> %res
1414}
1415
1416; Try a 512-bit vector to make sure AVX doesn't die and AVX512 works as expected.
1417
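; Scalar equivalent (a sketch; hypothetical names): mask bit 7 selects the
; last element, at byte offset 56:
;   %p = getelementptr inbounds <8 x double>, <8 x double>* %addr, i64 0, i64 7
;   %e = load double, double* %p, align 4
;   %res = insertelement <8 x double> %val, double %e, i32 7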
1418define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %val) {
1419; AVX-LABEL: load_one_mask_bit_set5:
1420; AVX:       ## BB#0:
1421; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
1422; AVX-NEXT:    vmovsd {{.*#+}} xmm3 = mem[0],zero
1423; AVX-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
1424; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
1425; AVX-NEXT:    retq
1426;
1427; AVX512-LABEL: load_one_mask_bit_set5:
1428; AVX512:       ## BB#0:
1429; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
1430; AVX512-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
1431; AVX512-NEXT:    vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
1432; AVX512-NEXT:    retq
1433  %res = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %addr, i32 4, <8 x i1><i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x double> %val)
1434  ret <8 x double> %res
1435}
1436
1437declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
1438declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
1439declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
1440declare <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>*, i32, <4 x i1>, <4 x i64>)
1441declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
1442declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
1443declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
1444declare void @llvm.masked.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, i32, <4 x i1>)
1445declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
1446declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
1447declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)
1448declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
1449declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
1450declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
1451declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
1452declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
1453declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
1454declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
1455declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
1456declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
1457declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
1458declare void @llvm.masked.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, i32, <4 x i1>)
1459declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
1460declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
1461
1462declare <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>*, i32, <16 x i1>, <16 x i32*>)
1463
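; Vectors of pointers are legalized like vectors of i64: on AVX targets the
; <16 x i32*> load splits into four 256-bit maskmov operations, and on AVX512
; into two 512-bit vmovdqu64 loads with compare-generated k-masks.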
1464define <16 x i32*> @test23(<16 x i32*> %trigger, <16 x i32*>* %addr) {
1465; AVX1-LABEL: test23:
1466; AVX1:       ## BB#0:
1467; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
1468; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
1469; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
1470; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm0, %xmm0
1471; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
1472; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
1473; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
1474; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm1, %xmm1
1475; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm1
1476; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
1477; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
1478; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm2, %xmm2
1479; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
1480; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
1481; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm4, %xmm4
1482; AVX1-NEXT:    vpcmpeqq %xmm5, %xmm3, %xmm3
1483; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
1484; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm3, %ymm3
1485; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm2
1486; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm1
1487; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0
1488; AVX1-NEXT:    retq
1489;
1490; AVX2-LABEL: test23:
1491; AVX2:       ## BB#0:
1492; AVX2-NEXT:    vpxor %ymm4, %ymm4, %ymm4
1493; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm0, %ymm0
1494; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm1, %ymm1
1495; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm2, %ymm2
1496; AVX2-NEXT:    vpcmpeqq %ymm4, %ymm3, %ymm3
1497; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm3, %ymm3
1498; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm2, %ymm2
1499; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm1, %ymm1
1500; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0
1501; AVX2-NEXT:    retq
1502;
1503; AVX512-LABEL: test23:
1504; AVX512:       ## BB#0:
1505; AVX512-NEXT:    vpxord %zmm2, %zmm2, %zmm2
1506; AVX512-NEXT:    vpcmpeqq %zmm2, %zmm0, %k1
1507; AVX512-NEXT:    vpcmpeqq %zmm2, %zmm1, %k2
1508; AVX512-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k2} {z}
1509; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
1510; AVX512-NEXT:    retq
1511  %mask = icmp eq <16 x i32*> %trigger, zeroinitializer
1512  %res = call <16 x i32*> @llvm.masked.load.v16p0i32.p0v16p0i32(<16 x i32*>* %addr, i32 4, <16 x i1>%mask, <16 x i32*>zeroinitializer)
1513  ret <16 x i32*> %res
1514}
1515
1516%mystruct = type { i16, i16, [1 x i8*] }
1517
1518declare <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>*, i32, <16 x i1>, <16 x %mystruct*>)
1519
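; The <16 x i1> mask arrives as a vector of bytes: AVX targets widen each
; 128-bit chunk with pmovzx/pslld/psrad sign-extension, AVX512F funnels the
; mask through vptestmd into a k-register, and SKX forms it directly with
; vpsllw+vpmovb2m; kshiftrw then yields the mask bits for the upper half.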
1520define <16 x %mystruct*> @test24(<16 x i1> %mask, <16 x %mystruct*>* %addr) {
1521; AVX1-LABEL: test24:
1522; AVX1:       ## BB#0:
1523; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1524; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1525; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1526; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
1527; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1528; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1529; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1530; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm1, %ymm4
1531; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1532; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1533; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1534; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1535; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
1536; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1537; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1538; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1539; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm1, %ymm3
1540; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1541; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1542; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1543; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1544; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
1545; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1546; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1547; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
1548; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm1, %ymm2
1549; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1550; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1551; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1552; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1553; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
1554; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1555; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
1556; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1557; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm0, %ymm1
1558; AVX1-NEXT:    vmovapd %ymm4, %ymm0
1559; AVX1-NEXT:    retq
1560;
1561; AVX2-LABEL: test24:
1562; AVX2:       ## BB#0:
1563; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1564; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1565; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1566; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1567; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm1, %ymm4
1568; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1569; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1570; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1571; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1572; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1573; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm1, %ymm3
1574; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1575; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1576; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1577; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1578; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1579; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm1, %ymm2
1580; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1581; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1582; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1583; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1584; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
1585; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm0, %ymm1
1586; AVX2-NEXT:    vmovdqa %ymm4, %ymm0
1587; AVX2-NEXT:    retq
1588;
1589; AVX512F-LABEL: test24:
1590; AVX512F:       ## BB#0:
1591; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1592; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1593; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1594; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
1595; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
1596; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
1597; AVX512F-NEXT:    retq
1598;
1599; SKX-LABEL: test24:
1600; SKX:       ## BB#0:
1601; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1602; SKX-NEXT:    vpmovb2m %xmm0, %k1
1603; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z}
1604; SKX-NEXT:    kshiftrw $8, %k1, %k1
1605; SKX-NEXT:    vmovdqu64 64(%rdi), %zmm1 {%k1} {z}
1606; SKX-NEXT:    retq
1607  %res = call <16 x %mystruct*> @llvm.masked.load.v16p0mystruct.p0v16p0mystruct(<16 x %mystruct*>* %addr, i32 4, <16 x i1>%mask, <16 x %mystruct*>zeroinitializer)
1608  ret <16 x %mystruct*> %res
1609}
1610
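; A <16 x i64> masked store splits the same way: four 256-bit masked stores
; on AVX targets, or two 512-bit stores on AVX512 with kshiftrw providing
; the mask bits for the upper eight elements.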
1611define void @test_store_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
1612; AVX1-LABEL: test_store_16i64:
1613; AVX1:       ## BB#0:
1614; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1615; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
1616; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
1617; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
1618; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
1619; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
1620; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
1621; AVX1-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
1622; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1623; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1624; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1625; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1626; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm5
1627; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1628; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1629; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
1630; AVX1-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
1631; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1632; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1633; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1634; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1635; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm4
1636; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1637; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1638; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
1639; AVX1-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
1640; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1641; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1642; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1643; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1644; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
1645; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1646; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
1647; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1648; AVX1-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
1649; AVX1-NEXT:    vzeroupper
1650; AVX1-NEXT:    retq
1651;
1652; AVX2-LABEL: test_store_16i64:
1653; AVX2:       ## BB#0:
1654; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1655; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
1656; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
1657; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
1658; AVX2-NEXT:    vpmaskmovq %ymm1, %ymm5, (%rdi)
1659; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1660; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1661; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1662; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1663; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1664; AVX2-NEXT:    vpmaskmovq %ymm4, %ymm1, 96(%rdi)
1665; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1666; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1667; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1668; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1669; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1670; AVX2-NEXT:    vpmaskmovq %ymm3, %ymm1, 64(%rdi)
1671; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1672; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1673; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1674; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1675; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
1676; AVX2-NEXT:    vpmaskmovq %ymm2, %ymm0, 32(%rdi)
1677; AVX2-NEXT:    vzeroupper
1678; AVX2-NEXT:    retq
1679;
1680; AVX512F-LABEL: test_store_16i64:
1681; AVX512F:       ## BB#0:
1682; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1683; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1684; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1685; AVX512F-NEXT:    vmovdqu64 %zmm1, (%rdi) {%k1}
1686; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
1687; AVX512F-NEXT:    vmovdqu64 %zmm2, 64(%rdi) {%k1}
1688; AVX512F-NEXT:    retq
1689;
1690; SKX-LABEL: test_store_16i64:
1691; SKX:       ## BB#0:
1692; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1693; SKX-NEXT:    vpmovb2m %xmm0, %k1
1694; SKX-NEXT:    vmovdqu64 %zmm1, (%rdi) {%k1}
1695; SKX-NEXT:    kshiftrw $8, %k1, %k1
1696; SKX-NEXT:    vmovdqu64 %zmm2, 64(%rdi) {%k1}
1697; SKX-NEXT:    retq
1698  call void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32 4, <16 x i1> %mask)
1699  ret void
1700}
1701declare void @llvm.masked.store.v16i64.p0v16i64(<16 x i64> %src0, <16 x i64>* %ptrs, i32, <16 x i1> %mask)
1702
1703define void @test_store_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
1704; AVX1-LABEL: test_store_16f64:
1705; AVX1:       ## BB#0:
1706; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1707; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
1708; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
1709; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
1710; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
1711; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
1712; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
1713; AVX1-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
1714; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1715; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1716; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1717; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1718; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm5
1719; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1720; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1721; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm5, %ymm1
1722; AVX1-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
1723; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1724; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1725; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1726; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1727; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm4
1728; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1729; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1730; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm4, %ymm1
1731; AVX1-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
1732; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1733; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1734; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1735; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1736; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
1737; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1738; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
1739; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
1740; AVX1-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
1741; AVX1-NEXT:    vzeroupper
1742; AVX1-NEXT:    retq
1743;
1744; AVX2-LABEL: test_store_16f64:
1745; AVX2:       ## BB#0:
1746; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1747; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
1748; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
1749; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
1750; AVX2-NEXT:    vmaskmovpd %ymm1, %ymm5, (%rdi)
1751; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
1752; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1753; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1754; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1755; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1756; AVX2-NEXT:    vmaskmovpd %ymm4, %ymm1, 96(%rdi)
1757; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1758; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1759; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1760; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1761; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1762; AVX2-NEXT:    vmaskmovpd %ymm3, %ymm1, 64(%rdi)
1763; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
1764; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1765; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1766; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1767; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
1768; AVX2-NEXT:    vmaskmovpd %ymm2, %ymm0, 32(%rdi)
1769; AVX2-NEXT:    vzeroupper
1770; AVX2-NEXT:    retq
1771;
1772; AVX512F-LABEL: test_store_16f64:
1773; AVX512F:       ## BB#0:
1774; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1775; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1776; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1777; AVX512F-NEXT:    vmovupd %zmm1, (%rdi) {%k1}
1778; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
1779; AVX512F-NEXT:    vmovupd %zmm2, 64(%rdi) {%k1}
1780; AVX512F-NEXT:    retq
1781;
1782; SKX-LABEL: test_store_16f64:
1783; SKX:       ## BB#0:
1784; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1785; SKX-NEXT:    vpmovb2m %xmm0, %k1
1786; SKX-NEXT:    vmovupd %zmm1, (%rdi) {%k1}
1787; SKX-NEXT:    kshiftrw $8, %k1, %k1
1788; SKX-NEXT:    vmovupd %zmm2, 64(%rdi) {%k1}
1789; SKX-NEXT:    retq
1790  call void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32 4, <16 x i1> %mask)
1791  ret void
1792}
1793declare void @llvm.masked.store.v16f64.p0v16f64(<16 x double> %src0, <16 x double>* %ptrs, i32, <16 x i1> %mask)
1794
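; Masked loads with a passthru value additionally need a blend on AVX targets
; (vblendvpd merges the loaded lanes into %src0); AVX512 performs the merge
; as part of the masked vmovdqu64/vmovupd itself.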
1795define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
1796; AVX1-LABEL: test_load_16i64:
1797; AVX1:       ## BB#0:
1798; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1799; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
1800; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
1801; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
1802; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
1803; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
1804; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
1805; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
1806; AVX1-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
1807; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1808; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1809; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1810; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1811; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm6
1812; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1813; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1814; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
1815; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
1816; AVX1-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
1817; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
1818; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
1819; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
1820; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
1821; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm6
1822; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
1823; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
1824; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm6, %ymm2
1825; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
1826; AVX1-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
1827; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
1828; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1829; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1830; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1831; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm3
1832; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1833; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
1834; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1835; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
1836; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
1837; AVX1-NEXT:    vmovapd %ymm5, %ymm0
1838; AVX1-NEXT:    retq
1839;
1840; AVX2-LABEL: test_load_16i64:
1841; AVX2:       ## BB#0:
1842; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1843; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
1844; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
1845; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
1846; AVX2-NEXT:    vpmaskmovq (%rdi), %ymm5, %ymm6
1847; AVX2-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
1848; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1849; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1850; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1851; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1852; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1853; AVX2-NEXT:    vpmaskmovq 32(%rdi), %ymm1, %ymm6
1854; AVX2-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
1855; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
1856; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
1857; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
1858; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
1859; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
1860; AVX2-NEXT:    vpmaskmovq 64(%rdi), %ymm2, %ymm6
1861; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
1862; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
1863; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1864; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1865; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1866; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
1867; AVX2-NEXT:    vpmaskmovq 96(%rdi), %ymm0, %ymm3
1868; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
1869; AVX2-NEXT:    vmovapd %ymm5, %ymm0
1870; AVX2-NEXT:    retq
1871;
1872; AVX512F-LABEL: test_load_16i64:
1873; AVX512F:       ## BB#0:
1874; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1875; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1876; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1877; AVX512F-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1}
1878; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
1879; AVX512F-NEXT:    vmovdqu64 64(%rdi), %zmm2 {%k1}
1880; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1881; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
1882; AVX512F-NEXT:    retq
1883;
1884; SKX-LABEL: test_load_16i64:
1885; SKX:       ## BB#0:
1886; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1887; SKX-NEXT:    vpmovb2m %xmm0, %k1
1888; SKX-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1}
1889; SKX-NEXT:    kshiftrw $8, %k1, %k1
1890; SKX-NEXT:    vmovdqu64 64(%rdi), %zmm2 {%k1}
1891; SKX-NEXT:    vmovaps %zmm1, %zmm0
1892; SKX-NEXT:    vmovaps %zmm2, %zmm1
1893; SKX-NEXT:    retq
1894  %res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
1895  ret <16 x i64> %res
1896}
1897declare <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
1898
1899define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
1900; AVX1-LABEL: test_load_16f64:
1901; AVX1:       ## BB#0:
1902; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1903; AVX1-NEXT:    vpslld $31, %xmm5, %xmm5
1904; AVX1-NEXT:    vpsrad $31, %xmm5, %xmm5
1905; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm6
1906; AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm5[2,3,0,1]
1907; AVX1-NEXT:    vpmovsxdq %xmm5, %xmm5
1908; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm6, %ymm5
1909; AVX1-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
1910; AVX1-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
1911; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1912; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1913; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
1914; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
1915; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm6
1916; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
1917; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
1918; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm6, %ymm1
1919; AVX1-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
1920; AVX1-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
1921; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
1922; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
1923; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
1924; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
1925; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm6
1926; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
1927; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
1928; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm6, %ymm2
1929; AVX1-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
1930; AVX1-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
1931; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
1932; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1933; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
1934; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
1935; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm3
1936; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
1937; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
1938; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
1939; AVX1-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
1940; AVX1-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
1941; AVX1-NEXT:    vmovapd %ymm5, %ymm0
1942; AVX1-NEXT:    retq
1943;
1944; AVX2-LABEL: test_load_16f64:
1945; AVX2:       ## BB#0:
1946; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1947; AVX2-NEXT:    vpslld $31, %xmm5, %xmm5
1948; AVX2-NEXT:    vpsrad $31, %xmm5, %xmm5
1949; AVX2-NEXT:    vpmovsxdq %xmm5, %ymm5
1950; AVX2-NEXT:    vmaskmovpd (%rdi), %ymm5, %ymm6
1951; AVX2-NEXT:    vblendvpd %ymm5, %ymm6, %ymm1, %ymm5
1952; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1953; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
1954; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
1955; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
1956; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
1957; AVX2-NEXT:    vmaskmovpd 32(%rdi), %ymm1, %ymm6
1958; AVX2-NEXT:    vblendvpd %ymm1, %ymm6, %ymm2, %ymm1
1959; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
1960; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
1961; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
1962; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
1963; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
1964; AVX2-NEXT:    vmaskmovpd 64(%rdi), %ymm2, %ymm6
1965; AVX2-NEXT:    vblendvpd %ymm2, %ymm6, %ymm3, %ymm2
1966; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
1967; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1968; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
1969; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
1970; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
1971; AVX2-NEXT:    vmaskmovpd 96(%rdi), %ymm0, %ymm3
1972; AVX2-NEXT:    vblendvpd %ymm0, %ymm3, %ymm4, %ymm3
1973; AVX2-NEXT:    vmovapd %ymm5, %ymm0
1974; AVX2-NEXT:    retq
1975;
1976; AVX512F-LABEL: test_load_16f64:
1977; AVX512F:       ## BB#0:
1978; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
1979; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
1980; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
1981; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
1982; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
1983; AVX512F-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
1984; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
1985; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
1986; AVX512F-NEXT:    retq
1987;
1988; SKX-LABEL: test_load_16f64:
1989; SKX:       ## BB#0:
1990; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
1991; SKX-NEXT:    vpmovb2m %xmm0, %k1
1992; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
1993; SKX-NEXT:    kshiftrw $8, %k1, %k1
1994; SKX-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
1995; SKX-NEXT:    vmovaps %zmm1, %zmm0
1996; SKX-NEXT:    vmovaps %zmm2, %zmm1
1997; SKX-NEXT:    retq
1998  %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
1999  ret <16 x double> %res
2000}
2001declare <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
2002
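; With 32 mask bits, SKX reads the mask from a ymm (vpsllw+vpmovb2m) and
; carves the 32-bit k-register into four 8-bit pieces via kshiftrd/kshiftrw,
; one per 512-bit chunk. The AVX versions also set up an aligned frame
; because the eighth 256-bit data argument is passed on the stack.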
2003define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 x double> %src0)  {
2004; AVX1-LABEL: test_load_32f64:
2005; AVX1:       ## BB#0:
2006; AVX1-NEXT:    pushq %rbp
2007; AVX1-NEXT:  Ltmp0:
2008; AVX1-NEXT:    .cfi_def_cfa_offset 16
2009; AVX1-NEXT:  Ltmp1:
2010; AVX1-NEXT:    .cfi_offset %rbp, -16
2011; AVX1-NEXT:    movq %rsp, %rbp
2012; AVX1-NEXT:  Ltmp2:
2013; AVX1-NEXT:    .cfi_def_cfa_register %rbp
2014; AVX1-NEXT:    andq $-32, %rsp
2015; AVX1-NEXT:    subq $32, %rsp
2016; AVX1-NEXT:    vmovapd 16(%rbp), %ymm8
2017; AVX1-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
2018; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
2019; AVX1-NEXT:    vpslld $31, %xmm9, %xmm9
2020; AVX1-NEXT:    vpsrad $31, %xmm9, %xmm9
2021; AVX1-NEXT:    vpmovsxdq %xmm9, %xmm10
2022; AVX1-NEXT:    vpshufd {{.*#+}} xmm9 = xmm9[2,3,0,1]
2023; AVX1-NEXT:    vpmovsxdq %xmm9, %xmm9
2024; AVX1-NEXT:    vinsertf128 $1, %xmm9, %ymm10, %ymm9
2025; AVX1-NEXT:    vmaskmovpd 32(%rsi), %ymm9, %ymm10
2026; AVX1-NEXT:    vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
2027; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
2028; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2029; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
2030; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
2031; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm10
2032; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
2033; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
2034; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm10, %ymm2
2035; AVX1-NEXT:    vmaskmovpd 64(%rsi), %ymm2, %ymm10
2036; AVX1-NEXT:    vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
2037; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
2038; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2039; AVX1-NEXT:    vpslld $31, %xmm2, %xmm2
2040; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
2041; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm10
2042; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
2043; AVX1-NEXT:    vpmovsxdq %xmm2, %xmm2
2044; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm10, %ymm2
2045; AVX1-NEXT:    vmaskmovpd 96(%rsi), %ymm2, %ymm10
2046; AVX1-NEXT:    vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
2047; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
2048; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
2049; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2050; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
2051; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
2052; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
2053; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2054; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
2055; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
2056; AVX1-NEXT:    vmaskmovpd 160(%rsi), %ymm3, %ymm10
2057; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm6, %ymm6
2058; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
2059; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2060; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
2061; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
2062; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
2063; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2064; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
2065; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
2066; AVX1-NEXT:    vmaskmovpd 192(%rsi), %ymm3, %ymm10
2067; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm7, %ymm7
2068; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[3,1,2,3]
2069; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2070; AVX1-NEXT:    vpslld $31, %xmm3, %xmm3
2071; AVX1-NEXT:    vpsrad $31, %xmm3, %xmm3
2072; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm10
2073; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
2074; AVX1-NEXT:    vpmovsxdq %xmm3, %xmm3
2075; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm10, %ymm3
2076; AVX1-NEXT:    vmaskmovpd 224(%rsi), %ymm3, %ymm10
2077; AVX1-NEXT:    vblendvpd %ymm3, %ymm10, %ymm8, %ymm3
2078; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2079; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
2080; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
2081; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm8
2082; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2083; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
2084; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm8, %ymm0
2085; AVX1-NEXT:    vmaskmovpd (%rsi), %ymm0, %ymm8
2086; AVX1-NEXT:    vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
2087; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2088; AVX1-NEXT:    vpslld $31, %xmm1, %xmm1
2089; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
2090; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm2
2091; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
2092; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
2093; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
2094; AVX1-NEXT:    vmaskmovpd 128(%rsi), %ymm1, %ymm2
2095; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
2096; AVX1-NEXT:    vmovapd %ymm1, 128(%rdi)
2097; AVX1-NEXT:    vmovapd %ymm0, (%rdi)
2098; AVX1-NEXT:    vmovapd %ymm3, 224(%rdi)
2099; AVX1-NEXT:    vmovapd %ymm7, 192(%rdi)
2100; AVX1-NEXT:    vmovapd %ymm6, 160(%rdi)
2101; AVX1-NEXT:    vmovapd %ymm4, 96(%rdi)
2102; AVX1-NEXT:    vmovapd %ymm11, 64(%rdi)
2103; AVX1-NEXT:    vmovapd %ymm9, 32(%rdi)
2104; AVX1-NEXT:    movq %rdi, %rax
2105; AVX1-NEXT:    movq %rbp, %rsp
2106; AVX1-NEXT:    popq %rbp
2107; AVX1-NEXT:    vzeroupper
2108; AVX1-NEXT:    retq
2109;
2110; AVX2-LABEL: test_load_32f64:
2111; AVX2:       ## BB#0:
2112; AVX2-NEXT:    pushq %rbp
2113; AVX2-NEXT:  Ltmp0:
2114; AVX2-NEXT:    .cfi_def_cfa_offset 16
2115; AVX2-NEXT:  Ltmp1:
2116; AVX2-NEXT:    .cfi_offset %rbp, -16
2117; AVX2-NEXT:    movq %rsp, %rbp
2118; AVX2-NEXT:  Ltmp2:
2119; AVX2-NEXT:    .cfi_def_cfa_register %rbp
2120; AVX2-NEXT:    andq $-32, %rsp
2121; AVX2-NEXT:    subq $32, %rsp
2122; AVX2-NEXT:    vmovapd 16(%rbp), %ymm8
2123; AVX2-NEXT:    vpshufd {{.*#+}} xmm9 = xmm0[1,1,2,3]
2124; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
2125; AVX2-NEXT:    vpslld $31, %xmm9, %xmm9
2126; AVX2-NEXT:    vpsrad $31, %xmm9, %xmm9
2127; AVX2-NEXT:    vpmovsxdq %xmm9, %ymm9
2128; AVX2-NEXT:    vmaskmovpd 32(%rsi), %ymm9, %ymm10
2129; AVX2-NEXT:    vblendvpd %ymm9, %ymm10, %ymm2, %ymm9
2130; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
2131; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2132; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
2133; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
2134; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
2135; AVX2-NEXT:    vmaskmovpd 64(%rsi), %ymm2, %ymm10
2136; AVX2-NEXT:    vblendvpd %ymm2, %ymm10, %ymm3, %ymm11
2137; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
2138; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2139; AVX2-NEXT:    vpslld $31, %xmm2, %xmm2
2140; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
2141; AVX2-NEXT:    vpmovsxdq %xmm2, %ymm2
2142; AVX2-NEXT:    vmaskmovpd 96(%rsi), %ymm2, %ymm10
2143; AVX2-NEXT:    vblendvpd %ymm2, %ymm10, %ymm4, %ymm4
2144; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
2145; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm2[1,1,2,3]
2146; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
2147; AVX2-NEXT:    vpslld $31, %xmm3, %xmm3
2148; AVX2-NEXT:    vpsrad $31, %xmm3, %xmm3
2149; AVX2-NEXT:    vpmovsxdq %xmm3, %ymm3
2150; AVX2-NEXT:    vmaskmovpd 160(%rsi), %ymm3, %ymm10
2151; AVX2-NEXT:    vblendvpd %ymm3, %ymm10, %ymm6, %ymm3
2152; AVX2-NEXT:    vpshufd {{.*#+}} xmm6 = xmm2[2,3,0,1]
2153; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
2154; AVX2-NEXT:    vpslld $31, %xmm6, %xmm6
2155; AVX2-NEXT:    vpsrad $31, %xmm6, %xmm6
2156; AVX2-NEXT:    vpmovsxdq %xmm6, %ymm6
2157; AVX2-NEXT:    vmaskmovpd 192(%rsi), %ymm6, %ymm10
2158; AVX2-NEXT:    vblendvpd %ymm6, %ymm10, %ymm7, %ymm6
2159; AVX2-NEXT:    vpshufd {{.*#+}} xmm7 = xmm2[3,1,2,3]
2160; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
2161; AVX2-NEXT:    vpslld $31, %xmm7, %xmm7
2162; AVX2-NEXT:    vpsrad $31, %xmm7, %xmm7
2163; AVX2-NEXT:    vpmovsxdq %xmm7, %ymm7
2164; AVX2-NEXT:    vmaskmovpd 224(%rsi), %ymm7, %ymm10
2165; AVX2-NEXT:    vblendvpd %ymm7, %ymm10, %ymm8, %ymm7
2166; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
2167; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
2168; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
2169; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
2170; AVX2-NEXT:    vmaskmovpd (%rsi), %ymm0, %ymm8
2171; AVX2-NEXT:    vblendvpd %ymm0, %ymm8, %ymm1, %ymm0
2172; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
2173; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
2174; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
2175; AVX2-NEXT:    vpmovsxdq %xmm1, %ymm1
2176; AVX2-NEXT:    vmaskmovpd 128(%rsi), %ymm1, %ymm2
2177; AVX2-NEXT:    vblendvpd %ymm1, %ymm2, %ymm5, %ymm1
2178; AVX2-NEXT:    vmovapd %ymm1, 128(%rdi)
2179; AVX2-NEXT:    vmovapd %ymm0, (%rdi)
2180; AVX2-NEXT:    vmovapd %ymm7, 224(%rdi)
2181; AVX2-NEXT:    vmovapd %ymm6, 192(%rdi)
2182; AVX2-NEXT:    vmovapd %ymm3, 160(%rdi)
2183; AVX2-NEXT:    vmovapd %ymm4, 96(%rdi)
2184; AVX2-NEXT:    vmovapd %ymm11, 64(%rdi)
2185; AVX2-NEXT:    vmovapd %ymm9, 32(%rdi)
2186; AVX2-NEXT:    movq %rdi, %rax
2187; AVX2-NEXT:    movq %rbp, %rsp
2188; AVX2-NEXT:    popq %rbp
2189; AVX2-NEXT:    vzeroupper
2190; AVX2-NEXT:    retq
2191;
2192; AVX512F-LABEL: test_load_32f64:
2193; AVX512F:       ## BB#0:
2194; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm5
2195; AVX512F-NEXT:    vpmovsxbd %xmm5, %zmm5
2196; AVX512F-NEXT:    vpslld $31, %zmm5, %zmm5
2197; AVX512F-NEXT:    vptestmd %zmm5, %zmm5, %k1
2198; AVX512F-NEXT:    vmovupd 128(%rdi), %zmm3 {%k1}
2199; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
2200; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
2201; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k2
2202; AVX512F-NEXT:    vmovupd (%rdi), %zmm1 {%k2}
2203; AVX512F-NEXT:    kshiftrw $8, %k1, %k1
2204; AVX512F-NEXT:    vmovupd 192(%rdi), %zmm4 {%k1}
2205; AVX512F-NEXT:    kshiftrw $8, %k2, %k1
2206; AVX512F-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
2207; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
2208; AVX512F-NEXT:    vmovaps %zmm2, %zmm1
2209; AVX512F-NEXT:    vmovaps %zmm3, %zmm2
2210; AVX512F-NEXT:    vmovaps %zmm4, %zmm3
2211; AVX512F-NEXT:    retq
2212;
2213; SKX-LABEL: test_load_32f64:
2214; SKX:       ## BB#0:
2215; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
2216; SKX-NEXT:    vpmovb2m %ymm0, %k1
2217; SKX-NEXT:    vmovupd (%rdi), %zmm1 {%k1}
2218; SKX-NEXT:    kshiftrd $16, %k1, %k2
2219; SKX-NEXT:    vmovupd 128(%rdi), %zmm3 {%k2}
2220; SKX-NEXT:    kshiftrw $8, %k1, %k1
2221; SKX-NEXT:    vmovupd 64(%rdi), %zmm2 {%k1}
2222; SKX-NEXT:    kshiftrw $8, %k2, %k1
2223; SKX-NEXT:    vmovupd 192(%rdi), %zmm4 {%k1}
2224; SKX-NEXT:    vmovaps %zmm1, %zmm0
2225; SKX-NEXT:    vmovaps %zmm2, %zmm1
2226; SKX-NEXT:    vmovaps %zmm3, %zmm2
2227; SKX-NEXT:    vmovaps %zmm4, %zmm3
2228; SKX-NEXT:    retq
2229  %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
2230  ret <32 x double> %res
2231}
2232
2233declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0)
2234
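; There is no byte-granularity maskmov instruction before AVX512BW, so AVX
; and AVX512F scalarize <16 x i8> masked loads into a branchy chain of
; per-element mask tests and vpinsrb loads; only SKX (avx512bw+avx512vl) can
; use vmovdqu8 with a k-mask.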
2235define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
2236; AVX-LABEL: test_mask_load_16xi8:
2237; AVX:       ## BB#0:
2238; AVX-NEXT:    vpextrb $0, %xmm0, %eax
2239; AVX-NEXT:    ## implicit-def: %XMM1
2240; AVX-NEXT:    testb $1, %al
2241; AVX-NEXT:    je LBB50_2
2242; AVX-NEXT:  ## BB#1: ## %cond.load
2243; AVX-NEXT:    movzbl (%rdi), %eax
2244; AVX-NEXT:    vmovd %eax, %xmm1
2245; AVX-NEXT:  LBB50_2: ## %else
2246; AVX-NEXT:    vpextrb $1, %xmm0, %eax
2247; AVX-NEXT:    testb $1, %al
2248; AVX-NEXT:    je LBB50_4
2249; AVX-NEXT:  ## BB#3: ## %cond.load1
2250; AVX-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm1
2251; AVX-NEXT:  LBB50_4: ## %else2
2252; AVX-NEXT:    vpextrb $2, %xmm0, %eax
2253; AVX-NEXT:    testb $1, %al
2254; AVX-NEXT:    je LBB50_6
2255; AVX-NEXT:  ## BB#5: ## %cond.load4
2256; AVX-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm1
2257; AVX-NEXT:  LBB50_6: ## %else5
2258; AVX-NEXT:    vpextrb $3, %xmm0, %eax
2259; AVX-NEXT:    testb $1, %al
2260; AVX-NEXT:    je LBB50_8
2261; AVX-NEXT:  ## BB#7: ## %cond.load7
2262; AVX-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm1
2263; AVX-NEXT:  LBB50_8: ## %else8
2264; AVX-NEXT:    vpextrb $4, %xmm0, %eax
2265; AVX-NEXT:    testb $1, %al
2266; AVX-NEXT:    je LBB50_10
2267; AVX-NEXT:  ## BB#9: ## %cond.load10
2268; AVX-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm1
2269; AVX-NEXT:  LBB50_10: ## %else11
2270; AVX-NEXT:    vpextrb $5, %xmm0, %eax
2271; AVX-NEXT:    testb $1, %al
2272; AVX-NEXT:    je LBB50_12
2273; AVX-NEXT:  ## BB#11: ## %cond.load13
2274; AVX-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm1
2275; AVX-NEXT:  LBB50_12: ## %else14
2276; AVX-NEXT:    vpextrb $6, %xmm0, %eax
2277; AVX-NEXT:    testb $1, %al
2278; AVX-NEXT:    je LBB50_14
2279; AVX-NEXT:  ## BB#13: ## %cond.load16
2280; AVX-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm1
2281; AVX-NEXT:  LBB50_14: ## %else17
2282; AVX-NEXT:    vpextrb $7, %xmm0, %eax
2283; AVX-NEXT:    testb $1, %al
2284; AVX-NEXT:    je LBB50_16
2285; AVX-NEXT:  ## BB#15: ## %cond.load19
2286; AVX-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm1
2287; AVX-NEXT:  LBB50_16: ## %else20
2288; AVX-NEXT:    vpextrb $8, %xmm0, %eax
2289; AVX-NEXT:    testb $1, %al
2290; AVX-NEXT:    je LBB50_18
2291; AVX-NEXT:  ## BB#17: ## %cond.load22
2292; AVX-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm1
2293; AVX-NEXT:  LBB50_18: ## %else23
2294; AVX-NEXT:    vpextrb $9, %xmm0, %eax
2295; AVX-NEXT:    testb $1, %al
2296; AVX-NEXT:    je LBB50_20
2297; AVX-NEXT:  ## BB#19: ## %cond.load25
2298; AVX-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm1
2299; AVX-NEXT:  LBB50_20: ## %else26
2300; AVX-NEXT:    vpextrb $10, %xmm0, %eax
2301; AVX-NEXT:    testb $1, %al
2302; AVX-NEXT:    je LBB50_22
2303; AVX-NEXT:  ## BB#21: ## %cond.load28
2304; AVX-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm1
2305; AVX-NEXT:  LBB50_22: ## %else29
2306; AVX-NEXT:    vpextrb $11, %xmm0, %eax
2307; AVX-NEXT:    testb $1, %al
2308; AVX-NEXT:    je LBB50_24
2309; AVX-NEXT:  ## BB#23: ## %cond.load31
2310; AVX-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm1
2311; AVX-NEXT:  LBB50_24: ## %else32
2312; AVX-NEXT:    vpextrb $12, %xmm0, %eax
2313; AVX-NEXT:    testb $1, %al
2314; AVX-NEXT:    je LBB50_26
2315; AVX-NEXT:  ## BB#25: ## %cond.load34
2316; AVX-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm1
2317; AVX-NEXT:  LBB50_26: ## %else35
2318; AVX-NEXT:    vpextrb $13, %xmm0, %eax
2319; AVX-NEXT:    testb $1, %al
2320; AVX-NEXT:    je LBB50_28
2321; AVX-NEXT:  ## BB#27: ## %cond.load37
2322; AVX-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm1
2323; AVX-NEXT:  LBB50_28: ## %else38
2324; AVX-NEXT:    vpextrb $14, %xmm0, %eax
2325; AVX-NEXT:    testb $1, %al
2326; AVX-NEXT:    je LBB50_30
2327; AVX-NEXT:  ## BB#29: ## %cond.load40
2328; AVX-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm1
2329; AVX-NEXT:  LBB50_30: ## %else41
2330; AVX-NEXT:    vpextrb $15, %xmm0, %eax
2331; AVX-NEXT:    testb $1, %al
2332; AVX-NEXT:    je LBB50_32
2333; AVX-NEXT:  ## BB#31: ## %cond.load43
2334; AVX-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm1
2335; AVX-NEXT:  LBB50_32: ## %else44
2336; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
2337; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
2338; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm0, %xmm0
2339; AVX-NEXT:    retq
2340;
2341; AVX512F-LABEL: test_mask_load_16xi8:
2342; AVX512F:       ## BB#0:
2343; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
2344; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
2345; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
2346; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
2347; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2348; AVX512F-NEXT:    kmovw %k0, %eax
2349; AVX512F-NEXT:    ## implicit-def: %XMM0
2350; AVX512F-NEXT:    testb %al, %al
2351; AVX512F-NEXT:    je LBB50_2
2352; AVX512F-NEXT:  ## BB#1: ## %cond.load
2353; AVX512F-NEXT:    movzbl (%rdi), %eax
2354; AVX512F-NEXT:    vmovd %eax, %xmm0
2355; AVX512F-NEXT:  LBB50_2: ## %else
2356; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
2357; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2358; AVX512F-NEXT:    kmovw %k0, %eax
2359; AVX512F-NEXT:    testb %al, %al
2360; AVX512F-NEXT:    je LBB50_4
2361; AVX512F-NEXT:  ## BB#3: ## %cond.load1
2362; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm0
2363; AVX512F-NEXT:  LBB50_4: ## %else2
2364; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
2365; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2366; AVX512F-NEXT:    kmovw %k0, %eax
2367; AVX512F-NEXT:    testb %al, %al
2368; AVX512F-NEXT:    je LBB50_6
2369; AVX512F-NEXT:  ## BB#5: ## %cond.load4
2370; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm0
2371; AVX512F-NEXT:  LBB50_6: ## %else5
2372; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
2373; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2374; AVX512F-NEXT:    kmovw %k0, %eax
2375; AVX512F-NEXT:    testb %al, %al
2376; AVX512F-NEXT:    je LBB50_8
2377; AVX512F-NEXT:  ## BB#7: ## %cond.load7
2378; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm0
2379; AVX512F-NEXT:  LBB50_8: ## %else8
2380; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
2381; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2382; AVX512F-NEXT:    kmovw %k0, %eax
2383; AVX512F-NEXT:    testb %al, %al
2384; AVX512F-NEXT:    je LBB50_10
2385; AVX512F-NEXT:  ## BB#9: ## %cond.load10
2386; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm0
2387; AVX512F-NEXT:  LBB50_10: ## %else11
2388; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
2389; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2390; AVX512F-NEXT:    kmovw %k0, %eax
2391; AVX512F-NEXT:    testb %al, %al
2392; AVX512F-NEXT:    je LBB50_12
2393; AVX512F-NEXT:  ## BB#11: ## %cond.load13
2394; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm0
2395; AVX512F-NEXT:  LBB50_12: ## %else14
2396; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
2397; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2398; AVX512F-NEXT:    kmovw %k0, %eax
2399; AVX512F-NEXT:    testb %al, %al
2400; AVX512F-NEXT:    je LBB50_14
2401; AVX512F-NEXT:  ## BB#13: ## %cond.load16
2402; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm0
2403; AVX512F-NEXT:  LBB50_14: ## %else17
2404; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
2405; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2406; AVX512F-NEXT:    kmovw %k0, %eax
2407; AVX512F-NEXT:    testb %al, %al
2408; AVX512F-NEXT:    je LBB50_16
2409; AVX512F-NEXT:  ## BB#15: ## %cond.load19
2410; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm0
2411; AVX512F-NEXT:  LBB50_16: ## %else20
2412; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
2413; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2414; AVX512F-NEXT:    kmovw %k0, %eax
2415; AVX512F-NEXT:    testb %al, %al
2416; AVX512F-NEXT:    je LBB50_18
2417; AVX512F-NEXT:  ## BB#17: ## %cond.load22
2418; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm0
2419; AVX512F-NEXT:  LBB50_18: ## %else23
2420; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
2421; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2422; AVX512F-NEXT:    kmovw %k0, %eax
2423; AVX512F-NEXT:    testb %al, %al
2424; AVX512F-NEXT:    je LBB50_20
2425; AVX512F-NEXT:  ## BB#19: ## %cond.load25
2426; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm0
2427; AVX512F-NEXT:  LBB50_20: ## %else26
2428; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
2429; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2430; AVX512F-NEXT:    kmovw %k0, %eax
2431; AVX512F-NEXT:    testb %al, %al
2432; AVX512F-NEXT:    je LBB50_22
2433; AVX512F-NEXT:  ## BB#21: ## %cond.load28
2434; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm0
2435; AVX512F-NEXT:  LBB50_22: ## %else29
2436; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
2437; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2438; AVX512F-NEXT:    kmovw %k0, %eax
2439; AVX512F-NEXT:    testb %al, %al
2440; AVX512F-NEXT:    je LBB50_24
2441; AVX512F-NEXT:  ## BB#23: ## %cond.load31
2442; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm0
2443; AVX512F-NEXT:  LBB50_24: ## %else32
2444; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
2445; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2446; AVX512F-NEXT:    kmovw %k0, %eax
2447; AVX512F-NEXT:    testb %al, %al
2448; AVX512F-NEXT:    je LBB50_26
2449; AVX512F-NEXT:  ## BB#25: ## %cond.load34
2450; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm0
2451; AVX512F-NEXT:  LBB50_26: ## %else35
2452; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
2453; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2454; AVX512F-NEXT:    kmovw %k0, %eax
2455; AVX512F-NEXT:    testb %al, %al
2456; AVX512F-NEXT:    je LBB50_28
2457; AVX512F-NEXT:  ## BB#27: ## %cond.load37
2458; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm0
2459; AVX512F-NEXT:  LBB50_28: ## %else38
2460; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
2461; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2462; AVX512F-NEXT:    kmovw %k0, %eax
2463; AVX512F-NEXT:    testb %al, %al
2464; AVX512F-NEXT:    je LBB50_30
2465; AVX512F-NEXT:  ## BB#29: ## %cond.load40
2466; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm0
2467; AVX512F-NEXT:  LBB50_30: ## %else41
2468; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
2469; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
2470; AVX512F-NEXT:    kmovw %k0, %eax
2471; AVX512F-NEXT:    testb %al, %al
2472; AVX512F-NEXT:    je LBB50_32
2473; AVX512F-NEXT:  ## BB#31: ## %cond.load43
2474; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm0
2475; AVX512F-NEXT:  LBB50_32: ## %else44
2476; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
2477; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
2478; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
2479; AVX512F-NEXT:    vpblendvb %xmm1, %xmm0, %xmm0, %xmm0
2480; AVX512F-NEXT:    retq
2481;
2482; SKX-LABEL: test_mask_load_16xi8:
2483; SKX:       ## BB#0:
2484; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
2485; SKX-NEXT:    vpmovb2m %xmm0, %k1
2486; SKX-NEXT:    vmovdqu8 (%rdi), %xmm0 {%k1} {z}
2487; SKX-NEXT:    retq
2488  %res = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
2489  ret <16 x i8> %res
2490}
2491declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
2492
define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; AVX1-LABEL: test_mask_load_32xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    ## implicit-def: %YMM1
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:  LBB51_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_6: ## %else5
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_8: ## %else8
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_10: ## %else11
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_12: ## %else14
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_14: ## %else17
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_16: ## %else20
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_18: ## %else23
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_20: ## %else26
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_22: ## %else29
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_24: ## %else32
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_26: ## %else35
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_28: ## %else38
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_30: ## %else41
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB51_32: ## %else44
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_34
; AVX1-NEXT:  ## BB#33: ## %cond.load46
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_34: ## %else47
; AVX1-NEXT:    vpextrb $1, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_36
; AVX1-NEXT:  ## BB#35: ## %cond.load49
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_36: ## %else50
; AVX1-NEXT:    vpextrb $2, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_38
; AVX1-NEXT:  ## BB#37: ## %cond.load52
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_38: ## %else53
; AVX1-NEXT:    vpextrb $3, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_40
; AVX1-NEXT:  ## BB#39: ## %cond.load55
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_40: ## %else56
; AVX1-NEXT:    vpextrb $4, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_42
; AVX1-NEXT:  ## BB#41: ## %cond.load58
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_42: ## %else59
; AVX1-NEXT:    vpextrb $5, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_44
; AVX1-NEXT:  ## BB#43: ## %cond.load61
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_44: ## %else62
; AVX1-NEXT:    vpextrb $6, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_46
; AVX1-NEXT:  ## BB#45: ## %cond.load64
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_46: ## %else65
; AVX1-NEXT:    vpextrb $7, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_48
; AVX1-NEXT:  ## BB#47: ## %cond.load67
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_48: ## %else68
; AVX1-NEXT:    vpextrb $8, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_50
; AVX1-NEXT:  ## BB#49: ## %cond.load70
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_50: ## %else71
; AVX1-NEXT:    vpextrb $9, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_52
; AVX1-NEXT:  ## BB#51: ## %cond.load73
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_52: ## %else74
; AVX1-NEXT:    vpextrb $10, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_54
; AVX1-NEXT:  ## BB#53: ## %cond.load76
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_54: ## %else77
; AVX1-NEXT:    vpextrb $11, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_56
; AVX1-NEXT:  ## BB#55: ## %cond.load79
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_56: ## %else80
; AVX1-NEXT:    vpextrb $12, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_58
; AVX1-NEXT:  ## BB#57: ## %cond.load82
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_58: ## %else83
; AVX1-NEXT:    vpextrb $13, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_60
; AVX1-NEXT:  ## BB#59: ## %cond.load85
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_60: ## %else86
; AVX1-NEXT:    vpextrb $14, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_62
; AVX1-NEXT:  ## BB#61: ## %cond.load88
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_62: ## %else89
; AVX1-NEXT:    vpextrb $15, %xmm2, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB51_64
; AVX1-NEXT:  ## BB#63: ## %cond.load91
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT:    vpinsrb $15, 31(%rdi), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT:  LBB51_64: ## %else92
; AVX1-NEXT:    vpsllw $7, %xmm2, %xmm2
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpgtb %xmm2, %xmm4, %xmm2
; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpgtb %xmm0, %xmm4, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_load_32xi8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    ## implicit-def: %YMM1
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_2
; AVX2-NEXT:  ## BB#1: ## %cond.load
; AVX2-NEXT:    movzbl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:  LBB51_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_4
; AVX2-NEXT:  ## BB#3: ## %cond.load1
; AVX2-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_6
; AVX2-NEXT:  ## BB#5: ## %cond.load4
; AVX2-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_6: ## %else5
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_8
; AVX2-NEXT:  ## BB#7: ## %cond.load7
; AVX2-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_8: ## %else8
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_10
; AVX2-NEXT:  ## BB#9: ## %cond.load10
; AVX2-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_10: ## %else11
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_12
; AVX2-NEXT:  ## BB#11: ## %cond.load13
; AVX2-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_12: ## %else14
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_14
; AVX2-NEXT:  ## BB#13: ## %cond.load16
; AVX2-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_14: ## %else17
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_16
; AVX2-NEXT:  ## BB#15: ## %cond.load19
; AVX2-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_16: ## %else20
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_18
; AVX2-NEXT:  ## BB#17: ## %cond.load22
; AVX2-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_18: ## %else23
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_20
; AVX2-NEXT:  ## BB#19: ## %cond.load25
; AVX2-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_20: ## %else26
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_22
; AVX2-NEXT:  ## BB#21: ## %cond.load28
; AVX2-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_22: ## %else29
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_24
; AVX2-NEXT:  ## BB#23: ## %cond.load31
; AVX2-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_24: ## %else32
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_26
; AVX2-NEXT:  ## BB#25: ## %cond.load34
; AVX2-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_26: ## %else35
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_28
; AVX2-NEXT:  ## BB#27: ## %cond.load37
; AVX2-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_28: ## %else38
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_30
; AVX2-NEXT:  ## BB#29: ## %cond.load40
; AVX2-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_30: ## %else41
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_32
; AVX2-NEXT:  ## BB#31: ## %cond.load43
; AVX2-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB51_32: ## %else44
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_34
; AVX2-NEXT:  ## BB#33: ## %cond.load46
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_34: ## %else47
; AVX2-NEXT:    vpextrb $1, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_36
; AVX2-NEXT:  ## BB#35: ## %cond.load49
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_36: ## %else50
; AVX2-NEXT:    vpextrb $2, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_38
; AVX2-NEXT:  ## BB#37: ## %cond.load52
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_38: ## %else53
; AVX2-NEXT:    vpextrb $3, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_40
; AVX2-NEXT:  ## BB#39: ## %cond.load55
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_40: ## %else56
; AVX2-NEXT:    vpextrb $4, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_42
; AVX2-NEXT:  ## BB#41: ## %cond.load58
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_42: ## %else59
; AVX2-NEXT:    vpextrb $5, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_44
; AVX2-NEXT:  ## BB#43: ## %cond.load61
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_44: ## %else62
; AVX2-NEXT:    vpextrb $6, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_46
; AVX2-NEXT:  ## BB#45: ## %cond.load64
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_46: ## %else65
; AVX2-NEXT:    vpextrb $7, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_48
; AVX2-NEXT:  ## BB#47: ## %cond.load67
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_48: ## %else68
; AVX2-NEXT:    vpextrb $8, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_50
; AVX2-NEXT:  ## BB#49: ## %cond.load70
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_50: ## %else71
; AVX2-NEXT:    vpextrb $9, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_52
; AVX2-NEXT:  ## BB#51: ## %cond.load73
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_52: ## %else74
; AVX2-NEXT:    vpextrb $10, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_54
; AVX2-NEXT:  ## BB#53: ## %cond.load76
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_54: ## %else77
; AVX2-NEXT:    vpextrb $11, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_56
; AVX2-NEXT:  ## BB#55: ## %cond.load79
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_56: ## %else80
; AVX2-NEXT:    vpextrb $12, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_58
; AVX2-NEXT:  ## BB#57: ## %cond.load82
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_58: ## %else83
; AVX2-NEXT:    vpextrb $13, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_60
; AVX2-NEXT:  ## BB#59: ## %cond.load85
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_60: ## %else86
; AVX2-NEXT:    vpextrb $14, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_62
; AVX2-NEXT:  ## BB#61: ## %cond.load88
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_62: ## %else89
; AVX2-NEXT:    vpextrb $15, %xmm2, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB51_64
; AVX2-NEXT:  ## BB#63: ## %cond.load91
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrb $15, 31(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB51_64: ## %else92
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_32xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    ## implicit-def: %YMM1
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm1
; AVX512F-NEXT:  LBB51_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_6: ## %else5
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_8: ## %else8
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_10: ## %else11
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_12: ## %else14
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_14: ## %else17
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_16: ## %else20
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_18: ## %else23
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_20: ## %else26
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_22: ## %else29
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_24: ## %else32
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_26: ## %else35
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_28: ## %else38
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_30
; AVX512F-NEXT:  ## BB#29: ## %cond.load40
; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_30: ## %else41
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_32
; AVX512F-NEXT:  ## BB#31: ## %cond.load43
; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB51_32: ## %else44
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpextrb $0, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_34
; AVX512F-NEXT:  ## BB#33: ## %cond.load46
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $0, 16(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_34: ## %else47
; AVX512F-NEXT:    vpextrb $1, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_36
; AVX512F-NEXT:  ## BB#35: ## %cond.load49
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $1, 17(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_36: ## %else50
; AVX512F-NEXT:    vpextrb $2, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_38
; AVX512F-NEXT:  ## BB#37: ## %cond.load52
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $2, 18(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_38: ## %else53
; AVX512F-NEXT:    vpextrb $3, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_40
; AVX512F-NEXT:  ## BB#39: ## %cond.load55
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $3, 19(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_40: ## %else56
; AVX512F-NEXT:    vpextrb $4, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_42
; AVX512F-NEXT:  ## BB#41: ## %cond.load58
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $4, 20(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_42: ## %else59
; AVX512F-NEXT:    vpextrb $5, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_44
; AVX512F-NEXT:  ## BB#43: ## %cond.load61
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $5, 21(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_44: ## %else62
; AVX512F-NEXT:    vpextrb $6, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_46
; AVX512F-NEXT:  ## BB#45: ## %cond.load64
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $6, 22(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_46: ## %else65
; AVX512F-NEXT:    vpextrb $7, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_48
; AVX512F-NEXT:  ## BB#47: ## %cond.load67
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $7, 23(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_48: ## %else68
; AVX512F-NEXT:    vpextrb $8, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_50
; AVX512F-NEXT:  ## BB#49: ## %cond.load70
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $8, 24(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_50: ## %else71
; AVX512F-NEXT:    vpextrb $9, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_52
; AVX512F-NEXT:  ## BB#51: ## %cond.load73
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $9, 25(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_52: ## %else74
; AVX512F-NEXT:    vpextrb $10, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_54
; AVX512F-NEXT:  ## BB#53: ## %cond.load76
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $10, 26(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_54: ## %else77
; AVX512F-NEXT:    vpextrb $11, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_56
; AVX512F-NEXT:  ## BB#55: ## %cond.load79
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $11, 27(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_56: ## %else80
; AVX512F-NEXT:    vpextrb $12, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_58
; AVX512F-NEXT:  ## BB#57: ## %cond.load82
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $12, 28(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_58: ## %else83
; AVX512F-NEXT:    vpextrb $13, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_60
; AVX512F-NEXT:  ## BB#59: ## %cond.load85
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $13, 29(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_60: ## %else86
; AVX512F-NEXT:    vpextrb $14, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_62
; AVX512F-NEXT:  ## BB#61: ## %cond.load88
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpinsrb $14, 30(%rdi), %xmm3, %xmm3
; AVX512F-NEXT:    vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_62: ## %else89
; AVX512F-NEXT:    vpextrb $15, %xmm2, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB51_64
; AVX512F-NEXT:  ## BB#63: ## %cond.load91
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $15, 31(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB51_64: ## %else92
; AVX512F-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX512F-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; AVX512F-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_32xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu8 (%rdi), %ymm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>)

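; Note: a <64 x i1> mask argument is split into sixty-four scalar i1 arguments, so
; only the first six mask bits arrive in GPRs (%dil, %sil, %dl, %cl, %r8b, %r9b) and
; the pointer plus the remaining 58 bits are read back from the stack; that is why
; the AVX1 lowering below saves every callee-saved GPR and keeps spilling and
; reloading mask bytes around its 64 conditional-load blocks.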
define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; AVX1-LABEL: test_mask_load_64xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:  Ltmp3:
; AVX1-NEXT:    .cfi_def_cfa_offset 16
; AVX1-NEXT:    pushq %r15
; AVX1-NEXT:  Ltmp4:
; AVX1-NEXT:    .cfi_def_cfa_offset 24
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:  Ltmp5:
; AVX1-NEXT:    .cfi_def_cfa_offset 32
; AVX1-NEXT:    pushq %r13
; AVX1-NEXT:  Ltmp6:
; AVX1-NEXT:    .cfi_def_cfa_offset 40
; AVX1-NEXT:    pushq %r12
; AVX1-NEXT:  Ltmp7:
; AVX1-NEXT:    .cfi_def_cfa_offset 48
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:  Ltmp8:
; AVX1-NEXT:    .cfi_def_cfa_offset 56
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:  Ltmp9:
; AVX1-NEXT:    .cfi_def_cfa_offset 64
; AVX1-NEXT:  Ltmp10:
; AVX1-NEXT:    .cfi_offset %rbx, -56
; AVX1-NEXT:  Ltmp11:
; AVX1-NEXT:    .cfi_offset %r12, -48
; AVX1-NEXT:  Ltmp12:
; AVX1-NEXT:    .cfi_offset %r13, -40
; AVX1-NEXT:  Ltmp13:
; AVX1-NEXT:    .cfi_offset %r14, -32
; AVX1-NEXT:  Ltmp14:
; AVX1-NEXT:    .cfi_offset %r15, -24
; AVX1-NEXT:  Ltmp15:
; AVX1-NEXT:    .cfi_offset %rbp, -16
; AVX1-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movl %edi, %r13d
; AVX1-NEXT:    testb $1, %dil
; AVX1-NEXT:    je LBB52_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzbl (%rax), %ebp
; AVX1-NEXT:    vmovd %ebp, %xmm9
; AVX1-NEXT:  LBB52_2: ## %else
; AVX1-NEXT:    testb $1, %sil
; AVX1-NEXT:    je LBB52_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrb $1, 1(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_4: ## %else2
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB52_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrb $2, 2(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_6: ## %else5
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB52_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrb $3, 3(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_8: ## %else8
; AVX1-NEXT:    testb $1, %r8b
; AVX1-NEXT:    je LBB52_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrb $4, 4(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_10: ## %else11
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
; AVX1-NEXT:    testb $1, %r9b
; AVX1-NEXT:    je LBB52_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrb $5, 5(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_12: ## %else14
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
; AVX1-NEXT:    testb $1, %r10b
; AVX1-NEXT:    je LBB52_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrb $6, 6(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_14: ## %else17
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
; AVX1-NEXT:    testb $1, %r11b
; AVX1-NEXT:    je LBB52_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrb $7, 7(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_16: ## %else20
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
; AVX1-NEXT:    testb $1, %r14b
; AVX1-NEXT:    je LBB52_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vpinsrb $8, 8(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_18: ## %else23
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
; AVX1-NEXT:    testb $1, %r15b
; AVX1-NEXT:    je LBB52_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vpinsrb $9, 9(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_20: ## %else26
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dil
; AVX1-NEXT:    testb $1, %r12b
; AVX1-NEXT:    je LBB52_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vpinsrb $10, 10(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_22: ## %else29
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
; AVX1-NEXT:    testb $1, %dil
; AVX1-NEXT:    je LBB52_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vpinsrb $11, 11(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_24: ## %else32
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %bl
; AVX1-NEXT:    testb $1, %bpl
; AVX1-NEXT:    je LBB52_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vpinsrb $12, 12(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_26: ## %else35
; AVX1-NEXT:    testb $1, %bl
; AVX1-NEXT:    je LBB52_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vpinsrb $13, 13(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_28: ## %else38
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vpinsrb $14, 14(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_30: ## %else41
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vpinsrb $15, 15(%rax), %xmm9, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7]
; AVX1-NEXT:  LBB52_32: ## %else44
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_34
; AVX1-NEXT:  ## BB#33: ## %cond.load46
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $0, 16(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_34: ## %else47
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_36
; AVX1-NEXT:  ## BB#35: ## %cond.load49
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $1, 17(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_36: ## %else50
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_38
; AVX1-NEXT:  ## BB#37: ## %cond.load52
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $2, 18(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_38: ## %else53
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_40
; AVX1-NEXT:  ## BB#39: ## %cond.load55
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $3, 19(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_40: ## %else56
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_42
; AVX1-NEXT:  ## BB#41: ## %cond.load58
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $4, 20(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_42: ## %else59
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_44
; AVX1-NEXT:  ## BB#43: ## %cond.load61
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $5, 21(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_44: ## %else62
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_46
; AVX1-NEXT:  ## BB#45: ## %cond.load64
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $6, 22(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_46: ## %else65
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_48
; AVX1-NEXT:  ## BB#47: ## %cond.load67
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $7, 23(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_48: ## %else68
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_50
; AVX1-NEXT:  ## BB#49: ## %cond.load70
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $8, 24(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_50: ## %else71
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_52
; AVX1-NEXT:  ## BB#51: ## %cond.load73
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $9, 25(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_52: ## %else74
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_54
; AVX1-NEXT:  ## BB#53: ## %cond.load76
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $10, 26(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_54: ## %else77
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_56
; AVX1-NEXT:  ## BB#55: ## %cond.load79
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $11, 27(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_56: ## %else80
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_58
; AVX1-NEXT:  ## BB#57: ## %cond.load82
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $12, 28(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_58: ## %else83
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_60
; AVX1-NEXT:  ## BB#59: ## %cond.load85
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $13, 29(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_60: ## %else86
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_62
; AVX1-NEXT:  ## BB#61: ## %cond.load88
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $14, 30(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_62: ## %else89
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_64
; AVX1-NEXT:  ## BB#63: ## %cond.load91
; AVX1-NEXT:    vextractf128 $1, %ymm9, %xmm3
; AVX1-NEXT:    vpinsrb $15, 31(%rax), %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm9, %ymm9
; AVX1-NEXT:  LBB52_64: ## %else92
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_66
; AVX1-NEXT:  ## BB#65: ## %cond.load94
; AVX1-NEXT:    vpinsrb $0, 32(%rax), %xmm0, %xmm3
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:  LBB52_66: ## %else95
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_68
; AVX1-NEXT:  ## BB#67: ## %cond.load97
; AVX1-NEXT:    vpinsrb $1, 33(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_68: ## %else98
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_70
; AVX1-NEXT:  ## BB#69: ## %cond.load100
; AVX1-NEXT:    vpinsrb $2, 34(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_70: ## %else101
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_72
; AVX1-NEXT:  ## BB#71: ## %cond.load103
; AVX1-NEXT:    vpinsrb $3, 35(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_72: ## %else104
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_74
; AVX1-NEXT:  ## BB#73: ## %cond.load106
; AVX1-NEXT:    vpinsrb $4, 36(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_74: ## %else107
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_76
; AVX1-NEXT:  ## BB#75: ## %cond.load109
; AVX1-NEXT:    vpinsrb $5, 37(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_76: ## %else110
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_78
; AVX1-NEXT:  ## BB#77: ## %cond.load112
; AVX1-NEXT:    vpinsrb $6, 38(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_78: ## %else113
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_80
; AVX1-NEXT:  ## BB#79: ## %cond.load115
; AVX1-NEXT:    vpinsrb $7, 39(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_80: ## %else116
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_82
; AVX1-NEXT:  ## BB#81: ## %cond.load118
; AVX1-NEXT:    vpinsrb $8, 40(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_82: ## %else119
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_84
; AVX1-NEXT:  ## BB#83: ## %cond.load121
; AVX1-NEXT:    vpinsrb $9, 41(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_84: ## %else122
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_86
; AVX1-NEXT:  ## BB#85: ## %cond.load124
; AVX1-NEXT:    vpinsrb $10, 42(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_86: ## %else125
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_88
; AVX1-NEXT:  ## BB#87: ## %cond.load127
; AVX1-NEXT:    vpinsrb $11, 43(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_88: ## %else128
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_90
; AVX1-NEXT:  ## BB#89: ## %cond.load130
; AVX1-NEXT:    vpinsrb $12, 44(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_90: ## %else131
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_92
; AVX1-NEXT:  ## BB#91: ## %cond.load133
; AVX1-NEXT:    vpinsrb $13, 45(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_92: ## %else134
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_94
; AVX1-NEXT:  ## BB#93: ## %cond.load136
; AVX1-NEXT:    vpinsrb $14, 46(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_94: ## %else137
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_96
; AVX1-NEXT:  ## BB#95: ## %cond.load139
; AVX1-NEXT:    vpinsrb $15, 47(%rax), %xmm3, %xmm4
; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX1-NEXT:  LBB52_96: ## %else140
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_98
; AVX1-NEXT:  ## BB#97: ## %cond.load142
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $0, 48(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_98: ## %else143
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_100
; AVX1-NEXT:  ## BB#99: ## %cond.load145
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $1, 49(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_100: ## %else146
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_102
; AVX1-NEXT:  ## BB#101: ## %cond.load148
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $2, 50(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_102: ## %else149
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_104
; AVX1-NEXT:  ## BB#103: ## %cond.load151
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $3, 51(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_104: ## %else152
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_106
; AVX1-NEXT:  ## BB#105: ## %cond.load154
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $4, 52(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_106: ## %else155
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_108
; AVX1-NEXT:  ## BB#107: ## %cond.load157
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $5, 53(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_108: ## %else158
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_110
; AVX1-NEXT:  ## BB#109: ## %cond.load160
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $6, 54(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_110: ## %else161
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_112
; AVX1-NEXT:  ## BB#111: ## %cond.load163
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $7, 55(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_112: ## %else164
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_114
; AVX1-NEXT:  ## BB#113: ## %cond.load166
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $8, 56(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_114: ## %else167
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_116
; AVX1-NEXT:  ## BB#115: ## %cond.load169
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $9, 57(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_116: ## %else170
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_118
; AVX1-NEXT:  ## BB#117: ## %cond.load172
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $10, 58(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_118: ## %else173
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_120
; AVX1-NEXT:  ## BB#119: ## %cond.load175
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $11, 59(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_120: ## %else176
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_122
; AVX1-NEXT:  ## BB#121: ## %cond.load178
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $12, 60(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_122: ## %else179
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_124
; AVX1-NEXT:  ## BB#123: ## %cond.load181
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $13, 61(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_124: ## %else182
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    je LBB52_126
; AVX1-NEXT:  ## BB#125: ## %cond.load184
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $14, 62(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_126: ## %else185
; AVX1-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; AVX1-NEXT:    movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movl %r8d, (%rsp) ## 4-byte Spill
; AVX1-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    je LBB52_128
; AVX1-NEXT:  ## BB#127: ## %cond.load187
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vpinsrb $15, 63(%rax), %xmm4, %xmm4
; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT:  LBB52_128: ## %else188
; AVX1-NEXT:    movzbl %r10b, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %r11b, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %r14b, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %r15b, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %r12b, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %dil, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %bpl, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl %bl, %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; AVX1-NEXT:    movzbl %r13b, %r13d
; AVX1-NEXT:    vmovd %r13d, %xmm4
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
; AVX1-NEXT:    movzbl %dil, %ebp
; AVX1-NEXT:    vpinsrb $1, %ebp, %xmm4, %xmm4
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT:    movzbl %bpl, %ebp
; AVX1-NEXT:    vpinsrb $2, %ebp, %xmm4, %xmm4
; AVX1-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT:    movzbl %bpl, %ebp
; AVX1-NEXT:    vpinsrb $3, %ebp, %xmm4, %xmm4
; AVX1-NEXT:    movl (%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT:    movzbl %bpl, %ebp
; AVX1-NEXT:    vpinsrb $4, %ebp, %xmm4, %xmm4
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX1-NEXT:    movzbl %bpl, %ebp
; AVX1-NEXT:    vpinsrb $5, %ebp, %xmm4, %xmm4
3815; AVX1-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3816; AVX1-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3817; AVX1-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3818; AVX1-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3819; AVX1-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3820; AVX1-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3821; AVX1-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3822; AVX1-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3823; AVX1-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3824; AVX1-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
3825; AVX1-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
3826; AVX1-NEXT:    ## xmm5 = mem[0],zero,zero,zero
3827; AVX1-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3828; AVX1-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3829; AVX1-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3830; AVX1-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3831; AVX1-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3832; AVX1-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3833; AVX1-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3834; AVX1-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3835; AVX1-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3836; AVX1-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3837; AVX1-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3838; AVX1-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3839; AVX1-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3840; AVX1-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
3841; AVX1-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm8 ## 4-byte Folded Reload
3842; AVX1-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm6 ## 4-byte Folded Reload
3843; AVX1-NEXT:    ## xmm6 = mem[0],zero,zero,zero
3844; AVX1-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
3845; AVX1-NEXT:    vpinsrb $2, %r12d, %xmm6, %xmm6
3846; AVX1-NEXT:    vpinsrb $3, %r15d, %xmm6, %xmm6
3847; AVX1-NEXT:    vpinsrb $4, %r14d, %xmm6, %xmm6
3848; AVX1-NEXT:    vpinsrb $5, %r11d, %xmm6, %xmm6
3849; AVX1-NEXT:    vpinsrb $6, %r8d, %xmm6, %xmm6
3850; AVX1-NEXT:    vpinsrb $7, %edx, %xmm6, %xmm6
3851; AVX1-NEXT:    vpinsrb $8, %eax, %xmm6, %xmm6
3852; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
3853; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
3854; AVX1-NEXT:    vpinsrb $9, %ecx, %xmm6, %xmm6
3855; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
3856; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
3857; AVX1-NEXT:    vpinsrb $10, %esi, %xmm6, %xmm6
3858; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
3859; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
3860; AVX1-NEXT:    vpinsrb $11, %r9d, %xmm6, %xmm6
3861; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
3862; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
3863; AVX1-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
3864; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
3865; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
3866; AVX1-NEXT:    vpinsrb $13, %ebx, %xmm6, %xmm6
3867; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
3868; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
3869; AVX1-NEXT:    vpinsrb $14, %r13d, %xmm6, %xmm6
3870; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
3871; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
3872; AVX1-NEXT:    vpinsrb $15, %r14d, %xmm6, %xmm10
3873; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
3874; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
3875; AVX1-NEXT:    vmovd %edi, %xmm7
3876; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
3877; AVX1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
3878; AVX1-NEXT:    vpinsrb $1, %r11d, %xmm7, %xmm7
3879; AVX1-NEXT:    vpinsrb $2, %r15d, %xmm7, %xmm7
3880; AVX1-NEXT:    vpinsrb $3, %r12d, %xmm7, %xmm7
3881; AVX1-NEXT:    vpinsrb $4, %r8d, %xmm7, %xmm7
3882; AVX1-NEXT:    vpinsrb $5, %ecx, %xmm7, %xmm7
3883; AVX1-NEXT:    vpinsrb $6, %r9d, %xmm7, %xmm7
3884; AVX1-NEXT:    vpinsrb $7, %esi, %xmm7, %xmm7
3885; AVX1-NEXT:    vpinsrb $8, %r10d, %xmm7, %xmm7
3886; AVX1-NEXT:    vpinsrb $9, %eax, %xmm7, %xmm7
3887; AVX1-NEXT:    vpinsrb $10, %r13d, %xmm7, %xmm7
3888; AVX1-NEXT:    vpinsrb $11, %edx, %xmm7, %xmm7
3889; AVX1-NEXT:    vpinsrb $12, %r14d, %xmm7, %xmm7
3890; AVX1-NEXT:    vpinsrb $13, %ebx, %xmm7, %xmm7
3891; AVX1-NEXT:    vpinsrb $14, %edi, %xmm7, %xmm7
3892; AVX1-NEXT:    vpinsrb $15, %ebp, %xmm7, %xmm7
3893; AVX1-NEXT:    vpsllw $7, %xmm4, %xmm4
3894; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
3895; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
3896; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
3897; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm4
3898; AVX1-NEXT:    vpsllw $7, %xmm8, %xmm6
3899; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm6
3900; AVX1-NEXT:    vpcmpgtb %xmm6, %xmm2, %xmm6
3901; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm4, %ymm4
3902; AVX1-NEXT:    vandnps %ymm0, %ymm4, %ymm0
3903; AVX1-NEXT:    vandps %ymm4, %ymm9, %ymm4
3904; AVX1-NEXT:    vorps %ymm0, %ymm4, %ymm0
3905; AVX1-NEXT:    vpsllw $7, %xmm10, %xmm4
3906; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
3907; AVX1-NEXT:    vpcmpgtb %xmm4, %xmm2, %xmm4
3908; AVX1-NEXT:    vpsllw $7, %xmm7, %xmm6
3909; AVX1-NEXT:    vpand %xmm5, %xmm6, %xmm5
3910; AVX1-NEXT:    vpcmpgtb %xmm5, %xmm2, %xmm2
3911; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm4, %ymm2
3912; AVX1-NEXT:    vandnps %ymm1, %ymm2, %ymm1
3913; AVX1-NEXT:    vandps %ymm2, %ymm3, %ymm2
3914; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
3915; AVX1-NEXT:    addq $8, %rsp
3916; AVX1-NEXT:    popq %rbx
3917; AVX1-NEXT:    popq %r12
3918; AVX1-NEXT:    popq %r13
3919; AVX1-NEXT:    popq %r14
3920; AVX1-NEXT:    popq %r15
3921; AVX1-NEXT:    popq %rbp
3922; AVX1-NEXT:    retq
3923;
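; NOTE: Lacking a native 64-byte masked instruction, the AVX1 and AVX2 runs
; appear to scalarize this v64i8 masked load: each i1 mask element is tested
; with 'testb $1' and the corresponding byte is conditionally inserted with
; vpinsrb, which is why these check blocks are so long.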
3924; AVX2-LABEL: test_mask_load_64xi8:
3925; AVX2:       ## BB#0:
3926; AVX2-NEXT:    pushq %rbp
3927; AVX2-NEXT:  Ltmp3:
3928; AVX2-NEXT:    .cfi_def_cfa_offset 16
3929; AVX2-NEXT:    pushq %r15
3930; AVX2-NEXT:  Ltmp4:
3931; AVX2-NEXT:    .cfi_def_cfa_offset 24
3932; AVX2-NEXT:    pushq %r14
3933; AVX2-NEXT:  Ltmp5:
3934; AVX2-NEXT:    .cfi_def_cfa_offset 32
3935; AVX2-NEXT:    pushq %r13
3936; AVX2-NEXT:  Ltmp6:
3937; AVX2-NEXT:    .cfi_def_cfa_offset 40
3938; AVX2-NEXT:    pushq %r12
3939; AVX2-NEXT:  Ltmp7:
3940; AVX2-NEXT:    .cfi_def_cfa_offset 48
3941; AVX2-NEXT:    pushq %rbx
3942; AVX2-NEXT:  Ltmp8:
3943; AVX2-NEXT:    .cfi_def_cfa_offset 56
3944; AVX2-NEXT:    pushq %rax
3945; AVX2-NEXT:  Ltmp9:
3946; AVX2-NEXT:    .cfi_def_cfa_offset 64
3947; AVX2-NEXT:  Ltmp10:
3948; AVX2-NEXT:    .cfi_offset %rbx, -56
3949; AVX2-NEXT:  Ltmp11:
3950; AVX2-NEXT:    .cfi_offset %r12, -48
3951; AVX2-NEXT:  Ltmp12:
3952; AVX2-NEXT:    .cfi_offset %r13, -40
3953; AVX2-NEXT:  Ltmp13:
3954; AVX2-NEXT:    .cfi_offset %r14, -32
3955; AVX2-NEXT:  Ltmp14:
3956; AVX2-NEXT:    .cfi_offset %r15, -24
3957; AVX2-NEXT:  Ltmp15:
3958; AVX2-NEXT:    .cfi_offset %rbp, -16
3959; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
3960; AVX2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
3961; AVX2-NEXT:    testb $1, %dil
3962; AVX2-NEXT:    je LBB52_2
3963; AVX2-NEXT:  ## BB#1: ## %cond.load
3964; AVX2-NEXT:    movzbl (%rax), %ebp
3965; AVX2-NEXT:    vmovd %ebp, %xmm2
3966; AVX2-NEXT:  LBB52_2: ## %else
3967; AVX2-NEXT:    testb $1, %sil
3968; AVX2-NEXT:    je LBB52_4
3969; AVX2-NEXT:  ## BB#3: ## %cond.load1
3970; AVX2-NEXT:    vpinsrb $1, 1(%rax), %xmm2, %xmm3
3971; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
3972; AVX2-NEXT:  LBB52_4: ## %else2
3973; AVX2-NEXT:    testb $1, %dl
3974; AVX2-NEXT:    je LBB52_6
3975; AVX2-NEXT:  ## BB#5: ## %cond.load4
3976; AVX2-NEXT:    vpinsrb $2, 2(%rax), %xmm2, %xmm3
3977; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
3978; AVX2-NEXT:  LBB52_6: ## %else5
3979; AVX2-NEXT:    testb $1, %cl
3980; AVX2-NEXT:    je LBB52_8
3981; AVX2-NEXT:  ## BB#7: ## %cond.load7
3982; AVX2-NEXT:    vpinsrb $3, 3(%rax), %xmm2, %xmm3
3983; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
3984; AVX2-NEXT:  LBB52_8: ## %else8
3985; AVX2-NEXT:    testb $1, %r8b
3986; AVX2-NEXT:    je LBB52_10
3987; AVX2-NEXT:  ## BB#9: ## %cond.load10
3988; AVX2-NEXT:    vpinsrb $4, 4(%rax), %xmm2, %xmm3
3989; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
3990; AVX2-NEXT:  LBB52_10: ## %else11
3991; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r10b
3992; AVX2-NEXT:    testb $1, %r9b
3993; AVX2-NEXT:    je LBB52_12
3994; AVX2-NEXT:  ## BB#11: ## %cond.load13
3995; AVX2-NEXT:    vpinsrb $5, 5(%rax), %xmm2, %xmm3
3996; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
3997; AVX2-NEXT:  LBB52_12: ## %else14
3998; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r11b
3999; AVX2-NEXT:    testb $1, %r10b
4000; AVX2-NEXT:    je LBB52_14
4001; AVX2-NEXT:  ## BB#13: ## %cond.load16
4002; AVX2-NEXT:    vpinsrb $6, 6(%rax), %xmm2, %xmm3
4003; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4004; AVX2-NEXT:  LBB52_14: ## %else17
4005; AVX2-NEXT:    testb $1, %r11b
4006; AVX2-NEXT:    je LBB52_16
4007; AVX2-NEXT:  ## BB#15: ## %cond.load19
4008; AVX2-NEXT:    vpinsrb $7, 7(%rax), %xmm2, %xmm3
4009; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4010; AVX2-NEXT:  LBB52_16: ## %else20
4011; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4012; AVX2-NEXT:    je LBB52_18
4013; AVX2-NEXT:  ## BB#17: ## %cond.load22
4014; AVX2-NEXT:    vpinsrb $8, 8(%rax), %xmm2, %xmm3
4015; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4016; AVX2-NEXT:  LBB52_18: ## %else23
4017; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4018; AVX2-NEXT:    je LBB52_20
4019; AVX2-NEXT:  ## BB#19: ## %cond.load25
4020; AVX2-NEXT:    vpinsrb $9, 9(%rax), %xmm2, %xmm3
4021; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4022; AVX2-NEXT:  LBB52_20: ## %else26
4023; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4024; AVX2-NEXT:    je LBB52_22
4025; AVX2-NEXT:  ## BB#21: ## %cond.load28
4026; AVX2-NEXT:    vpinsrb $10, 10(%rax), %xmm2, %xmm3
4027; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4028; AVX2-NEXT:  LBB52_22: ## %else29
4029; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %bpl
4030; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4031; AVX2-NEXT:    je LBB52_24
4032; AVX2-NEXT:  ## BB#23: ## %cond.load31
4033; AVX2-NEXT:    vpinsrb $11, 11(%rax), %xmm2, %xmm3
4034; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4035; AVX2-NEXT:  LBB52_24: ## %else32
4036; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %bl
4037; AVX2-NEXT:    testb $1, %bpl
4038; AVX2-NEXT:    je LBB52_26
4039; AVX2-NEXT:  ## BB#25: ## %cond.load34
4040; AVX2-NEXT:    vpinsrb $12, 12(%rax), %xmm2, %xmm3
4041; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4042; AVX2-NEXT:  LBB52_26: ## %else35
4043; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r14b
4044; AVX2-NEXT:    testb $1, %bl
4045; AVX2-NEXT:    je LBB52_28
4046; AVX2-NEXT:  ## BB#27: ## %cond.load37
4047; AVX2-NEXT:    vpinsrb $13, 13(%rax), %xmm2, %xmm3
4048; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4049; AVX2-NEXT:  LBB52_28: ## %else38
4050; AVX2-NEXT:    testb $1, %r14b
4051; AVX2-NEXT:    je LBB52_30
4052; AVX2-NEXT:  ## BB#29: ## %cond.load40
4053; AVX2-NEXT:    vpinsrb $14, 14(%rax), %xmm2, %xmm3
4054; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4055; AVX2-NEXT:  LBB52_30: ## %else41
4056; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r13b
4057; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4058; AVX2-NEXT:    je LBB52_32
4059; AVX2-NEXT:  ## BB#31: ## %cond.load43
4060; AVX2-NEXT:    vpinsrb $15, 15(%rax), %xmm2, %xmm3
4061; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
4062; AVX2-NEXT:  LBB52_32: ## %else44
4063; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r12b
4064; AVX2-NEXT:    testb $1, %r13b
4065; AVX2-NEXT:    je LBB52_34
4066; AVX2-NEXT:  ## BB#33: ## %cond.load46
4067; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4068; AVX2-NEXT:    vpinsrb $0, 16(%rax), %xmm3, %xmm3
4069; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4070; AVX2-NEXT:  LBB52_34: ## %else47
4071; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %r15b
4072; AVX2-NEXT:    testb $1, %r12b
4073; AVX2-NEXT:    je LBB52_36
4074; AVX2-NEXT:  ## BB#35: ## %cond.load49
4075; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4076; AVX2-NEXT:    vpinsrb $1, 17(%rax), %xmm3, %xmm3
4077; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4078; AVX2-NEXT:  LBB52_36: ## %else50
4079; AVX2-NEXT:    testb $1, %r15b
4080; AVX2-NEXT:    je LBB52_38
4081; AVX2-NEXT:  ## BB#37: ## %cond.load52
4082; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4083; AVX2-NEXT:    vpinsrb $2, 18(%rax), %xmm3, %xmm3
4084; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4085; AVX2-NEXT:  LBB52_38: ## %else53
4086; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4087; AVX2-NEXT:    je LBB52_40
4088; AVX2-NEXT:  ## BB#39: ## %cond.load55
4089; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4090; AVX2-NEXT:    vpinsrb $3, 19(%rax), %xmm3, %xmm3
4091; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4092; AVX2-NEXT:  LBB52_40: ## %else56
4093; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4094; AVX2-NEXT:    je LBB52_42
4095; AVX2-NEXT:  ## BB#41: ## %cond.load58
4096; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4097; AVX2-NEXT:    vpinsrb $4, 20(%rax), %xmm3, %xmm3
4098; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4099; AVX2-NEXT:  LBB52_42: ## %else59
4100; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4101; AVX2-NEXT:    je LBB52_44
4102; AVX2-NEXT:  ## BB#43: ## %cond.load61
4103; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4104; AVX2-NEXT:    vpinsrb $5, 21(%rax), %xmm3, %xmm3
4105; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4106; AVX2-NEXT:  LBB52_44: ## %else62
4107; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4108; AVX2-NEXT:    je LBB52_46
4109; AVX2-NEXT:  ## BB#45: ## %cond.load64
4110; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4111; AVX2-NEXT:    vpinsrb $6, 22(%rax), %xmm3, %xmm3
4112; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4113; AVX2-NEXT:  LBB52_46: ## %else65
4114; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4115; AVX2-NEXT:    je LBB52_48
4116; AVX2-NEXT:  ## BB#47: ## %cond.load67
4117; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4118; AVX2-NEXT:    vpinsrb $7, 23(%rax), %xmm3, %xmm3
4119; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4120; AVX2-NEXT:  LBB52_48: ## %else68
4121; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4122; AVX2-NEXT:    je LBB52_50
4123; AVX2-NEXT:  ## BB#49: ## %cond.load70
4124; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4125; AVX2-NEXT:    vpinsrb $8, 24(%rax), %xmm3, %xmm3
4126; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4127; AVX2-NEXT:  LBB52_50: ## %else71
4128; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4129; AVX2-NEXT:    je LBB52_52
4130; AVX2-NEXT:  ## BB#51: ## %cond.load73
4131; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4132; AVX2-NEXT:    vpinsrb $9, 25(%rax), %xmm3, %xmm3
4133; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4134; AVX2-NEXT:  LBB52_52: ## %else74
4135; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4136; AVX2-NEXT:    je LBB52_54
4137; AVX2-NEXT:  ## BB#53: ## %cond.load76
4138; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4139; AVX2-NEXT:    vpinsrb $10, 26(%rax), %xmm3, %xmm3
4140; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4141; AVX2-NEXT:  LBB52_54: ## %else77
4142; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4143; AVX2-NEXT:    je LBB52_56
4144; AVX2-NEXT:  ## BB#55: ## %cond.load79
4145; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4146; AVX2-NEXT:    vpinsrb $11, 27(%rax), %xmm3, %xmm3
4147; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4148; AVX2-NEXT:  LBB52_56: ## %else80
4149; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4150; AVX2-NEXT:    je LBB52_58
4151; AVX2-NEXT:  ## BB#57: ## %cond.load82
4152; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4153; AVX2-NEXT:    vpinsrb $12, 28(%rax), %xmm3, %xmm3
4154; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4155; AVX2-NEXT:  LBB52_58: ## %else83
4156; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4157; AVX2-NEXT:    je LBB52_60
4158; AVX2-NEXT:  ## BB#59: ## %cond.load85
4159; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4160; AVX2-NEXT:    vpinsrb $13, 29(%rax), %xmm3, %xmm3
4161; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4162; AVX2-NEXT:  LBB52_60: ## %else86
4163; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4164; AVX2-NEXT:    je LBB52_62
4165; AVX2-NEXT:  ## BB#61: ## %cond.load88
4166; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4167; AVX2-NEXT:    vpinsrb $14, 30(%rax), %xmm3, %xmm3
4168; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4169; AVX2-NEXT:  LBB52_62: ## %else89
4170; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4171; AVX2-NEXT:    je LBB52_64
4172; AVX2-NEXT:  ## BB#63: ## %cond.load91
4173; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
4174; AVX2-NEXT:    vpinsrb $15, 31(%rax), %xmm3, %xmm3
4175; AVX2-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
4176; AVX2-NEXT:  LBB52_64: ## %else92
4177; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4178; AVX2-NEXT:    je LBB52_66
4179; AVX2-NEXT:  ## BB#65: ## %cond.load94
4180; AVX2-NEXT:    vpinsrb $0, 32(%rax), %xmm0, %xmm3
4181; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
4182; AVX2-NEXT:  LBB52_66: ## %else95
4183; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4184; AVX2-NEXT:    je LBB52_68
4185; AVX2-NEXT:  ## BB#67: ## %cond.load97
4186; AVX2-NEXT:    vpinsrb $1, 33(%rax), %xmm3, %xmm4
4187; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4188; AVX2-NEXT:  LBB52_68: ## %else98
4189; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4190; AVX2-NEXT:    je LBB52_70
4191; AVX2-NEXT:  ## BB#69: ## %cond.load100
4192; AVX2-NEXT:    vpinsrb $2, 34(%rax), %xmm3, %xmm4
4193; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4194; AVX2-NEXT:  LBB52_70: ## %else101
4195; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4196; AVX2-NEXT:    je LBB52_72
4197; AVX2-NEXT:  ## BB#71: ## %cond.load103
4198; AVX2-NEXT:    vpinsrb $3, 35(%rax), %xmm3, %xmm4
4199; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4200; AVX2-NEXT:  LBB52_72: ## %else104
4201; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4202; AVX2-NEXT:    je LBB52_74
4203; AVX2-NEXT:  ## BB#73: ## %cond.load106
4204; AVX2-NEXT:    vpinsrb $4, 36(%rax), %xmm3, %xmm4
4205; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4206; AVX2-NEXT:  LBB52_74: ## %else107
4207; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4208; AVX2-NEXT:    je LBB52_76
4209; AVX2-NEXT:  ## BB#75: ## %cond.load109
4210; AVX2-NEXT:    vpinsrb $5, 37(%rax), %xmm3, %xmm4
4211; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4212; AVX2-NEXT:  LBB52_76: ## %else110
4213; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4214; AVX2-NEXT:    je LBB52_78
4215; AVX2-NEXT:  ## BB#77: ## %cond.load112
4216; AVX2-NEXT:    vpinsrb $6, 38(%rax), %xmm3, %xmm4
4217; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4218; AVX2-NEXT:  LBB52_78: ## %else113
4219; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4220; AVX2-NEXT:    je LBB52_80
4221; AVX2-NEXT:  ## BB#79: ## %cond.load115
4222; AVX2-NEXT:    vpinsrb $7, 39(%rax), %xmm3, %xmm4
4223; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4224; AVX2-NEXT:  LBB52_80: ## %else116
4225; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4226; AVX2-NEXT:    je LBB52_82
4227; AVX2-NEXT:  ## BB#81: ## %cond.load118
4228; AVX2-NEXT:    vpinsrb $8, 40(%rax), %xmm3, %xmm4
4229; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4230; AVX2-NEXT:  LBB52_82: ## %else119
4231; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4232; AVX2-NEXT:    je LBB52_84
4233; AVX2-NEXT:  ## BB#83: ## %cond.load121
4234; AVX2-NEXT:    vpinsrb $9, 41(%rax), %xmm3, %xmm4
4235; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4236; AVX2-NEXT:  LBB52_84: ## %else122
4237; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4238; AVX2-NEXT:    je LBB52_86
4239; AVX2-NEXT:  ## BB#85: ## %cond.load124
4240; AVX2-NEXT:    vpinsrb $10, 42(%rax), %xmm3, %xmm4
4241; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4242; AVX2-NEXT:  LBB52_86: ## %else125
4243; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4244; AVX2-NEXT:    je LBB52_88
4245; AVX2-NEXT:  ## BB#87: ## %cond.load127
4246; AVX2-NEXT:    vpinsrb $11, 43(%rax), %xmm3, %xmm4
4247; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4248; AVX2-NEXT:  LBB52_88: ## %else128
4249; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4250; AVX2-NEXT:    je LBB52_90
4251; AVX2-NEXT:  ## BB#89: ## %cond.load130
4252; AVX2-NEXT:    vpinsrb $12, 44(%rax), %xmm3, %xmm4
4253; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4254; AVX2-NEXT:  LBB52_90: ## %else131
4255; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4256; AVX2-NEXT:    je LBB52_92
4257; AVX2-NEXT:  ## BB#91: ## %cond.load133
4258; AVX2-NEXT:    vpinsrb $13, 45(%rax), %xmm3, %xmm4
4259; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4260; AVX2-NEXT:  LBB52_92: ## %else134
4261; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4262; AVX2-NEXT:    je LBB52_94
4263; AVX2-NEXT:  ## BB#93: ## %cond.load136
4264; AVX2-NEXT:    vpinsrb $14, 46(%rax), %xmm3, %xmm4
4265; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4266; AVX2-NEXT:  LBB52_94: ## %else137
4267; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4268; AVX2-NEXT:    je LBB52_96
4269; AVX2-NEXT:  ## BB#95: ## %cond.load139
4270; AVX2-NEXT:    vpinsrb $15, 47(%rax), %xmm3, %xmm4
4271; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
4272; AVX2-NEXT:  LBB52_96: ## %else140
4273; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4274; AVX2-NEXT:    je LBB52_98
4275; AVX2-NEXT:  ## BB#97: ## %cond.load142
4276; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4277; AVX2-NEXT:    vpinsrb $0, 48(%rax), %xmm4, %xmm4
4278; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4279; AVX2-NEXT:  LBB52_98: ## %else143
4280; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4281; AVX2-NEXT:    je LBB52_100
4282; AVX2-NEXT:  ## BB#99: ## %cond.load145
4283; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4284; AVX2-NEXT:    vpinsrb $1, 49(%rax), %xmm4, %xmm4
4285; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4286; AVX2-NEXT:  LBB52_100: ## %else146
4287; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4288; AVX2-NEXT:    je LBB52_102
4289; AVX2-NEXT:  ## BB#101: ## %cond.load148
4290; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4291; AVX2-NEXT:    vpinsrb $2, 50(%rax), %xmm4, %xmm4
4292; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4293; AVX2-NEXT:  LBB52_102: ## %else149
4294; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4295; AVX2-NEXT:    je LBB52_104
4296; AVX2-NEXT:  ## BB#103: ## %cond.load151
4297; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4298; AVX2-NEXT:    vpinsrb $3, 51(%rax), %xmm4, %xmm4
4299; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4300; AVX2-NEXT:  LBB52_104: ## %else152
4301; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4302; AVX2-NEXT:    je LBB52_106
4303; AVX2-NEXT:  ## BB#105: ## %cond.load154
4304; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4305; AVX2-NEXT:    vpinsrb $4, 52(%rax), %xmm4, %xmm4
4306; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4307; AVX2-NEXT:  LBB52_106: ## %else155
4308; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4309; AVX2-NEXT:    je LBB52_108
4310; AVX2-NEXT:  ## BB#107: ## %cond.load157
4311; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4312; AVX2-NEXT:    vpinsrb $5, 53(%rax), %xmm4, %xmm4
4313; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4314; AVX2-NEXT:  LBB52_108: ## %else158
4315; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4316; AVX2-NEXT:    je LBB52_110
4317; AVX2-NEXT:  ## BB#109: ## %cond.load160
4318; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4319; AVX2-NEXT:    vpinsrb $6, 54(%rax), %xmm4, %xmm4
4320; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4321; AVX2-NEXT:  LBB52_110: ## %else161
4322; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4323; AVX2-NEXT:    je LBB52_112
4324; AVX2-NEXT:  ## BB#111: ## %cond.load163
4325; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4326; AVX2-NEXT:    vpinsrb $7, 55(%rax), %xmm4, %xmm4
4327; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4328; AVX2-NEXT:  LBB52_112: ## %else164
4329; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4330; AVX2-NEXT:    je LBB52_114
4331; AVX2-NEXT:  ## BB#113: ## %cond.load166
4332; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4333; AVX2-NEXT:    vpinsrb $8, 56(%rax), %xmm4, %xmm4
4334; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4335; AVX2-NEXT:  LBB52_114: ## %else167
4336; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4337; AVX2-NEXT:    je LBB52_116
4338; AVX2-NEXT:  ## BB#115: ## %cond.load169
4339; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4340; AVX2-NEXT:    vpinsrb $9, 57(%rax), %xmm4, %xmm4
4341; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4342; AVX2-NEXT:  LBB52_116: ## %else170
4343; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4344; AVX2-NEXT:    je LBB52_118
4345; AVX2-NEXT:  ## BB#117: ## %cond.load172
4346; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4347; AVX2-NEXT:    vpinsrb $10, 58(%rax), %xmm4, %xmm4
4348; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4349; AVX2-NEXT:  LBB52_118: ## %else173
4350; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4351; AVX2-NEXT:    je LBB52_120
4352; AVX2-NEXT:  ## BB#119: ## %cond.load175
4353; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4354; AVX2-NEXT:    vpinsrb $11, 59(%rax), %xmm4, %xmm4
4355; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4356; AVX2-NEXT:  LBB52_120: ## %else176
4357; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4358; AVX2-NEXT:    je LBB52_122
4359; AVX2-NEXT:  ## BB#121: ## %cond.load178
4360; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4361; AVX2-NEXT:    vpinsrb $12, 60(%rax), %xmm4, %xmm4
4362; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4363; AVX2-NEXT:  LBB52_122: ## %else179
4364; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4365; AVX2-NEXT:    je LBB52_124
4366; AVX2-NEXT:  ## BB#123: ## %cond.load181
4367; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4368; AVX2-NEXT:    vpinsrb $13, 61(%rax), %xmm4, %xmm4
4369; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4370; AVX2-NEXT:  LBB52_124: ## %else182
4371; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4372; AVX2-NEXT:    jne LBB52_126
4373; AVX2-NEXT:  ## BB#125:
4374; AVX2-NEXT:    movq %rax, %rdi
4375; AVX2-NEXT:    jmp LBB52_127
4376; AVX2-NEXT:  LBB52_126: ## %cond.load184
4377; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4378; AVX2-NEXT:    movq %rax, %rdi
4379; AVX2-NEXT:    vpinsrb $14, 62(%rax), %xmm4, %xmm4
4380; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4381; AVX2-NEXT:  LBB52_127: ## %else185
4382; AVX2-NEXT:    movl %ebp, %eax
4383; AVX2-NEXT:    testb $1, {{[0-9]+}}(%rsp)
4384; AVX2-NEXT:    movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
4385; AVX2-NEXT:    movl %r8d, (%rsp) ## 4-byte Spill
4386; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4387; AVX2-NEXT:    movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4388; AVX2-NEXT:    movl %esi, %ebp
4389; AVX2-NEXT:    je LBB52_129
4390; AVX2-NEXT:  ## BB#128: ## %cond.load187
4391; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
4392; AVX2-NEXT:    vpinsrb $15, 63(%rdi), %xmm4, %xmm4
4393; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
4394; AVX2-NEXT:  LBB52_129: ## %else188
4395; AVX2-NEXT:    movzbl %r10b, %ecx
4396; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4397; AVX2-NEXT:    movzbl %r11b, %ecx
4398; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4399; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4400; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4401; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4402; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4403; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4404; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4405; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4406; AVX2-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4407; AVX2-NEXT:    movzbl %al, %eax
4408; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4409; AVX2-NEXT:    movzbl %bl, %eax
4410; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4411; AVX2-NEXT:    movzbl %r14b, %eax
4412; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4413; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4414; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4415; AVX2-NEXT:    movzbl %r12b, %eax
4416; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4417; AVX2-NEXT:    movzbl %r13b, %eax
4418; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4419; AVX2-NEXT:    movzbl %r15b, %eax
4420; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4421; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4422; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4423; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4424; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4425; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4426; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4427; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4428; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4429; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4430; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4431; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4432; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4433; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4434; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4435; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4436; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4437; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4438; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4439; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4440; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4441; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4442; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4443; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4444; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4445; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4446; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4447; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4448; AVX2-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4449; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
4450; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
4451; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
4452; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
4453; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
4454; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
4455; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
4456; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4457; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4458; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
4459; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
4460; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
4461; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
4462; AVX2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
4463; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
4464; AVX2-NEXT:    movzbl %dil, %r13d
4465; AVX2-NEXT:    vmovd %r13d, %xmm4
4466; AVX2-NEXT:    movzbl %bpl, %ebp
4467; AVX2-NEXT:    vpinsrb $1, %ebp, %xmm4, %xmm4
4468; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
4469; AVX2-NEXT:    movzbl %bpl, %ebp
4470; AVX2-NEXT:    vpinsrb $2, %ebp, %xmm4, %xmm4
4471; AVX2-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
4472; AVX2-NEXT:    movzbl %bpl, %ebp
4473; AVX2-NEXT:    vpinsrb $3, %ebp, %xmm4, %xmm4
4474; AVX2-NEXT:    movl (%rsp), %ebp ## 4-byte Reload
4475; AVX2-NEXT:    movzbl %bpl, %ebp
4476; AVX2-NEXT:    vpinsrb $4, %ebp, %xmm4, %xmm4
4477; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
4478; AVX2-NEXT:    movzbl %bpl, %ebp
4479; AVX2-NEXT:    vpinsrb $5, %ebp, %xmm4, %xmm4
4480; AVX2-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4481; AVX2-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4482; AVX2-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4483; AVX2-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4484; AVX2-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4485; AVX2-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4486; AVX2-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4487; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4488; AVX2-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4489; AVX2-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
4490; AVX2-NEXT:    vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
4491; AVX2-NEXT:    ## xmm5 = mem[0],zero,zero,zero
4492; AVX2-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4493; AVX2-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4494; AVX2-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4495; AVX2-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4496; AVX2-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4497; AVX2-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4498; AVX2-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4499; AVX2-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4500; AVX2-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4501; AVX2-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4502; AVX2-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4503; AVX2-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4504; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4505; AVX2-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4506; AVX2-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
4507; AVX2-NEXT:    vmovd %r12d, %xmm6
4508; AVX2-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
4509; AVX2-NEXT:    vpinsrb $2, %r15d, %xmm6, %xmm6
4510; AVX2-NEXT:    vpinsrb $3, %r14d, %xmm6, %xmm6
4511; AVX2-NEXT:    vpinsrb $4, %ebx, %xmm6, %xmm6
4512; AVX2-NEXT:    vpinsrb $5, %r11d, %xmm6, %xmm6
4513; AVX2-NEXT:    vpinsrb $6, %r9d, %xmm6, %xmm6
4514; AVX2-NEXT:    vpinsrb $7, %esi, %xmm6, %xmm6
4515; AVX2-NEXT:    vpinsrb $8, %eax, %xmm6, %xmm6
4516; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4517; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
4518; AVX2-NEXT:    vpinsrb $9, %ecx, %xmm6, %xmm6
4519; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r9d
4520; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
4521; AVX2-NEXT:    vpinsrb $10, %edx, %xmm6, %xmm6
4522; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
4523; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
4524; AVX2-NEXT:    vpinsrb $11, %r8d, %xmm6, %xmm6
4525; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
4526; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
4527; AVX2-NEXT:    vpinsrb $12, %r10d, %xmm6, %xmm6
4528; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
4529; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
4530; AVX2-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
4531; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
4532; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
4533; AVX2-NEXT:    vpinsrb $14, %eax, %xmm6, %xmm6
4534; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
4535; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
4536; AVX2-NEXT:    vpinsrb $15, %r15d, %xmm6, %xmm6
4537; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
4538; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
4539; AVX2-NEXT:    vmovd %r12d, %xmm7
4540; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
4541; AVX2-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
4542; AVX2-NEXT:    vpinsrb $1, %r9d, %xmm7, %xmm7
4543; AVX2-NEXT:    vpinsrb $2, %r11d, %xmm7, %xmm7
4544; AVX2-NEXT:    vpinsrb $3, %r14d, %xmm7, %xmm7
4545; AVX2-NEXT:    vpinsrb $4, %r13d, %xmm7, %xmm7
4546; AVX2-NEXT:    vpinsrb $5, %ecx, %xmm7, %xmm7
4547; AVX2-NEXT:    vpinsrb $6, %r8d, %xmm7, %xmm7
4548; AVX2-NEXT:    vpinsrb $7, %ebx, %xmm7, %xmm7
4549; AVX2-NEXT:    vpinsrb $8, %r10d, %xmm7, %xmm7
4550; AVX2-NEXT:    vpinsrb $9, %ebp, %xmm7, %xmm7
4551; AVX2-NEXT:    vpinsrb $10, %eax, %xmm7, %xmm7
4552; AVX2-NEXT:    vpinsrb $11, %edi, %xmm7, %xmm7
4553; AVX2-NEXT:    vpinsrb $12, %r15d, %xmm7, %xmm7
4554; AVX2-NEXT:    vpinsrb $13, %esi, %xmm7, %xmm7
4555; AVX2-NEXT:    vpinsrb $14, %r12d, %xmm7, %xmm7
4556; AVX2-NEXT:    vpinsrb $15, %edx, %xmm7, %xmm7
4557; AVX2-NEXT:    vinserti128 $1, %xmm5, %ymm4, %ymm4
4558; AVX2-NEXT:    vpsllw $7, %ymm4, %ymm4
4559; AVX2-NEXT:    vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
4560; AVX2-NEXT:    vpand %ymm5, %ymm4, %ymm4
4561; AVX2-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
4562; AVX2-NEXT:    vinserti128 $1, %xmm7, %ymm6, %ymm2
4563; AVX2-NEXT:    vpsllw $7, %ymm2, %ymm2
4564; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
4565; AVX2-NEXT:    vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
4566; AVX2-NEXT:    addq $8, %rsp
4567; AVX2-NEXT:    popq %rbx
4568; AVX2-NEXT:    popq %r12
4569; AVX2-NEXT:    popq %r13
4570; AVX2-NEXT:    popq %r14
4571; AVX2-NEXT:    popq %r15
4572; AVX2-NEXT:    popq %rbp
4573; AVX2-NEXT:    retq
4574;
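; NOTE: AVX512F without AVX512BW also has no byte-granularity masked load, so
; the block below extracts each mask bit from a k-register (kshiftlw/kshiftrw
; plus kmovw) before falling into the same per-byte conditional-insert
; pattern; presumably only the SKX run (avx512bw,avx512vl) can use a single
; masked byte move instead.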
4575; AVX512F-LABEL: test_mask_load_64xi8:
4576; AVX512F:       ## BB#0:
4577; AVX512F-NEXT:    pushq %rbp
4578; AVX512F-NEXT:  Ltmp0:
4579; AVX512F-NEXT:    .cfi_def_cfa_offset 16
4580; AVX512F-NEXT:    pushq %r15
4581; AVX512F-NEXT:  Ltmp1:
4582; AVX512F-NEXT:    .cfi_def_cfa_offset 24
4583; AVX512F-NEXT:    pushq %r14
4584; AVX512F-NEXT:  Ltmp2:
4585; AVX512F-NEXT:    .cfi_def_cfa_offset 32
4586; AVX512F-NEXT:    pushq %r13
4587; AVX512F-NEXT:  Ltmp3:
4588; AVX512F-NEXT:    .cfi_def_cfa_offset 40
4589; AVX512F-NEXT:    pushq %r12
4590; AVX512F-NEXT:  Ltmp4:
4591; AVX512F-NEXT:    .cfi_def_cfa_offset 48
4592; AVX512F-NEXT:    pushq %rbx
4593; AVX512F-NEXT:  Ltmp5:
4594; AVX512F-NEXT:    .cfi_def_cfa_offset 56
4595; AVX512F-NEXT:    subq $76, %rsp
4596; AVX512F-NEXT:  Ltmp6:
4597; AVX512F-NEXT:    .cfi_def_cfa_offset 132
4598; AVX512F-NEXT:  Ltmp7:
4599; AVX512F-NEXT:    .cfi_offset %rbx, -56
4600; AVX512F-NEXT:  Ltmp8:
4601; AVX512F-NEXT:    .cfi_offset %r12, -48
4602; AVX512F-NEXT:  Ltmp9:
4603; AVX512F-NEXT:    .cfi_offset %r13, -40
4604; AVX512F-NEXT:  Ltmp10:
4605; AVX512F-NEXT:    .cfi_offset %r14, -32
4606; AVX512F-NEXT:  Ltmp11:
4607; AVX512F-NEXT:    .cfi_offset %r15, -24
4608; AVX512F-NEXT:  Ltmp12:
4609; AVX512F-NEXT:    .cfi_offset %rbp, -16
4610; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
4611; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
4612; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
4613; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
4614; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4615; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4616; AVX512F-NEXT:    kmovw %k1, %eax
4617; AVX512F-NEXT:    testb %al, %al
4618; AVX512F-NEXT:    je LBB52_2
4619; AVX512F-NEXT:  ## BB#1: ## %cond.load
4620; AVX512F-NEXT:    movzbl (%rdi), %eax
4621; AVX512F-NEXT:    vmovd %eax, %xmm0
4622; AVX512F-NEXT:  LBB52_2: ## %else
4623; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
4624; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4625; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4626; AVX512F-NEXT:    kmovw %k1, %eax
4627; AVX512F-NEXT:    testb %al, %al
4628; AVX512F-NEXT:    je LBB52_4
4629; AVX512F-NEXT:  ## BB#3: ## %cond.load1
4630; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm6
4631; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4632; AVX512F-NEXT:  LBB52_4: ## %else2
4633; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
4634; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4635; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4636; AVX512F-NEXT:    kmovw %k1, %eax
4637; AVX512F-NEXT:    testb %al, %al
4638; AVX512F-NEXT:    je LBB52_6
4639; AVX512F-NEXT:  ## BB#5: ## %cond.load4
4640; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm6
4641; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4642; AVX512F-NEXT:  LBB52_6: ## %else5
4643; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
4644; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4645; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4646; AVX512F-NEXT:    kmovw %k1, %eax
4647; AVX512F-NEXT:    testb %al, %al
4648; AVX512F-NEXT:    je LBB52_8
4649; AVX512F-NEXT:  ## BB#7: ## %cond.load7
4650; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm6
4651; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4652; AVX512F-NEXT:  LBB52_8: ## %else8
4653; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
4654; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4655; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4656; AVX512F-NEXT:    kmovw %k1, %eax
4657; AVX512F-NEXT:    testb %al, %al
4658; AVX512F-NEXT:    je LBB52_10
4659; AVX512F-NEXT:  ## BB#9: ## %cond.load10
4660; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm6
4661; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4662; AVX512F-NEXT:  LBB52_10: ## %else11
4663; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
4664; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4665; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4666; AVX512F-NEXT:    kmovw %k1, %eax
4667; AVX512F-NEXT:    testb %al, %al
4668; AVX512F-NEXT:    je LBB52_12
4669; AVX512F-NEXT:  ## BB#11: ## %cond.load13
4670; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm6
4671; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4672; AVX512F-NEXT:  LBB52_12: ## %else14
4673; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
4674; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4675; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4676; AVX512F-NEXT:    kmovw %k1, %eax
4677; AVX512F-NEXT:    testb %al, %al
4678; AVX512F-NEXT:    je LBB52_14
4679; AVX512F-NEXT:  ## BB#13: ## %cond.load16
4680; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm6
4681; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4682; AVX512F-NEXT:  LBB52_14: ## %else17
4683; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
4684; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4685; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4686; AVX512F-NEXT:    kmovw %k1, %eax
4687; AVX512F-NEXT:    testb %al, %al
4688; AVX512F-NEXT:    je LBB52_16
4689; AVX512F-NEXT:  ## BB#15: ## %cond.load19
4690; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm6
4691; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4692; AVX512F-NEXT:  LBB52_16: ## %else20
4693; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
4694; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4695; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4696; AVX512F-NEXT:    kmovw %k1, %eax
4697; AVX512F-NEXT:    testb %al, %al
4698; AVX512F-NEXT:    je LBB52_18
4699; AVX512F-NEXT:  ## BB#17: ## %cond.load22
4700; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm6
4701; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4702; AVX512F-NEXT:  LBB52_18: ## %else23
4703; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
4704; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4705; AVX512F-NEXT:    kmovw %k1, (%rsp) ## 2-byte Folded Spill
4706; AVX512F-NEXT:    kmovw %k1, %eax
4707; AVX512F-NEXT:    testb %al, %al
4708; AVX512F-NEXT:    je LBB52_20
4709; AVX512F-NEXT:  ## BB#19: ## %cond.load25
4710; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm6
4711; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4712; AVX512F-NEXT:  LBB52_20: ## %else26
4713; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
4714; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4715; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4716; AVX512F-NEXT:    kmovw %k1, %eax
4717; AVX512F-NEXT:    testb %al, %al
4718; AVX512F-NEXT:    je LBB52_22
4719; AVX512F-NEXT:  ## BB#21: ## %cond.load28
4720; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm6
4721; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4722; AVX512F-NEXT:  LBB52_22: ## %else29
4723; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
4724; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4725; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4726; AVX512F-NEXT:    kmovw %k1, %eax
4727; AVX512F-NEXT:    testb %al, %al
4728; AVX512F-NEXT:    je LBB52_24
4729; AVX512F-NEXT:  ## BB#23: ## %cond.load31
4730; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm6
4731; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4732; AVX512F-NEXT:  LBB52_24: ## %else32
4733; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
4734; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4735; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4736; AVX512F-NEXT:    kmovw %k1, %eax
4737; AVX512F-NEXT:    testb %al, %al
4738; AVX512F-NEXT:    je LBB52_26
4739; AVX512F-NEXT:  ## BB#25: ## %cond.load34
4740; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm6
4741; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4742; AVX512F-NEXT:  LBB52_26: ## %else35
4743; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm1
4744; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
4745; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4746; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4747; AVX512F-NEXT:    kmovw %k1, %eax
4748; AVX512F-NEXT:    testb %al, %al
4749; AVX512F-NEXT:    je LBB52_28
4750; AVX512F-NEXT:  ## BB#27: ## %cond.load37
4751; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm6
4752; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4753; AVX512F-NEXT:  LBB52_28: ## %else38
4754; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
4755; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
4756; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
4757; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4758; AVX512F-NEXT:    kmovw %k1, %eax
4759; AVX512F-NEXT:    testb %al, %al
4760; AVX512F-NEXT:    je LBB52_30
4761; AVX512F-NEXT:  ## BB#29: ## %cond.load40
4762; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm6
4763; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
4764; AVX512F-NEXT:  LBB52_30: ## %else41
4765; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
4766; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
4767; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
4768; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4769; AVX512F-NEXT:    kmovw %k0, %eax
4770; AVX512F-NEXT:    testb %al, %al
4771; AVX512F-NEXT:    je LBB52_32
4772; AVX512F-NEXT:  ## BB#31: ## %cond.load43
4773; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm1
4774; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
4775; AVX512F-NEXT:  LBB52_32: ## %else44
4776; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
4777; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
4778; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4779; AVX512F-NEXT:    kmovw %k0, %eax
4780; AVX512F-NEXT:    testb %al, %al
4781; AVX512F-NEXT:    je LBB52_34
4782; AVX512F-NEXT:  ## BB#33: ## %cond.load46
4783; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
4784; AVX512F-NEXT:    vpinsrb $0, 16(%rdi), %xmm1, %xmm1
4785; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
4786; AVX512F-NEXT:  LBB52_34: ## %else47
4787; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
4788; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
4789; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4790; AVX512F-NEXT:    kmovw %k0, %eax
4791; AVX512F-NEXT:    testb %al, %al
4792; AVX512F-NEXT:    je LBB52_36
4793; AVX512F-NEXT:  ## BB#35: ## %cond.load49
4794; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
4795; AVX512F-NEXT:    vpinsrb $1, 17(%rdi), %xmm1, %xmm1
4796; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
4797; AVX512F-NEXT:  LBB52_36: ## %else50
4798; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
4799; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
4800; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4801; AVX512F-NEXT:    kmovw %k0, %eax
4802; AVX512F-NEXT:    testb %al, %al
4803; AVX512F-NEXT:    je LBB52_38
4804; AVX512F-NEXT:  ## BB#37: ## %cond.load52
4805; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
4806; AVX512F-NEXT:    vpinsrb $2, 18(%rdi), %xmm1, %xmm1
4807; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
4808; AVX512F-NEXT:  LBB52_38: ## %else53
4809; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
4810; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
4811; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
4812; AVX512F-NEXT:    kmovw %k0, %eax
4813; AVX512F-NEXT:    testb %al, %al
4814; AVX512F-NEXT:    je LBB52_40
4815; AVX512F-NEXT:  ## BB#39: ## %cond.load55
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $3, 19(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_40: ## %else56
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_42
; AVX512F-NEXT:  ## BB#41: ## %cond.load58
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $4, 20(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_42: ## %else59
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_44
; AVX512F-NEXT:  ## BB#43: ## %cond.load61
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $5, 21(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_44: ## %else62
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_46
; AVX512F-NEXT:  ## BB#45: ## %cond.load64
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $6, 22(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_46: ## %else65
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_48
; AVX512F-NEXT:  ## BB#47: ## %cond.load67
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $7, 23(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_48: ## %else68
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_50
; AVX512F-NEXT:  ## BB#49: ## %cond.load70
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $8, 24(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_50: ## %else71
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_52
; AVX512F-NEXT:  ## BB#51: ## %cond.load73
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $9, 25(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_52: ## %else74
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_54
; AVX512F-NEXT:  ## BB#53: ## %cond.load76
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $10, 26(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_54: ## %else77
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_56
; AVX512F-NEXT:  ## BB#55: ## %cond.load79
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $11, 27(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_56: ## %else80
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_58
; AVX512F-NEXT:  ## BB#57: ## %cond.load82
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $12, 28(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_58: ## %else83
; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm1
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_60
; AVX512F-NEXT:  ## BB#59: ## %cond.load85
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpinsrb $13, 29(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_60: ## %else86
; AVX512F-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_62
; AVX512F-NEXT:  ## BB#61: ## %cond.load88
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512F-NEXT:    vpinsrb $14, 30(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_62: ## %else89
; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_64
; AVX512F-NEXT:  ## BB#63: ## %cond.load91
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrb $15, 31(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB52_64: ## %else92
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_66
; AVX512F-NEXT:  ## BB#65: ## %cond.load94
; AVX512F-NEXT:    vpinsrb $0, 32(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB52_66: ## %else95
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_68
; AVX512F-NEXT:  ## BB#67: ## %cond.load97
; AVX512F-NEXT:    vpinsrb $1, 33(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_68: ## %else98
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_70
; AVX512F-NEXT:  ## BB#69: ## %cond.load100
; AVX512F-NEXT:    vpinsrb $2, 34(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_70: ## %else101
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_72
; AVX512F-NEXT:  ## BB#71: ## %cond.load103
; AVX512F-NEXT:    vpinsrb $3, 35(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_72: ## %else104
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_74
; AVX512F-NEXT:  ## BB#73: ## %cond.load106
; AVX512F-NEXT:    vpinsrb $4, 36(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_74: ## %else107
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_76
; AVX512F-NEXT:  ## BB#75: ## %cond.load109
; AVX512F-NEXT:    vpinsrb $5, 37(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_76: ## %else110
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_78
; AVX512F-NEXT:  ## BB#77: ## %cond.load112
; AVX512F-NEXT:    vpinsrb $6, 38(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_78: ## %else113
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_80
; AVX512F-NEXT:  ## BB#79: ## %cond.load115
; AVX512F-NEXT:    vpinsrb $7, 39(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_80: ## %else116
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_82
; AVX512F-NEXT:  ## BB#81: ## %cond.load118
; AVX512F-NEXT:    vpinsrb $8, 40(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_82: ## %else119
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_84
; AVX512F-NEXT:  ## BB#83: ## %cond.load121
; AVX512F-NEXT:    vpinsrb $9, 41(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_84: ## %else122
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_86
; AVX512F-NEXT:  ## BB#85: ## %cond.load124
; AVX512F-NEXT:    vpinsrb $10, 42(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_86: ## %else125
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_88
; AVX512F-NEXT:  ## BB#87: ## %cond.load127
; AVX512F-NEXT:    vpinsrb $11, 43(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_88: ## %else128
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_90
; AVX512F-NEXT:  ## BB#89: ## %cond.load130
; AVX512F-NEXT:    vpinsrb $12, 44(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_90: ## %else131
; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm2
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_92
; AVX512F-NEXT:  ## BB#91: ## %cond.load133
; AVX512F-NEXT:    vpinsrb $13, 45(%rdi), %xmm1, %xmm3
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_92: ## %else134
; AVX512F-NEXT:    vpslld $31, %zmm2, %zmm2
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_94
; AVX512F-NEXT:  ## BB#93: ## %cond.load136
; AVX512F-NEXT:    vpinsrb $14, 46(%rdi), %xmm1, %xmm3
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_94: ## %else137
; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k1
; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_96
; AVX512F-NEXT:  ## BB#95: ## %cond.load139
; AVX512F-NEXT:    vpinsrb $15, 47(%rdi), %xmm1, %xmm2
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT:  LBB52_96: ## %else140
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_98
; AVX512F-NEXT:  ## BB#97: ## %cond.load142
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $0, 48(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_98: ## %else143
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_100
; AVX512F-NEXT:  ## BB#99: ## %cond.load145
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $1, 49(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_100: ## %else146
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_102
; AVX512F-NEXT:  ## BB#101: ## %cond.load148
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $2, 50(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_102: ## %else149
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_104
; AVX512F-NEXT:  ## BB#103: ## %cond.load151
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $3, 51(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_104: ## %else152
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_106
; AVX512F-NEXT:  ## BB#105: ## %cond.load154
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $4, 52(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_106: ## %else155
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_108
; AVX512F-NEXT:  ## BB#107: ## %cond.load157
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $5, 53(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_108: ## %else158
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_110
; AVX512F-NEXT:  ## BB#109: ## %cond.load160
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $6, 54(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_110: ## %else161
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_112
; AVX512F-NEXT:  ## BB#111: ## %cond.load163
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $7, 55(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_112: ## %else164
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_114
; AVX512F-NEXT:  ## BB#113: ## %cond.load166
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $8, 56(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_114: ## %else167
; AVX512F-NEXT:    kshiftlw $6, %k1, %k2
; AVX512F-NEXT:    kshiftrw $15, %k2, %k2
; AVX512F-NEXT:    kmovw %k2, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_116
; AVX512F-NEXT:  ## BB#115: ## %cond.load169
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $9, 57(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_116: ## %else170
; AVX512F-NEXT:    kshiftlw $5, %k1, %k3
; AVX512F-NEXT:    kshiftrw $15, %k3, %k3
; AVX512F-NEXT:    kmovw %k3, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_118
; AVX512F-NEXT:  ## BB#117: ## %cond.load172
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $10, 58(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_118: ## %else173
; AVX512F-NEXT:    kshiftlw $4, %k1, %k4
; AVX512F-NEXT:    kshiftrw $15, %k4, %k4
; AVX512F-NEXT:    kmovw %k4, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_120
; AVX512F-NEXT:  ## BB#119: ## %cond.load175
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $11, 59(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_120: ## %else176
; AVX512F-NEXT:    kshiftlw $3, %k1, %k5
; AVX512F-NEXT:    kshiftrw $15, %k5, %k5
; AVX512F-NEXT:    kmovw %k5, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_122
; AVX512F-NEXT:  ## BB#121: ## %cond.load178
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $12, 60(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_122: ## %else179
; AVX512F-NEXT:    kshiftlw $2, %k1, %k6
; AVX512F-NEXT:    kshiftrw $15, %k6, %k6
; AVX512F-NEXT:    kmovw %k6, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_124
; AVX512F-NEXT:  ## BB#123: ## %cond.load181
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $13, 61(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_124: ## %else182
; AVX512F-NEXT:    kshiftlw $1, %k1, %k7
; AVX512F-NEXT:    kshiftrw $15, %k7, %k7
; AVX512F-NEXT:    kmovw %k7, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_126
; AVX512F-NEXT:  ## BB#125: ## %cond.load184
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $14, 62(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_126: ## %else185
; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB52_128
; AVX512F-NEXT:  ## BB#127: ## %cond.load187
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpinsrb $15, 63(%rdi), %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:  LBB52_128: ## %else188
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw (%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, (%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw %k2, %eax
; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw %k3, %r12d
; AVX512F-NEXT:    kmovw %k4, %r15d
; AVX512F-NEXT:    kmovw %k5, %r14d
; AVX512F-NEXT:    kmovw %k6, %ebx
; AVX512F-NEXT:    kmovw %k7, %r11d
; AVX512F-NEXT:    kmovw %k1, %r10d
; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r8d
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r9d
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %edi
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %esi
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %edx
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %ecx
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    movl -{{[0-9]+}}(%rsp), %r13d ## 4-byte Reload
; AVX512F-NEXT:    vmovd %r13d, %xmm2
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX512F-NEXT:    vmovd %ebp, %xmm3
; AVX512F-NEXT:    vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, (%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
; AVX512F-NEXT:    movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
; AVX512F-NEXT:    vmovd %ebp, %xmm6
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r13d
; AVX512F-NEXT:    vpinsrb $10, %r12d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r12d
; AVX512F-NEXT:    vpinsrb $11, %r15d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r15d
; AVX512F-NEXT:    vpinsrb $12, %r14d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r14d
; AVX512F-NEXT:    vpinsrb $13, %ebx, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %ebx
; AVX512F-NEXT:    vpinsrb $14, %r11d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r11d
; AVX512F-NEXT:    vpinsrb $15, %r10d, %xmm6, %xmm6
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r10d
; AVX512F-NEXT:    vmovd %r8d, %xmm7
; AVX512F-NEXT:    kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
; AVX512F-NEXT:    kmovw %k0, %r8d
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
; AVX512F-NEXT:    vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm7, %xmm2 ## 4-byte Folded Reload
; AVX512F-NEXT:    vpinsrb $2, %r9d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $3, %edi, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $4, %esi, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $5, %edx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $7, %eax, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $8, %r13d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $9, %r12d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $10, %r15d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $11, %r14d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $12, %ebx, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $13, %r11d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $14, %r10d, %xmm2, %xmm2
; AVX512F-NEXT:    vpinsrb $15, %r8d, %xmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm2, %ymm2
; AVX512F-NEXT:    vpsllw $7, %ymm2, %ymm2
; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512F-NEXT:    vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
; AVX512F-NEXT:    addq $76, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r12
; AVX512F-NEXT:    popq %r13
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    popq %r15
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_64xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k1
; SKX-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
  %res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
  ret <64 x i8> %res
}
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)

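; Hand-added note (not autogenerated): without AVX512BW there is no masked
; load for i16 elements, so the <8 x i16> case below is scalarized. The AVX
; run tests each mask lane with vpextrb, AVX512F extracts mask bits with
; kshiftlw/kshiftrw + kmovw, and each set bit guards one element load. Only
; the SKX run (avx512bw,avx512vl) selects a single masked vmovdqu16.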
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; AVX-LABEL: test_mask_load_8xi16:
; AVX:       ## BB#0:
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    ## implicit-def: %XMM1
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_2
; AVX-NEXT:  ## BB#1: ## %cond.load
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm1
; AVX-NEXT:  LBB53_2: ## %else
; AVX-NEXT:    vpextrb $2, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_4
; AVX-NEXT:  ## BB#3: ## %cond.load1
; AVX-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_4: ## %else2
; AVX-NEXT:    vpextrb $4, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_6
; AVX-NEXT:  ## BB#5: ## %cond.load4
; AVX-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_6: ## %else5
; AVX-NEXT:    vpextrb $6, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_8
; AVX-NEXT:  ## BB#7: ## %cond.load7
; AVX-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_8: ## %else8
; AVX-NEXT:    vpextrb $8, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_10
; AVX-NEXT:  ## BB#9: ## %cond.load10
; AVX-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_10: ## %else11
; AVX-NEXT:    vpextrb $10, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_12
; AVX-NEXT:  ## BB#11: ## %cond.load13
; AVX-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_12: ## %else14
; AVX-NEXT:    vpextrb $12, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_14
; AVX-NEXT:  ## BB#13: ## %cond.load16
; AVX-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_14: ## %else17
; AVX-NEXT:    vpextrb $14, %xmm0, %eax
; AVX-NEXT:    testb $1, %al
; AVX-NEXT:    je LBB53_16
; AVX-NEXT:  ## BB#15: ## %cond.load19
; AVX-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm1
; AVX-NEXT:  LBB53_16: ## %else20
; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm2
; AVX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_8xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    ## implicit-def: %XMM0
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB53_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB53_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm0
; AVX512F-NEXT:  LBB53_16: ## %else20
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; AVX512F-NEXT:    vmovdqa64 %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT:    vpmovqw %zmm1, %xmm1
; AVX512F-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512F-NEXT:    vpxor %xmm2, %xmm1, %xmm2
; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpor %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_load_8xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k1
; SKX-NEXT:    vmovdqu16 (%rdi), %xmm0 {%k1} {z}
; SKX-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)

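; Hand-added note (not autogenerated): the <16 x i16> case is likewise
; scalarized on AVX1/AVX2/AVX512F: one conditional vpinsrw per mask lane,
; with vextract/vinsert traffic for the upper 128-bit half of the ymm value.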
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; AVX1-LABEL: test_mask_load_16xi16:
; AVX1:       ## BB#0:
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    ## implicit-def: %YMM1
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_2
; AVX1-NEXT:  ## BB#1: ## %cond.load
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:  LBB54_2: ## %else
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_4
; AVX1-NEXT:  ## BB#3: ## %cond.load1
; AVX1-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_4: ## %else2
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_6
; AVX1-NEXT:  ## BB#5: ## %cond.load4
; AVX1-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_6: ## %else5
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_8
; AVX1-NEXT:  ## BB#7: ## %cond.load7
; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_8: ## %else8
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_10
; AVX1-NEXT:  ## BB#9: ## %cond.load10
; AVX1-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_10: ## %else11
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_12
; AVX1-NEXT:  ## BB#11: ## %cond.load13
; AVX1-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_12: ## %else14
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_14
; AVX1-NEXT:  ## BB#13: ## %cond.load16
; AVX1-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_14: ## %else17
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_16
; AVX1-NEXT:  ## BB#15: ## %cond.load19
; AVX1-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX1-NEXT:  LBB54_16: ## %else20
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_18
; AVX1-NEXT:  ## BB#17: ## %cond.load22
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $0, 16(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_18: ## %else23
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_20
; AVX1-NEXT:  ## BB#19: ## %cond.load25
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $1, 18(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_20: ## %else26
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_22
; AVX1-NEXT:  ## BB#21: ## %cond.load28
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $2, 20(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_22: ## %else29
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_24
; AVX1-NEXT:  ## BB#23: ## %cond.load31
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $3, 22(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_24: ## %else32
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_26
; AVX1-NEXT:  ## BB#25: ## %cond.load34
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_26: ## %else35
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_28
; AVX1-NEXT:  ## BB#27: ## %cond.load37
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $5, 26(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_28: ## %else38
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_30
; AVX1-NEXT:  ## BB#29: ## %cond.load40
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_30: ## %else41
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB54_32
; AVX1-NEXT:  ## BB#31: ## %cond.load43
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:  LBB54_32: ## %else44
; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT:    vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT:    vpsraw $15, %xmm2, %xmm2
; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_load_16xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    ## implicit-def: %YMM1
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_2
; AVX2-NEXT:  ## BB#1: ## %cond.load
; AVX2-NEXT:    movzwl (%rdi), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:  LBB54_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_4
; AVX2-NEXT:  ## BB#3: ## %cond.load1
; AVX2-NEXT:    vpinsrw $1, 2(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_6
; AVX2-NEXT:  ## BB#5: ## %cond.load4
; AVX2-NEXT:    vpinsrw $2, 4(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_6: ## %else5
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_8
; AVX2-NEXT:  ## BB#7: ## %cond.load7
; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_8: ## %else8
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_10
; AVX2-NEXT:  ## BB#9: ## %cond.load10
; AVX2-NEXT:    vpinsrw $4, 8(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_10: ## %else11
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_12
; AVX2-NEXT:  ## BB#11: ## %cond.load13
; AVX2-NEXT:    vpinsrw $5, 10(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_12: ## %else14
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_14
; AVX2-NEXT:  ## BB#13: ## %cond.load16
; AVX2-NEXT:    vpinsrw $6, 12(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_14: ## %else17
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_16
; AVX2-NEXT:  ## BB#15: ## %cond.load19
; AVX2-NEXT:    vpinsrw $7, 14(%rdi), %xmm1, %xmm2
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:  LBB54_16: ## %else20
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_18
; AVX2-NEXT:  ## BB#17: ## %cond.load22
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $0, 16(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_18: ## %else23
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_20
; AVX2-NEXT:  ## BB#19: ## %cond.load25
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $1, 18(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_20: ## %else26
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_22
; AVX2-NEXT:  ## BB#21: ## %cond.load28
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $2, 20(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_22: ## %else29
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_24
; AVX2-NEXT:  ## BB#23: ## %cond.load31
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $3, 22(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_24: ## %else32
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_26
; AVX2-NEXT:  ## BB#25: ## %cond.load34
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_26: ## %else35
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_28
; AVX2-NEXT:  ## BB#27: ## %cond.load37
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $5, 26(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_28: ## %else38
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_30
; AVX2-NEXT:  ## BB#29: ## %cond.load40
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_30: ## %else41
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB54_32
; AVX2-NEXT:  ## BB#31: ## %cond.load43
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm2, %xmm2
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT:  LBB54_32: ## %else44
; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_load_16xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    ## implicit-def: %YMM0
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_2
; AVX512F-NEXT:  ## BB#1: ## %cond.load
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovd %eax, %xmm0
; AVX512F-NEXT:  LBB54_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_4
; AVX512F-NEXT:  ## BB#3: ## %cond.load1
; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_6
; AVX512F-NEXT:  ## BB#5: ## %cond.load4
; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_6: ## %else5
; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_8
; AVX512F-NEXT:  ## BB#7: ## %cond.load7
; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_8: ## %else8
; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_10
; AVX512F-NEXT:  ## BB#9: ## %cond.load10
; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_10: ## %else11
; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_12
; AVX512F-NEXT:  ## BB#11: ## %cond.load13
; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_12: ## %else14
; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_14
; AVX512F-NEXT:  ## BB#13: ## %cond.load16
; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_14: ## %else17
; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_16
; AVX512F-NEXT:  ## BB#15: ## %cond.load19
; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm1
; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT:  LBB54_16: ## %else20
; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_18
; AVX512F-NEXT:  ## BB#17: ## %cond.load22
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $0, 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_18: ## %else23
; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_20
; AVX512F-NEXT:  ## BB#19: ## %cond.load25
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $1, 18(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_20: ## %else26
; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_22
; AVX512F-NEXT:  ## BB#21: ## %cond.load28
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $2, 20(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_22: ## %else29
; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_24
; AVX512F-NEXT:  ## BB#23: ## %cond.load31
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $3, 22(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_24: ## %else32
; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_26
; AVX512F-NEXT:  ## BB#25: ## %cond.load34
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_26: ## %else35
; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB54_28
; AVX512F-NEXT:  ## BB#27: ## %cond.load37
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vpinsrw $5, 26(%rdi), %xmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:  LBB54_28: ## %else38
; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
6102; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
6103; AVX512F-NEXT:    kmovw %k0, %eax
6104; AVX512F-NEXT:    testb %al, %al
6105; AVX512F-NEXT:    je LBB54_30
6106; AVX512F-NEXT:  ## BB#29: ## %cond.load40
6107; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
6108; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm1, %xmm1
6109; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
6110; AVX512F-NEXT:  LBB54_30: ## %else41
6111; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
6112; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
6113; AVX512F-NEXT:    kmovw %k0, %eax
6114; AVX512F-NEXT:    testb %al, %al
6115; AVX512F-NEXT:    je LBB54_32
6116; AVX512F-NEXT:  ## BB#31: ## %cond.load43
6117; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
6118; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm1, %xmm1
6119; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
6120; AVX512F-NEXT:  LBB54_32: ## %else44
6121; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
6122; AVX512F-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
6123; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
6124; AVX512F-NEXT:    vpand %ymm0, %ymm1, %ymm0
6125; AVX512F-NEXT:    retq
6126;
6127; SKX-LABEL: test_mask_load_16xi16:
6128; SKX:       ## BB#0:
6129; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
6130; SKX-NEXT:    vpmovb2m %xmm0, %k1
6131; SKX-NEXT:    vmovdqu16 (%rdi), %ymm0 {%k1} {z}
6132; SKX-NEXT:    retq
6133  %res = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
6134  ret <16 x i16> %res
6135}
6136declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
6137
6138define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
6139; AVX1-LABEL: test_mask_load_32xi16:
6140; AVX1:       ## BB#0:
6141; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
6142; AVX1-NEXT:    testb $1, %al
6143; AVX1-NEXT:    je LBB55_2
6144; AVX1-NEXT:  ## BB#1: ## %cond.load
6145; AVX1-NEXT:    movzwl (%rdi), %eax
6146; AVX1-NEXT:    vmovd %eax, %xmm3
6147; AVX1-NEXT:  LBB55_2: ## %else
6148; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
6149; AVX1-NEXT:    testb $1, %al
6150; AVX1-NEXT:    je LBB55_4
6151; AVX1-NEXT:  ## BB#3: ## %cond.load1
6152; AVX1-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
6153; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6154; AVX1-NEXT:  LBB55_4: ## %else2
6155; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
6156; AVX1-NEXT:    testb $1, %al
6157; AVX1-NEXT:    je LBB55_6
6158; AVX1-NEXT:  ## BB#5: ## %cond.load4
6159; AVX1-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
6160; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6161; AVX1-NEXT:  LBB55_6: ## %else5
6162; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
6163; AVX1-NEXT:    testb $1, %al
6164; AVX1-NEXT:    je LBB55_8
6165; AVX1-NEXT:  ## BB#7: ## %cond.load7
6166; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
6167; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6168; AVX1-NEXT:  LBB55_8: ## %else8
6169; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
6170; AVX1-NEXT:    testb $1, %al
6171; AVX1-NEXT:    je LBB55_10
6172; AVX1-NEXT:  ## BB#9: ## %cond.load10
6173; AVX1-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
6174; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6175; AVX1-NEXT:  LBB55_10: ## %else11
6176; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
6177; AVX1-NEXT:    testb $1, %al
6178; AVX1-NEXT:    je LBB55_12
6179; AVX1-NEXT:  ## BB#11: ## %cond.load13
6180; AVX1-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
6181; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6182; AVX1-NEXT:  LBB55_12: ## %else14
6183; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
6184; AVX1-NEXT:    testb $1, %al
6185; AVX1-NEXT:    je LBB55_14
6186; AVX1-NEXT:  ## BB#13: ## %cond.load16
6187; AVX1-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
6188; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6189; AVX1-NEXT:  LBB55_14: ## %else17
6190; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
6191; AVX1-NEXT:    testb $1, %al
6192; AVX1-NEXT:    je LBB55_16
6193; AVX1-NEXT:  ## BB#15: ## %cond.load19
6194; AVX1-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
6195; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6196; AVX1-NEXT:  LBB55_16: ## %else20
6197; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
6198; AVX1-NEXT:    testb $1, %al
6199; AVX1-NEXT:    je LBB55_18
6200; AVX1-NEXT:  ## BB#17: ## %cond.load22
6201; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6202; AVX1-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
6203; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6204; AVX1-NEXT:  LBB55_18: ## %else23
6205; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
6206; AVX1-NEXT:    testb $1, %al
6207; AVX1-NEXT:    je LBB55_20
6208; AVX1-NEXT:  ## BB#19: ## %cond.load25
6209; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6210; AVX1-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
6211; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6212; AVX1-NEXT:  LBB55_20: ## %else26
6213; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
6214; AVX1-NEXT:    testb $1, %al
6215; AVX1-NEXT:    je LBB55_22
6216; AVX1-NEXT:  ## BB#21: ## %cond.load28
6217; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6218; AVX1-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
6219; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6220; AVX1-NEXT:  LBB55_22: ## %else29
6221; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
6222; AVX1-NEXT:    testb $1, %al
6223; AVX1-NEXT:    je LBB55_24
6224; AVX1-NEXT:  ## BB#23: ## %cond.load31
6225; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6226; AVX1-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
6227; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6228; AVX1-NEXT:  LBB55_24: ## %else32
6229; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
6230; AVX1-NEXT:    testb $1, %al
6231; AVX1-NEXT:    je LBB55_26
6232; AVX1-NEXT:  ## BB#25: ## %cond.load34
6233; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6234; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
6235; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6236; AVX1-NEXT:  LBB55_26: ## %else35
6237; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
6238; AVX1-NEXT:    testb $1, %al
6239; AVX1-NEXT:    je LBB55_28
6240; AVX1-NEXT:  ## BB#27: ## %cond.load37
6241; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6242; AVX1-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
6243; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6244; AVX1-NEXT:  LBB55_28: ## %else38
6245; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
6246; AVX1-NEXT:    testb $1, %al
6247; AVX1-NEXT:    je LBB55_30
6248; AVX1-NEXT:  ## BB#29: ## %cond.load40
6249; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6250; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
6251; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6252; AVX1-NEXT:  LBB55_30: ## %else41
6253; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
6254; AVX1-NEXT:    testb $1, %al
6255; AVX1-NEXT:    je LBB55_32
6256; AVX1-NEXT:  ## BB#31: ## %cond.load43
6257; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
6258; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
6259; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
6260; AVX1-NEXT:  LBB55_32: ## %else44
6261; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
6262; AVX1-NEXT:    vpextrb $0, %xmm4, %eax
6263; AVX1-NEXT:    testb $1, %al
6264; AVX1-NEXT:    je LBB55_34
6265; AVX1-NEXT:  ## BB#33: ## %cond.load46
6266; AVX1-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
6267; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
6268; AVX1-NEXT:  LBB55_34: ## %else47
6269; AVX1-NEXT:    vpextrb $1, %xmm4, %eax
6270; AVX1-NEXT:    testb $1, %al
6271; AVX1-NEXT:    je LBB55_36
6272; AVX1-NEXT:  ## BB#35: ## %cond.load49
6273; AVX1-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
6274; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6275; AVX1-NEXT:  LBB55_36: ## %else50
6276; AVX1-NEXT:    vpextrb $2, %xmm4, %eax
6277; AVX1-NEXT:    testb $1, %al
6278; AVX1-NEXT:    je LBB55_38
6279; AVX1-NEXT:  ## BB#37: ## %cond.load52
6280; AVX1-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
6281; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6282; AVX1-NEXT:  LBB55_38: ## %else53
6283; AVX1-NEXT:    vpextrb $3, %xmm4, %eax
6284; AVX1-NEXT:    testb $1, %al
6285; AVX1-NEXT:    je LBB55_40
6286; AVX1-NEXT:  ## BB#39: ## %cond.load55
6287; AVX1-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
6288; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6289; AVX1-NEXT:  LBB55_40: ## %else56
6290; AVX1-NEXT:    vpextrb $4, %xmm4, %eax
6291; AVX1-NEXT:    testb $1, %al
6292; AVX1-NEXT:    je LBB55_42
6293; AVX1-NEXT:  ## BB#41: ## %cond.load58
6294; AVX1-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
6295; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6296; AVX1-NEXT:  LBB55_42: ## %else59
6297; AVX1-NEXT:    vpextrb $5, %xmm4, %eax
6298; AVX1-NEXT:    testb $1, %al
6299; AVX1-NEXT:    je LBB55_44
6300; AVX1-NEXT:  ## BB#43: ## %cond.load61
6301; AVX1-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
6302; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6303; AVX1-NEXT:  LBB55_44: ## %else62
6304; AVX1-NEXT:    vpextrb $6, %xmm4, %eax
6305; AVX1-NEXT:    testb $1, %al
6306; AVX1-NEXT:    je LBB55_46
6307; AVX1-NEXT:  ## BB#45: ## %cond.load64
6308; AVX1-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
6309; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6310; AVX1-NEXT:  LBB55_46: ## %else65
6311; AVX1-NEXT:    vpextrb $7, %xmm4, %eax
6312; AVX1-NEXT:    testb $1, %al
6313; AVX1-NEXT:    je LBB55_48
6314; AVX1-NEXT:  ## BB#47: ## %cond.load67
6315; AVX1-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
6316; AVX1-NEXT:    vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6317; AVX1-NEXT:  LBB55_48: ## %else68
6318; AVX1-NEXT:    vpextrb $8, %xmm4, %eax
6319; AVX1-NEXT:    testb $1, %al
6320; AVX1-NEXT:    je LBB55_50
6321; AVX1-NEXT:  ## BB#49: ## %cond.load70
6322; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6323; AVX1-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
6324; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6325; AVX1-NEXT:  LBB55_50: ## %else71
6326; AVX1-NEXT:    vpextrb $9, %xmm4, %eax
6327; AVX1-NEXT:    testb $1, %al
6328; AVX1-NEXT:    je LBB55_52
6329; AVX1-NEXT:  ## BB#51: ## %cond.load73
6330; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6331; AVX1-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
6332; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6333; AVX1-NEXT:  LBB55_52: ## %else74
6334; AVX1-NEXT:    vpextrb $10, %xmm4, %eax
6335; AVX1-NEXT:    testb $1, %al
6336; AVX1-NEXT:    je LBB55_54
6337; AVX1-NEXT:  ## BB#53: ## %cond.load76
6338; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6339; AVX1-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
6340; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6341; AVX1-NEXT:  LBB55_54: ## %else77
6342; AVX1-NEXT:    vpextrb $11, %xmm4, %eax
6343; AVX1-NEXT:    testb $1, %al
6344; AVX1-NEXT:    je LBB55_56
6345; AVX1-NEXT:  ## BB#55: ## %cond.load79
6346; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6347; AVX1-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
6348; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6349; AVX1-NEXT:  LBB55_56: ## %else80
6350; AVX1-NEXT:    vpextrb $12, %xmm4, %eax
6351; AVX1-NEXT:    testb $1, %al
6352; AVX1-NEXT:    je LBB55_58
6353; AVX1-NEXT:  ## BB#57: ## %cond.load82
6354; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6355; AVX1-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
6356; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6357; AVX1-NEXT:  LBB55_58: ## %else83
6358; AVX1-NEXT:    vpextrb $13, %xmm4, %eax
6359; AVX1-NEXT:    testb $1, %al
6360; AVX1-NEXT:    je LBB55_60
6361; AVX1-NEXT:  ## BB#59: ## %cond.load85
6362; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6363; AVX1-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
6364; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6365; AVX1-NEXT:  LBB55_60: ## %else86
6366; AVX1-NEXT:    vpextrb $14, %xmm4, %eax
6367; AVX1-NEXT:    testb $1, %al
6368; AVX1-NEXT:    je LBB55_62
6369; AVX1-NEXT:  ## BB#61: ## %cond.load88
6370; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6371; AVX1-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
6372; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6373; AVX1-NEXT:  LBB55_62: ## %else89
6374; AVX1-NEXT:    vpextrb $15, %xmm4, %eax
6375; AVX1-NEXT:    testb $1, %al
6376; AVX1-NEXT:    je LBB55_64
6377; AVX1-NEXT:  ## BB#63: ## %cond.load91
6378; AVX1-NEXT:    vextractf128 $1, %ymm5, %xmm6
6379; AVX1-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
6380; AVX1-NEXT:    vinsertf128 $1, %xmm6, %ymm5, %ymm5
6381; AVX1-NEXT:  LBB55_64: ## %else92
6382; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
6383; AVX1-NEXT:    vpsllw $15, %xmm6, %xmm6
6384; AVX1-NEXT:    vpsraw $15, %xmm6, %xmm6
6385; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
6386; AVX1-NEXT:    vpsllw $15, %xmm0, %xmm0
6387; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm0
6388; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm6, %ymm0
6389; AVX1-NEXT:    vandnps %ymm1, %ymm0, %ymm1
6390; AVX1-NEXT:    vandps %ymm0, %ymm3, %ymm0
6391; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
6392; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
6393; AVX1-NEXT:    vpsllw $15, %xmm1, %xmm1
6394; AVX1-NEXT:    vpsraw $15, %xmm1, %xmm1
6395; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
6396; AVX1-NEXT:    vpsllw $15, %xmm3, %xmm3
6397; AVX1-NEXT:    vpsraw $15, %xmm3, %xmm3
6398; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
6399; AVX1-NEXT:    vandnps %ymm2, %ymm1, %ymm2
6400; AVX1-NEXT:    vandps %ymm1, %ymm5, %ymm1
6401; AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
6402; AVX1-NEXT:    retq
6403;
6404; AVX2-LABEL: test_mask_load_32xi16:
6405; AVX2:       ## BB#0:
6406; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
6407; AVX2-NEXT:    testb $1, %al
6408; AVX2-NEXT:    je LBB55_2
6409; AVX2-NEXT:  ## BB#1: ## %cond.load
6410; AVX2-NEXT:    movzwl (%rdi), %eax
6411; AVX2-NEXT:    vmovd %eax, %xmm3
6412; AVX2-NEXT:  LBB55_2: ## %else
6413; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
6414; AVX2-NEXT:    testb $1, %al
6415; AVX2-NEXT:    je LBB55_4
6416; AVX2-NEXT:  ## BB#3: ## %cond.load1
6417; AVX2-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
6418; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6419; AVX2-NEXT:  LBB55_4: ## %else2
6420; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
6421; AVX2-NEXT:    testb $1, %al
6422; AVX2-NEXT:    je LBB55_6
6423; AVX2-NEXT:  ## BB#5: ## %cond.load4
6424; AVX2-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
6425; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6426; AVX2-NEXT:  LBB55_6: ## %else5
6427; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
6428; AVX2-NEXT:    testb $1, %al
6429; AVX2-NEXT:    je LBB55_8
6430; AVX2-NEXT:  ## BB#7: ## %cond.load7
6431; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
6432; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6433; AVX2-NEXT:  LBB55_8: ## %else8
6434; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
6435; AVX2-NEXT:    testb $1, %al
6436; AVX2-NEXT:    je LBB55_10
6437; AVX2-NEXT:  ## BB#9: ## %cond.load10
6438; AVX2-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
6439; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6440; AVX2-NEXT:  LBB55_10: ## %else11
6441; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
6442; AVX2-NEXT:    testb $1, %al
6443; AVX2-NEXT:    je LBB55_12
6444; AVX2-NEXT:  ## BB#11: ## %cond.load13
6445; AVX2-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
6446; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6447; AVX2-NEXT:  LBB55_12: ## %else14
6448; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
6449; AVX2-NEXT:    testb $1, %al
6450; AVX2-NEXT:    je LBB55_14
6451; AVX2-NEXT:  ## BB#13: ## %cond.load16
6452; AVX2-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
6453; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6454; AVX2-NEXT:  LBB55_14: ## %else17
6455; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
6456; AVX2-NEXT:    testb $1, %al
6457; AVX2-NEXT:    je LBB55_16
6458; AVX2-NEXT:  ## BB#15: ## %cond.load19
6459; AVX2-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
6460; AVX2-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6461; AVX2-NEXT:  LBB55_16: ## %else20
6462; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
6463; AVX2-NEXT:    testb $1, %al
6464; AVX2-NEXT:    je LBB55_18
6465; AVX2-NEXT:  ## BB#17: ## %cond.load22
6466; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6467; AVX2-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
6468; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6469; AVX2-NEXT:  LBB55_18: ## %else23
6470; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
6471; AVX2-NEXT:    testb $1, %al
6472; AVX2-NEXT:    je LBB55_20
6473; AVX2-NEXT:  ## BB#19: ## %cond.load25
6474; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6475; AVX2-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
6476; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6477; AVX2-NEXT:  LBB55_20: ## %else26
6478; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
6479; AVX2-NEXT:    testb $1, %al
6480; AVX2-NEXT:    je LBB55_22
6481; AVX2-NEXT:  ## BB#21: ## %cond.load28
6482; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6483; AVX2-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
6484; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6485; AVX2-NEXT:  LBB55_22: ## %else29
6486; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
6487; AVX2-NEXT:    testb $1, %al
6488; AVX2-NEXT:    je LBB55_24
6489; AVX2-NEXT:  ## BB#23: ## %cond.load31
6490; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6491; AVX2-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
6492; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6493; AVX2-NEXT:  LBB55_24: ## %else32
6494; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
6495; AVX2-NEXT:    testb $1, %al
6496; AVX2-NEXT:    je LBB55_26
6497; AVX2-NEXT:  ## BB#25: ## %cond.load34
6498; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6499; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
6500; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6501; AVX2-NEXT:  LBB55_26: ## %else35
6502; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
6503; AVX2-NEXT:    testb $1, %al
6504; AVX2-NEXT:    je LBB55_28
6505; AVX2-NEXT:  ## BB#27: ## %cond.load37
6506; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6507; AVX2-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
6508; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6509; AVX2-NEXT:  LBB55_28: ## %else38
6510; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
6511; AVX2-NEXT:    testb $1, %al
6512; AVX2-NEXT:    je LBB55_30
6513; AVX2-NEXT:  ## BB#29: ## %cond.load40
6514; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6515; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
6516; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6517; AVX2-NEXT:  LBB55_30: ## %else41
6518; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
6519; AVX2-NEXT:    testb $1, %al
6520; AVX2-NEXT:    je LBB55_32
6521; AVX2-NEXT:  ## BB#31: ## %cond.load43
6522; AVX2-NEXT:    vextracti128 $1, %ymm3, %xmm4
6523; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
6524; AVX2-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6525; AVX2-NEXT:  LBB55_32: ## %else44
6526; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm4
6527; AVX2-NEXT:    vpextrb $0, %xmm4, %eax
6528; AVX2-NEXT:    testb $1, %al
6529; AVX2-NEXT:    je LBB55_34
6530; AVX2-NEXT:  ## BB#33: ## %cond.load46
6531; AVX2-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
6532; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
6533; AVX2-NEXT:  LBB55_34: ## %else47
6534; AVX2-NEXT:    vpextrb $1, %xmm4, %eax
6535; AVX2-NEXT:    testb $1, %al
6536; AVX2-NEXT:    je LBB55_36
6537; AVX2-NEXT:  ## BB#35: ## %cond.load49
6538; AVX2-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
6539; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6540; AVX2-NEXT:  LBB55_36: ## %else50
6541; AVX2-NEXT:    vpextrb $2, %xmm4, %eax
6542; AVX2-NEXT:    testb $1, %al
6543; AVX2-NEXT:    je LBB55_38
6544; AVX2-NEXT:  ## BB#37: ## %cond.load52
6545; AVX2-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
6546; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6547; AVX2-NEXT:  LBB55_38: ## %else53
6548; AVX2-NEXT:    vpextrb $3, %xmm4, %eax
6549; AVX2-NEXT:    testb $1, %al
6550; AVX2-NEXT:    je LBB55_40
6551; AVX2-NEXT:  ## BB#39: ## %cond.load55
6552; AVX2-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
6553; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6554; AVX2-NEXT:  LBB55_40: ## %else56
6555; AVX2-NEXT:    vpextrb $4, %xmm4, %eax
6556; AVX2-NEXT:    testb $1, %al
6557; AVX2-NEXT:    je LBB55_42
6558; AVX2-NEXT:  ## BB#41: ## %cond.load58
6559; AVX2-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
6560; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6561; AVX2-NEXT:  LBB55_42: ## %else59
6562; AVX2-NEXT:    vpextrb $5, %xmm4, %eax
6563; AVX2-NEXT:    testb $1, %al
6564; AVX2-NEXT:    je LBB55_44
6565; AVX2-NEXT:  ## BB#43: ## %cond.load61
6566; AVX2-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
6567; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6568; AVX2-NEXT:  LBB55_44: ## %else62
6569; AVX2-NEXT:    vpextrb $6, %xmm4, %eax
6570; AVX2-NEXT:    testb $1, %al
6571; AVX2-NEXT:    je LBB55_46
6572; AVX2-NEXT:  ## BB#45: ## %cond.load64
6573; AVX2-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
6574; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6575; AVX2-NEXT:  LBB55_46: ## %else65
6576; AVX2-NEXT:    vpextrb $7, %xmm4, %eax
6577; AVX2-NEXT:    testb $1, %al
6578; AVX2-NEXT:    je LBB55_48
6579; AVX2-NEXT:  ## BB#47: ## %cond.load67
6580; AVX2-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
6581; AVX2-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6582; AVX2-NEXT:  LBB55_48: ## %else68
6583; AVX2-NEXT:    vpextrb $8, %xmm4, %eax
6584; AVX2-NEXT:    testb $1, %al
6585; AVX2-NEXT:    je LBB55_50
6586; AVX2-NEXT:  ## BB#49: ## %cond.load70
6587; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6588; AVX2-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
6589; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6590; AVX2-NEXT:  LBB55_50: ## %else71
6591; AVX2-NEXT:    vpextrb $9, %xmm4, %eax
6592; AVX2-NEXT:    testb $1, %al
6593; AVX2-NEXT:    je LBB55_52
6594; AVX2-NEXT:  ## BB#51: ## %cond.load73
6595; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6596; AVX2-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
6597; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6598; AVX2-NEXT:  LBB55_52: ## %else74
6599; AVX2-NEXT:    vpextrb $10, %xmm4, %eax
6600; AVX2-NEXT:    testb $1, %al
6601; AVX2-NEXT:    je LBB55_54
6602; AVX2-NEXT:  ## BB#53: ## %cond.load76
6603; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6604; AVX2-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
6605; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6606; AVX2-NEXT:  LBB55_54: ## %else77
6607; AVX2-NEXT:    vpextrb $11, %xmm4, %eax
6608; AVX2-NEXT:    testb $1, %al
6609; AVX2-NEXT:    je LBB55_56
6610; AVX2-NEXT:  ## BB#55: ## %cond.load79
6611; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6612; AVX2-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
6613; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6614; AVX2-NEXT:  LBB55_56: ## %else80
6615; AVX2-NEXT:    vpextrb $12, %xmm4, %eax
6616; AVX2-NEXT:    testb $1, %al
6617; AVX2-NEXT:    je LBB55_58
6618; AVX2-NEXT:  ## BB#57: ## %cond.load82
6619; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6620; AVX2-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
6621; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6622; AVX2-NEXT:  LBB55_58: ## %else83
6623; AVX2-NEXT:    vpextrb $13, %xmm4, %eax
6624; AVX2-NEXT:    testb $1, %al
6625; AVX2-NEXT:    je LBB55_60
6626; AVX2-NEXT:  ## BB#59: ## %cond.load85
6627; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6628; AVX2-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
6629; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6630; AVX2-NEXT:  LBB55_60: ## %else86
6631; AVX2-NEXT:    vpextrb $14, %xmm4, %eax
6632; AVX2-NEXT:    testb $1, %al
6633; AVX2-NEXT:    je LBB55_62
6634; AVX2-NEXT:  ## BB#61: ## %cond.load88
6635; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6636; AVX2-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
6637; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6638; AVX2-NEXT:  LBB55_62: ## %else89
6639; AVX2-NEXT:    vpextrb $15, %xmm4, %eax
6640; AVX2-NEXT:    testb $1, %al
6641; AVX2-NEXT:    je LBB55_64
6642; AVX2-NEXT:  ## BB#63: ## %cond.load91
6643; AVX2-NEXT:    vextracti128 $1, %ymm5, %xmm6
6644; AVX2-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
6645; AVX2-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6646; AVX2-NEXT:  LBB55_64: ## %else92
6647; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
6648; AVX2-NEXT:    vpsllw $15, %ymm0, %ymm0
6649; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm0
6650; AVX2-NEXT:    vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
6651; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
6652; AVX2-NEXT:    vpsllw $15, %ymm1, %ymm1
6653; AVX2-NEXT:    vpsraw $15, %ymm1, %ymm1
6654; AVX2-NEXT:    vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
6655; AVX2-NEXT:    retq
6656;
6657; AVX512F-LABEL: test_mask_load_32xi16:
6658; AVX512F:       ## BB#0:
6659; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
6660; AVX512F-NEXT:    testb $1, %al
6661; AVX512F-NEXT:    je LBB55_2
6662; AVX512F-NEXT:  ## BB#1: ## %cond.load
6663; AVX512F-NEXT:    movzwl (%rdi), %eax
6664; AVX512F-NEXT:    vmovd %eax, %xmm3
6665; AVX512F-NEXT:  LBB55_2: ## %else
6666; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
6667; AVX512F-NEXT:    testb $1, %al
6668; AVX512F-NEXT:    je LBB55_4
6669; AVX512F-NEXT:  ## BB#3: ## %cond.load1
6670; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm3, %xmm4
6671; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6672; AVX512F-NEXT:  LBB55_4: ## %else2
6673; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
6674; AVX512F-NEXT:    testb $1, %al
6675; AVX512F-NEXT:    je LBB55_6
6676; AVX512F-NEXT:  ## BB#5: ## %cond.load4
6677; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm3, %xmm4
6678; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6679; AVX512F-NEXT:  LBB55_6: ## %else5
6680; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
6681; AVX512F-NEXT:    testb $1, %al
6682; AVX512F-NEXT:    je LBB55_8
6683; AVX512F-NEXT:  ## BB#7: ## %cond.load7
6684; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm3, %xmm4
6685; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6686; AVX512F-NEXT:  LBB55_8: ## %else8
6687; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
6688; AVX512F-NEXT:    testb $1, %al
6689; AVX512F-NEXT:    je LBB55_10
6690; AVX512F-NEXT:  ## BB#9: ## %cond.load10
6691; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm3, %xmm4
6692; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6693; AVX512F-NEXT:  LBB55_10: ## %else11
6694; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
6695; AVX512F-NEXT:    testb $1, %al
6696; AVX512F-NEXT:    je LBB55_12
6697; AVX512F-NEXT:  ## BB#11: ## %cond.load13
6698; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm3, %xmm4
6699; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6700; AVX512F-NEXT:  LBB55_12: ## %else14
6701; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
6702; AVX512F-NEXT:    testb $1, %al
6703; AVX512F-NEXT:    je LBB55_14
6704; AVX512F-NEXT:  ## BB#13: ## %cond.load16
6705; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm3, %xmm4
6706; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6707; AVX512F-NEXT:  LBB55_14: ## %else17
6708; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
6709; AVX512F-NEXT:    testb $1, %al
6710; AVX512F-NEXT:    je LBB55_16
6711; AVX512F-NEXT:  ## BB#15: ## %cond.load19
6712; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm3, %xmm4
6713; AVX512F-NEXT:    vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
6714; AVX512F-NEXT:  LBB55_16: ## %else20
6715; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
6716; AVX512F-NEXT:    testb $1, %al
6717; AVX512F-NEXT:    je LBB55_18
6718; AVX512F-NEXT:  ## BB#17: ## %cond.load22
6719; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6720; AVX512F-NEXT:    vpinsrw $0, 16(%rdi), %xmm4, %xmm4
6721; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6722; AVX512F-NEXT:  LBB55_18: ## %else23
6723; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
6724; AVX512F-NEXT:    testb $1, %al
6725; AVX512F-NEXT:    je LBB55_20
6726; AVX512F-NEXT:  ## BB#19: ## %cond.load25
6727; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6728; AVX512F-NEXT:    vpinsrw $1, 18(%rdi), %xmm4, %xmm4
6729; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6730; AVX512F-NEXT:  LBB55_20: ## %else26
6731; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
6732; AVX512F-NEXT:    testb $1, %al
6733; AVX512F-NEXT:    je LBB55_22
6734; AVX512F-NEXT:  ## BB#21: ## %cond.load28
6735; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6736; AVX512F-NEXT:    vpinsrw $2, 20(%rdi), %xmm4, %xmm4
6737; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6738; AVX512F-NEXT:  LBB55_22: ## %else29
6739; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
6740; AVX512F-NEXT:    testb $1, %al
6741; AVX512F-NEXT:    je LBB55_24
6742; AVX512F-NEXT:  ## BB#23: ## %cond.load31
6743; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6744; AVX512F-NEXT:    vpinsrw $3, 22(%rdi), %xmm4, %xmm4
6745; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6746; AVX512F-NEXT:  LBB55_24: ## %else32
6747; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
6748; AVX512F-NEXT:    testb $1, %al
6749; AVX512F-NEXT:    je LBB55_26
6750; AVX512F-NEXT:  ## BB#25: ## %cond.load34
6751; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6752; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm4, %xmm4
6753; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6754; AVX512F-NEXT:  LBB55_26: ## %else35
6755; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
6756; AVX512F-NEXT:    testb $1, %al
6757; AVX512F-NEXT:    je LBB55_28
6758; AVX512F-NEXT:  ## BB#27: ## %cond.load37
6759; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6760; AVX512F-NEXT:    vpinsrw $5, 26(%rdi), %xmm4, %xmm4
6761; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6762; AVX512F-NEXT:  LBB55_28: ## %else38
6763; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
6764; AVX512F-NEXT:    testb $1, %al
6765; AVX512F-NEXT:    je LBB55_30
6766; AVX512F-NEXT:  ## BB#29: ## %cond.load40
6767; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6768; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm4, %xmm4
6769; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6770; AVX512F-NEXT:  LBB55_30: ## %else41
6771; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
6772; AVX512F-NEXT:    testb $1, %al
6773; AVX512F-NEXT:    je LBB55_32
6774; AVX512F-NEXT:  ## BB#31: ## %cond.load43
6775; AVX512F-NEXT:    vextracti128 $1, %ymm3, %xmm4
6776; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm4, %xmm4
6777; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
6778; AVX512F-NEXT:  LBB55_32: ## %else44
6779; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm4
6780; AVX512F-NEXT:    vpextrb $0, %xmm4, %eax
6781; AVX512F-NEXT:    testb $1, %al
6782; AVX512F-NEXT:    je LBB55_34
6783; AVX512F-NEXT:  ## BB#33: ## %cond.load46
6784; AVX512F-NEXT:    vpinsrw $0, 32(%rdi), %xmm0, %xmm5
6785; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
6786; AVX512F-NEXT:  LBB55_34: ## %else47
6787; AVX512F-NEXT:    vpextrb $1, %xmm4, %eax
6788; AVX512F-NEXT:    testb $1, %al
6789; AVX512F-NEXT:    je LBB55_36
6790; AVX512F-NEXT:  ## BB#35: ## %cond.load49
6791; AVX512F-NEXT:    vpinsrw $1, 34(%rdi), %xmm5, %xmm6
6792; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6793; AVX512F-NEXT:  LBB55_36: ## %else50
6794; AVX512F-NEXT:    vpextrb $2, %xmm4, %eax
6795; AVX512F-NEXT:    testb $1, %al
6796; AVX512F-NEXT:    je LBB55_38
6797; AVX512F-NEXT:  ## BB#37: ## %cond.load52
6798; AVX512F-NEXT:    vpinsrw $2, 36(%rdi), %xmm5, %xmm6
6799; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6800; AVX512F-NEXT:  LBB55_38: ## %else53
6801; AVX512F-NEXT:    vpextrb $3, %xmm4, %eax
6802; AVX512F-NEXT:    testb $1, %al
6803; AVX512F-NEXT:    je LBB55_40
6804; AVX512F-NEXT:  ## BB#39: ## %cond.load55
6805; AVX512F-NEXT:    vpinsrw $3, 38(%rdi), %xmm5, %xmm6
6806; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6807; AVX512F-NEXT:  LBB55_40: ## %else56
6808; AVX512F-NEXT:    vpextrb $4, %xmm4, %eax
6809; AVX512F-NEXT:    testb $1, %al
6810; AVX512F-NEXT:    je LBB55_42
6811; AVX512F-NEXT:  ## BB#41: ## %cond.load58
6812; AVX512F-NEXT:    vpinsrw $4, 40(%rdi), %xmm5, %xmm6
6813; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6814; AVX512F-NEXT:  LBB55_42: ## %else59
6815; AVX512F-NEXT:    vpextrb $5, %xmm4, %eax
6816; AVX512F-NEXT:    testb $1, %al
6817; AVX512F-NEXT:    je LBB55_44
6818; AVX512F-NEXT:  ## BB#43: ## %cond.load61
6819; AVX512F-NEXT:    vpinsrw $5, 42(%rdi), %xmm5, %xmm6
6820; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6821; AVX512F-NEXT:  LBB55_44: ## %else62
6822; AVX512F-NEXT:    vpextrb $6, %xmm4, %eax
6823; AVX512F-NEXT:    testb $1, %al
6824; AVX512F-NEXT:    je LBB55_46
6825; AVX512F-NEXT:  ## BB#45: ## %cond.load64
6826; AVX512F-NEXT:    vpinsrw $6, 44(%rdi), %xmm5, %xmm6
6827; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6828; AVX512F-NEXT:  LBB55_46: ## %else65
6829; AVX512F-NEXT:    vpextrb $7, %xmm4, %eax
6830; AVX512F-NEXT:    testb $1, %al
6831; AVX512F-NEXT:    je LBB55_48
6832; AVX512F-NEXT:  ## BB#47: ## %cond.load67
6833; AVX512F-NEXT:    vpinsrw $7, 46(%rdi), %xmm5, %xmm6
6834; AVX512F-NEXT:    vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
6835; AVX512F-NEXT:  LBB55_48: ## %else68
6836; AVX512F-NEXT:    vpextrb $8, %xmm4, %eax
6837; AVX512F-NEXT:    testb $1, %al
6838; AVX512F-NEXT:    je LBB55_50
6839; AVX512F-NEXT:  ## BB#49: ## %cond.load70
6840; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6841; AVX512F-NEXT:    vpinsrw $0, 48(%rdi), %xmm6, %xmm6
6842; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6843; AVX512F-NEXT:  LBB55_50: ## %else71
6844; AVX512F-NEXT:    vpextrb $9, %xmm4, %eax
6845; AVX512F-NEXT:    testb $1, %al
6846; AVX512F-NEXT:    je LBB55_52
6847; AVX512F-NEXT:  ## BB#51: ## %cond.load73
6848; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6849; AVX512F-NEXT:    vpinsrw $1, 50(%rdi), %xmm6, %xmm6
6850; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6851; AVX512F-NEXT:  LBB55_52: ## %else74
6852; AVX512F-NEXT:    vpextrb $10, %xmm4, %eax
6853; AVX512F-NEXT:    testb $1, %al
6854; AVX512F-NEXT:    je LBB55_54
6855; AVX512F-NEXT:  ## BB#53: ## %cond.load76
6856; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6857; AVX512F-NEXT:    vpinsrw $2, 52(%rdi), %xmm6, %xmm6
6858; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6859; AVX512F-NEXT:  LBB55_54: ## %else77
6860; AVX512F-NEXT:    vpextrb $11, %xmm4, %eax
6861; AVX512F-NEXT:    testb $1, %al
6862; AVX512F-NEXT:    je LBB55_56
6863; AVX512F-NEXT:  ## BB#55: ## %cond.load79
6864; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6865; AVX512F-NEXT:    vpinsrw $3, 54(%rdi), %xmm6, %xmm6
6866; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6867; AVX512F-NEXT:  LBB55_56: ## %else80
6868; AVX512F-NEXT:    vpextrb $12, %xmm4, %eax
6869; AVX512F-NEXT:    testb $1, %al
6870; AVX512F-NEXT:    je LBB55_58
6871; AVX512F-NEXT:  ## BB#57: ## %cond.load82
6872; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6873; AVX512F-NEXT:    vpinsrw $4, 56(%rdi), %xmm6, %xmm6
6874; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6875; AVX512F-NEXT:  LBB55_58: ## %else83
6876; AVX512F-NEXT:    vpextrb $13, %xmm4, %eax
6877; AVX512F-NEXT:    testb $1, %al
6878; AVX512F-NEXT:    je LBB55_60
6879; AVX512F-NEXT:  ## BB#59: ## %cond.load85
6880; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6881; AVX512F-NEXT:    vpinsrw $5, 58(%rdi), %xmm6, %xmm6
6882; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6883; AVX512F-NEXT:  LBB55_60: ## %else86
6884; AVX512F-NEXT:    vpextrb $14, %xmm4, %eax
6885; AVX512F-NEXT:    testb $1, %al
6886; AVX512F-NEXT:    je LBB55_62
6887; AVX512F-NEXT:  ## BB#61: ## %cond.load88
6888; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6889; AVX512F-NEXT:    vpinsrw $6, 60(%rdi), %xmm6, %xmm6
6890; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6891; AVX512F-NEXT:  LBB55_62: ## %else89
6892; AVX512F-NEXT:    vpextrb $15, %xmm4, %eax
6893; AVX512F-NEXT:    testb $1, %al
6894; AVX512F-NEXT:    je LBB55_64
6895; AVX512F-NEXT:  ## BB#63: ## %cond.load91
6896; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm6
6897; AVX512F-NEXT:    vpinsrw $7, 62(%rdi), %xmm6, %xmm6
6898; AVX512F-NEXT:    vinserti128 $1, %xmm6, %ymm5, %ymm5
6899; AVX512F-NEXT:  LBB55_64: ## %else92
6900; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
6901; AVX512F-NEXT:    vpsllw $15, %ymm0, %ymm0
6902; AVX512F-NEXT:    vpsraw $15, %ymm0, %ymm0
6903; AVX512F-NEXT:    vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
6904; AVX512F-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
6905; AVX512F-NEXT:    vpsllw $15, %ymm1, %ymm1
6906; AVX512F-NEXT:    vpsraw $15, %ymm1, %ymm1
6907; AVX512F-NEXT:    vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
6908; AVX512F-NEXT:    retq
6909;
6910; SKX-LABEL: test_mask_load_32xi16:
6911; SKX:       ## BB#0:
6912; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
6913; SKX-NEXT:    vpmovb2m %ymm0, %k1
6914; SKX-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1}
6915; SKX-NEXT:    vmovaps %zmm1, %zmm0
6916; SKX-NEXT:    retq
6917  %res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
6918  ret <32 x i16> %res
6919}
6920declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
6921
6922define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
6923; AVX-LABEL: test_mask_store_16xi8:
6924; AVX:       ## BB#0:
6925; AVX-NEXT:    vpextrb $0, %xmm0, %eax
6926; AVX-NEXT:    testb $1, %al
6927; AVX-NEXT:    je LBB56_2
6928; AVX-NEXT:  ## BB#1: ## %cond.store
6929; AVX-NEXT:    vpextrb $0, %xmm1, (%rdi)
6930; AVX-NEXT:  LBB56_2: ## %else
6931; AVX-NEXT:    vpextrb $1, %xmm0, %eax
6932; AVX-NEXT:    testb $1, %al
6933; AVX-NEXT:    je LBB56_4
6934; AVX-NEXT:  ## BB#3: ## %cond.store1
6935; AVX-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
6936; AVX-NEXT:  LBB56_4: ## %else2
6937; AVX-NEXT:    vpextrb $2, %xmm0, %eax
6938; AVX-NEXT:    testb $1, %al
6939; AVX-NEXT:    je LBB56_6
6940; AVX-NEXT:  ## BB#5: ## %cond.store3
6941; AVX-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
6942; AVX-NEXT:  LBB56_6: ## %else4
6943; AVX-NEXT:    vpextrb $3, %xmm0, %eax
6944; AVX-NEXT:    testb $1, %al
6945; AVX-NEXT:    je LBB56_8
6946; AVX-NEXT:  ## BB#7: ## %cond.store5
6947; AVX-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
6948; AVX-NEXT:  LBB56_8: ## %else6
6949; AVX-NEXT:    vpextrb $4, %xmm0, %eax
6950; AVX-NEXT:    testb $1, %al
6951; AVX-NEXT:    je LBB56_10
6952; AVX-NEXT:  ## BB#9: ## %cond.store7
6953; AVX-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
6954; AVX-NEXT:  LBB56_10: ## %else8
6955; AVX-NEXT:    vpextrb $5, %xmm0, %eax
6956; AVX-NEXT:    testb $1, %al
6957; AVX-NEXT:    je LBB56_12
6958; AVX-NEXT:  ## BB#11: ## %cond.store9
6959; AVX-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
6960; AVX-NEXT:  LBB56_12: ## %else10
6961; AVX-NEXT:    vpextrb $6, %xmm0, %eax
6962; AVX-NEXT:    testb $1, %al
6963; AVX-NEXT:    je LBB56_14
6964; AVX-NEXT:  ## BB#13: ## %cond.store11
6965; AVX-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
6966; AVX-NEXT:  LBB56_14: ## %else12
6967; AVX-NEXT:    vpextrb $7, %xmm0, %eax
6968; AVX-NEXT:    testb $1, %al
6969; AVX-NEXT:    je LBB56_16
6970; AVX-NEXT:  ## BB#15: ## %cond.store13
6971; AVX-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
6972; AVX-NEXT:  LBB56_16: ## %else14
6973; AVX-NEXT:    vpextrb $8, %xmm0, %eax
6974; AVX-NEXT:    testb $1, %al
6975; AVX-NEXT:    je LBB56_18
6976; AVX-NEXT:  ## BB#17: ## %cond.store15
6977; AVX-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
6978; AVX-NEXT:  LBB56_18: ## %else16
6979; AVX-NEXT:    vpextrb $9, %xmm0, %eax
6980; AVX-NEXT:    testb $1, %al
6981; AVX-NEXT:    je LBB56_20
6982; AVX-NEXT:  ## BB#19: ## %cond.store17
6983; AVX-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
6984; AVX-NEXT:  LBB56_20: ## %else18
6985; AVX-NEXT:    vpextrb $10, %xmm0, %eax
6986; AVX-NEXT:    testb $1, %al
6987; AVX-NEXT:    je LBB56_22
6988; AVX-NEXT:  ## BB#21: ## %cond.store19
6989; AVX-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
6990; AVX-NEXT:  LBB56_22: ## %else20
6991; AVX-NEXT:    vpextrb $11, %xmm0, %eax
6992; AVX-NEXT:    testb $1, %al
6993; AVX-NEXT:    je LBB56_24
6994; AVX-NEXT:  ## BB#23: ## %cond.store21
6995; AVX-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
6996; AVX-NEXT:  LBB56_24: ## %else22
6997; AVX-NEXT:    vpextrb $12, %xmm0, %eax
6998; AVX-NEXT:    testb $1, %al
6999; AVX-NEXT:    je LBB56_26
7000; AVX-NEXT:  ## BB#25: ## %cond.store23
7001; AVX-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
7002; AVX-NEXT:  LBB56_26: ## %else24
7003; AVX-NEXT:    vpextrb $13, %xmm0, %eax
7004; AVX-NEXT:    testb $1, %al
7005; AVX-NEXT:    je LBB56_28
7006; AVX-NEXT:  ## BB#27: ## %cond.store25
7007; AVX-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
7008; AVX-NEXT:  LBB56_28: ## %else26
7009; AVX-NEXT:    vpextrb $14, %xmm0, %eax
7010; AVX-NEXT:    testb $1, %al
7011; AVX-NEXT:    je LBB56_30
7012; AVX-NEXT:  ## BB#29: ## %cond.store27
7013; AVX-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
7014; AVX-NEXT:  LBB56_30: ## %else28
7015; AVX-NEXT:    vpextrb $15, %xmm0, %eax
7016; AVX-NEXT:    testb $1, %al
7017; AVX-NEXT:    je LBB56_32
7018; AVX-NEXT:  ## BB#31: ## %cond.store29
7019; AVX-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
7020; AVX-NEXT:  LBB56_32: ## %else30
7021; AVX-NEXT:    retq
7022;
7023; AVX512F-LABEL: test_mask_store_16xi8:
7024; AVX512F:       ## BB#0:
7025; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
7026; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
7027; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
7028; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
7029; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7030; AVX512F-NEXT:    kmovw %k1, %eax
7031; AVX512F-NEXT:    testb %al, %al
7032; AVX512F-NEXT:    je LBB56_2
7033; AVX512F-NEXT:  ## BB#1: ## %cond.store
7034; AVX512F-NEXT:    vpextrb $0, %xmm1, (%rdi)
7035; AVX512F-NEXT:  LBB56_2: ## %else
7036; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
7037; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7038; AVX512F-NEXT:    kmovw %k1, %eax
7039; AVX512F-NEXT:    testb %al, %al
7040; AVX512F-NEXT:    je LBB56_4
7041; AVX512F-NEXT:  ## BB#3: ## %cond.store1
7042; AVX512F-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
7043; AVX512F-NEXT:  LBB56_4: ## %else2
7044; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
7045; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7046; AVX512F-NEXT:    kmovw %k1, %eax
7047; AVX512F-NEXT:    testb %al, %al
7048; AVX512F-NEXT:    je LBB56_6
7049; AVX512F-NEXT:  ## BB#5: ## %cond.store3
7050; AVX512F-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
7051; AVX512F-NEXT:  LBB56_6: ## %else4
7052; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
7053; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7054; AVX512F-NEXT:    kmovw %k1, %eax
7055; AVX512F-NEXT:    testb %al, %al
7056; AVX512F-NEXT:    je LBB56_8
7057; AVX512F-NEXT:  ## BB#7: ## %cond.store5
7058; AVX512F-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
7059; AVX512F-NEXT:  LBB56_8: ## %else6
7060; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
7061; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7062; AVX512F-NEXT:    kmovw %k1, %eax
7063; AVX512F-NEXT:    testb %al, %al
7064; AVX512F-NEXT:    je LBB56_10
7065; AVX512F-NEXT:  ## BB#9: ## %cond.store7
7066; AVX512F-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
7067; AVX512F-NEXT:  LBB56_10: ## %else8
7068; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
7069; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7070; AVX512F-NEXT:    kmovw %k1, %eax
7071; AVX512F-NEXT:    testb %al, %al
7072; AVX512F-NEXT:    je LBB56_12
7073; AVX512F-NEXT:  ## BB#11: ## %cond.store9
7074; AVX512F-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
7075; AVX512F-NEXT:  LBB56_12: ## %else10
7076; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
7077; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7078; AVX512F-NEXT:    kmovw %k1, %eax
7079; AVX512F-NEXT:    testb %al, %al
7080; AVX512F-NEXT:    je LBB56_14
7081; AVX512F-NEXT:  ## BB#13: ## %cond.store11
7082; AVX512F-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
7083; AVX512F-NEXT:  LBB56_14: ## %else12
7084; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
7085; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7086; AVX512F-NEXT:    kmovw %k1, %eax
7087; AVX512F-NEXT:    testb %al, %al
7088; AVX512F-NEXT:    je LBB56_16
7089; AVX512F-NEXT:  ## BB#15: ## %cond.store13
7090; AVX512F-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
7091; AVX512F-NEXT:  LBB56_16: ## %else14
7092; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
7093; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7094; AVX512F-NEXT:    kmovw %k1, %eax
7095; AVX512F-NEXT:    testb %al, %al
7096; AVX512F-NEXT:    je LBB56_18
7097; AVX512F-NEXT:  ## BB#17: ## %cond.store15
7098; AVX512F-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
7099; AVX512F-NEXT:  LBB56_18: ## %else16
7100; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
7101; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7102; AVX512F-NEXT:    kmovw %k1, %eax
7103; AVX512F-NEXT:    testb %al, %al
7104; AVX512F-NEXT:    je LBB56_20
7105; AVX512F-NEXT:  ## BB#19: ## %cond.store17
7106; AVX512F-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
7107; AVX512F-NEXT:  LBB56_20: ## %else18
7108; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
7109; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7110; AVX512F-NEXT:    kmovw %k1, %eax
7111; AVX512F-NEXT:    testb %al, %al
7112; AVX512F-NEXT:    je LBB56_22
7113; AVX512F-NEXT:  ## BB#21: ## %cond.store19
7114; AVX512F-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
7115; AVX512F-NEXT:  LBB56_22: ## %else20
7116; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
7117; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7118; AVX512F-NEXT:    kmovw %k1, %eax
7119; AVX512F-NEXT:    testb %al, %al
7120; AVX512F-NEXT:    je LBB56_24
7121; AVX512F-NEXT:  ## BB#23: ## %cond.store21
7122; AVX512F-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
7123; AVX512F-NEXT:  LBB56_24: ## %else22
7124; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
7125; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7126; AVX512F-NEXT:    kmovw %k1, %eax
7127; AVX512F-NEXT:    testb %al, %al
7128; AVX512F-NEXT:    je LBB56_26
7129; AVX512F-NEXT:  ## BB#25: ## %cond.store23
7130; AVX512F-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
7131; AVX512F-NEXT:  LBB56_26: ## %else24
7132; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
7133; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7134; AVX512F-NEXT:    kmovw %k1, %eax
7135; AVX512F-NEXT:    testb %al, %al
7136; AVX512F-NEXT:    je LBB56_28
7137; AVX512F-NEXT:  ## BB#27: ## %cond.store25
7138; AVX512F-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
7139; AVX512F-NEXT:  LBB56_28: ## %else26
7140; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
7141; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
7142; AVX512F-NEXT:    kmovw %k1, %eax
7143; AVX512F-NEXT:    testb %al, %al
7144; AVX512F-NEXT:    je LBB56_30
7145; AVX512F-NEXT:  ## BB#29: ## %cond.store27
7146; AVX512F-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
7147; AVX512F-NEXT:  LBB56_30: ## %else28
7148; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
7149; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
7150; AVX512F-NEXT:    kmovw %k0, %eax
7151; AVX512F-NEXT:    testb %al, %al
7152; AVX512F-NEXT:    je LBB56_32
7153; AVX512F-NEXT:  ## BB#31: ## %cond.store29
7154; AVX512F-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
7155; AVX512F-NEXT:  LBB56_32: ## %else30
7156; AVX512F-NEXT:    retq
7157;
7158; SKX-LABEL: test_mask_store_16xi8:
7159; SKX:       ## BB#0:
7160; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
7161; SKX-NEXT:    vpmovb2m %xmm0, %k1
7162; SKX-NEXT:    vmovdqu8 %xmm1, (%rdi) {%k1}
7163; SKX-NEXT:    retq
7164  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
7165  ret void
7166}
7167declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
7168
7169define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
7170; AVX1-LABEL: test_mask_store_32xi8:
7171; AVX1:       ## BB#0:
7172; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
7173; AVX1-NEXT:    testb $1, %al
7174; AVX1-NEXT:    je LBB57_2
7175; AVX1-NEXT:  ## BB#1: ## %cond.store
7176; AVX1-NEXT:    vpextrb $0, %xmm1, (%rdi)
7177; AVX1-NEXT:  LBB57_2: ## %else
7178; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
7179; AVX1-NEXT:    testb $1, %al
7180; AVX1-NEXT:    je LBB57_4
7181; AVX1-NEXT:  ## BB#3: ## %cond.store1
7182; AVX1-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
7183; AVX1-NEXT:  LBB57_4: ## %else2
7184; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
7185; AVX1-NEXT:    testb $1, %al
7186; AVX1-NEXT:    je LBB57_6
7187; AVX1-NEXT:  ## BB#5: ## %cond.store3
7188; AVX1-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
7189; AVX1-NEXT:  LBB57_6: ## %else4
7190; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
7191; AVX1-NEXT:    testb $1, %al
7192; AVX1-NEXT:    je LBB57_8
7193; AVX1-NEXT:  ## BB#7: ## %cond.store5
7194; AVX1-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
7195; AVX1-NEXT:  LBB57_8: ## %else6
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX1-NEXT:  LBB57_10: ## %else8
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX1-NEXT:  LBB57_12: ## %else10
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX1-NEXT:  LBB57_14: ## %else12
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX1-NEXT:  LBB57_16: ## %else14
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX1-NEXT:  LBB57_18: ## %else16
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX1-NEXT:  LBB57_20: ## %else18
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX1-NEXT:  LBB57_22: ## %else20
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX1-NEXT:  LBB57_24: ## %else22
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX1-NEXT:  LBB57_26: ## %else24
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX1-NEXT:  LBB57_28: ## %else26
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX1-NEXT:  LBB57_30: ## %else28
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX1-NEXT:  LBB57_32: ## %else30
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_34
; AVX1-NEXT:  ## BB#33: ## %cond.store31
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX1-NEXT:  LBB57_34: ## %else32
; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_36
; AVX1-NEXT:  ## BB#35: ## %cond.store33
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX1-NEXT:  LBB57_36: ## %else34
; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_38
; AVX1-NEXT:  ## BB#37: ## %cond.store35
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX1-NEXT:  LBB57_38: ## %else36
; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_40
; AVX1-NEXT:  ## BB#39: ## %cond.store37
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX1-NEXT:  LBB57_40: ## %else38
; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_42
; AVX1-NEXT:  ## BB#41: ## %cond.store39
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX1-NEXT:  LBB57_42: ## %else40
; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_44
; AVX1-NEXT:  ## BB#43: ## %cond.store41
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX1-NEXT:  LBB57_44: ## %else42
; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_46
; AVX1-NEXT:  ## BB#45: ## %cond.store43
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX1-NEXT:  LBB57_46: ## %else44
; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_48
; AVX1-NEXT:  ## BB#47: ## %cond.store45
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX1-NEXT:  LBB57_48: ## %else46
; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_50
; AVX1-NEXT:  ## BB#49: ## %cond.store47
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX1-NEXT:  LBB57_50: ## %else48
; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_52
; AVX1-NEXT:  ## BB#51: ## %cond.store49
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX1-NEXT:  LBB57_52: ## %else50
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX1-NEXT:  LBB57_54: ## %else52
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX1-NEXT:  LBB57_56: ## %else54
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX1-NEXT:  LBB57_58: ## %else56
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX1-NEXT:  LBB57_60: ## %else58
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX1-NEXT:  LBB57_62: ## %else60
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB57_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX1-NEXT:  LBB57_64: ## %else62
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_32xi8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX2-NEXT:  LBB57_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX2-NEXT:  LBB57_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX2-NEXT:  LBB57_6: ## %else4
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX2-NEXT:  LBB57_8: ## %else6
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX2-NEXT:  LBB57_10: ## %else8
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX2-NEXT:  LBB57_12: ## %else10
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX2-NEXT:  LBB57_14: ## %else12
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX2-NEXT:  LBB57_16: ## %else14
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX2-NEXT:  LBB57_18: ## %else16
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX2-NEXT:  LBB57_20: ## %else18
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX2-NEXT:  LBB57_22: ## %else20
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX2-NEXT:  LBB57_24: ## %else22
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX2-NEXT:  LBB57_26: ## %else24
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX2-NEXT:  LBB57_28: ## %else26
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX2-NEXT:  LBB57_30: ## %else28
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX2-NEXT:  LBB57_32: ## %else30
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX2-NEXT:  LBB57_34: ## %else32
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX2-NEXT:  LBB57_36: ## %else34
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX2-NEXT:  LBB57_38: ## %else36
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX2-NEXT:  LBB57_40: ## %else38
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX2-NEXT:  LBB57_42: ## %else40
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX2-NEXT:  LBB57_44: ## %else42
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX2-NEXT:  LBB57_46: ## %else44
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX2-NEXT:  LBB57_48: ## %else46
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX2-NEXT:  LBB57_50: ## %else48
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX2-NEXT:  LBB57_52: ## %else50
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX2-NEXT:  LBB57_54: ## %else52
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX2-NEXT:  LBB57_56: ## %else54
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX2-NEXT:  LBB57_58: ## %else56
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX2-NEXT:  LBB57_60: ## %else58
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX2-NEXT:  LBB57_62: ## %else60
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB57_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX2-NEXT:  LBB57_64: ## %else62
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_32xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vpextrb $0, %xmm1, (%rdi)
; AVX512F-NEXT:  LBB57_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
; AVX512F-NEXT:  LBB57_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB57_6: ## %else4
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
; AVX512F-NEXT:  LBB57_8: ## %else6
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB57_10: ## %else8
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
; AVX512F-NEXT:  LBB57_12: ## %else10
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB57_14: ## %else12
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
; AVX512F-NEXT:  LBB57_16: ## %else14
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB57_18: ## %else16
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
; AVX512F-NEXT:  LBB57_20: ## %else18
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB57_22: ## %else20
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
; AVX512F-NEXT:  LBB57_24: ## %else22
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB57_26: ## %else24
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
; AVX512F-NEXT:  LBB57_28: ## %else26
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB57_30: ## %else28
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
; AVX512F-NEXT:  LBB57_32: ## %else30
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_34
; AVX512F-NEXT:  ## BB#33: ## %cond.store31
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $0, %xmm2, 16(%rdi)
; AVX512F-NEXT:  LBB57_34: ## %else32
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_36
; AVX512F-NEXT:  ## BB#35: ## %cond.store33
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $1, %xmm2, 17(%rdi)
; AVX512F-NEXT:  LBB57_36: ## %else34
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_38
; AVX512F-NEXT:  ## BB#37: ## %cond.store35
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $2, %xmm2, 18(%rdi)
; AVX512F-NEXT:  LBB57_38: ## %else36
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_40
; AVX512F-NEXT:  ## BB#39: ## %cond.store37
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $3, %xmm2, 19(%rdi)
; AVX512F-NEXT:  LBB57_40: ## %else38
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_42
; AVX512F-NEXT:  ## BB#41: ## %cond.store39
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $4, %xmm2, 20(%rdi)
; AVX512F-NEXT:  LBB57_42: ## %else40
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_44
; AVX512F-NEXT:  ## BB#43: ## %cond.store41
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $5, %xmm2, 21(%rdi)
; AVX512F-NEXT:  LBB57_44: ## %else42
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_46
; AVX512F-NEXT:  ## BB#45: ## %cond.store43
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $6, %xmm2, 22(%rdi)
; AVX512F-NEXT:  LBB57_46: ## %else44
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_48
; AVX512F-NEXT:  ## BB#47: ## %cond.store45
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $7, %xmm2, 23(%rdi)
; AVX512F-NEXT:  LBB57_48: ## %else46
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_50
; AVX512F-NEXT:  ## BB#49: ## %cond.store47
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $8, %xmm2, 24(%rdi)
; AVX512F-NEXT:  LBB57_50: ## %else48
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_52
; AVX512F-NEXT:  ## BB#51: ## %cond.store49
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $9, %xmm2, 25(%rdi)
; AVX512F-NEXT:  LBB57_52: ## %else50
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_54
; AVX512F-NEXT:  ## BB#53: ## %cond.store51
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $10, %xmm2, 26(%rdi)
; AVX512F-NEXT:  LBB57_54: ## %else52
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_56
; AVX512F-NEXT:  ## BB#55: ## %cond.store53
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $11, %xmm2, 27(%rdi)
; AVX512F-NEXT:  LBB57_56: ## %else54
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_58
; AVX512F-NEXT:  ## BB#57: ## %cond.store55
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $12, %xmm2, 28(%rdi)
; AVX512F-NEXT:  LBB57_58: ## %else56
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_60
; AVX512F-NEXT:  ## BB#59: ## %cond.store57
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $13, %xmm2, 29(%rdi)
; AVX512F-NEXT:  LBB57_60: ## %else58
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_62
; AVX512F-NEXT:  ## BB#61: ## %cond.store59
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT:    vpextrb $14, %xmm2, 30(%rdi)
; AVX512F-NEXT:  LBB57_62: ## %else60
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB57_64
; AVX512F-NEXT:  ## BB#63: ## %cond.store61
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
; AVX512F-NEXT:  LBB57_64: ## %else62
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_32xi8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu8 %ymm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
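; Note: SKX (avx512bw+avx512vl) is the only run above with a legal byte-masked store.
; Each lane's i1 sits in bit 0 of its byte, so the mask is materialized into a k-register
; by shifting it into the byte's sign bit (vpsllw $7 is used because there is no
; byte-granularity shift; vpmovb2m only reads the MSB of each byte), and the whole store
; folds into one predicated vmovdqu8. The other targets scalarize into one
; extract/test/branch/store sequence per lane. The 64xi8 test below additionally passes
; the <64 x i1> mask as an explicit argument, so the scalarized paths also have to read
; mask bits out of GPRs and stack slots per the calling convention.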
define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
; AVX1-LABEL: test_mask_store_64xi8:
; AVX1:       ## BB#0:
; AVX1-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    testb $1, %dil
; AVX1-NEXT:    je LBB58_2
; AVX1-NEXT:  ## BB#1: ## %cond.store
; AVX1-NEXT:    vpextrb $0, %xmm0, (%rax)
; AVX1-NEXT:  LBB58_2: ## %else
; AVX1-NEXT:    testb $1, %sil
; AVX1-NEXT:    je LBB58_4
; AVX1-NEXT:  ## BB#3: ## %cond.store1
; AVX1-NEXT:    vpextrb $1, %xmm0, 1(%rax)
; AVX1-NEXT:  LBB58_4: ## %else2
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_6
; AVX1-NEXT:  ## BB#5: ## %cond.store3
; AVX1-NEXT:    vpextrb $2, %xmm0, 2(%rax)
; AVX1-NEXT:  LBB58_6: ## %else4
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_8
; AVX1-NEXT:  ## BB#7: ## %cond.store5
; AVX1-NEXT:    vpextrb $3, %xmm0, 3(%rax)
; AVX1-NEXT:  LBB58_8: ## %else6
; AVX1-NEXT:    testb $1, %r8b
; AVX1-NEXT:    je LBB58_10
; AVX1-NEXT:  ## BB#9: ## %cond.store7
; AVX1-NEXT:    vpextrb $4, %xmm0, 4(%rax)
; AVX1-NEXT:  LBB58_10: ## %else8
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %r9b
; AVX1-NEXT:    je LBB58_12
; AVX1-NEXT:  ## BB#11: ## %cond.store9
; AVX1-NEXT:    vpextrb $5, %xmm0, 5(%rax)
; AVX1-NEXT:  LBB58_12: ## %else10
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_14
; AVX1-NEXT:  ## BB#13: ## %cond.store11
; AVX1-NEXT:    vpextrb $6, %xmm0, 6(%rax)
; AVX1-NEXT:  LBB58_14: ## %else12
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_16
; AVX1-NEXT:  ## BB#15: ## %cond.store13
; AVX1-NEXT:    vpextrb $7, %xmm0, 7(%rax)
; AVX1-NEXT:  LBB58_16: ## %else14
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_18
; AVX1-NEXT:  ## BB#17: ## %cond.store15
; AVX1-NEXT:    vpextrb $8, %xmm0, 8(%rax)
; AVX1-NEXT:  LBB58_18: ## %else16
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_20
; AVX1-NEXT:  ## BB#19: ## %cond.store17
; AVX1-NEXT:    vpextrb $9, %xmm0, 9(%rax)
; AVX1-NEXT:  LBB58_20: ## %else18
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_22
; AVX1-NEXT:  ## BB#21: ## %cond.store19
; AVX1-NEXT:    vpextrb $10, %xmm0, 10(%rax)
; AVX1-NEXT:  LBB58_22: ## %else20
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_24
; AVX1-NEXT:  ## BB#23: ## %cond.store21
; AVX1-NEXT:    vpextrb $11, %xmm0, 11(%rax)
; AVX1-NEXT:  LBB58_24: ## %else22
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_26
; AVX1-NEXT:  ## BB#25: ## %cond.store23
; AVX1-NEXT:    vpextrb $12, %xmm0, 12(%rax)
; AVX1-NEXT:  LBB58_26: ## %else24
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_28
; AVX1-NEXT:  ## BB#27: ## %cond.store25
; AVX1-NEXT:    vpextrb $13, %xmm0, 13(%rax)
; AVX1-NEXT:  LBB58_28: ## %else26
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_30
; AVX1-NEXT:  ## BB#29: ## %cond.store27
; AVX1-NEXT:    vpextrb $14, %xmm0, 14(%rax)
; AVX1-NEXT:  LBB58_30: ## %else28
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_32
; AVX1-NEXT:  ## BB#31: ## %cond.store29
; AVX1-NEXT:    vpextrb $15, %xmm0, 15(%rax)
; AVX1-NEXT:  LBB58_32: ## %else30
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_34
; AVX1-NEXT:  ## BB#33: ## %cond.store31
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $0, %xmm2, 16(%rax)
; AVX1-NEXT:  LBB58_34: ## %else32
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_36
; AVX1-NEXT:  ## BB#35: ## %cond.store33
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $1, %xmm2, 17(%rax)
; AVX1-NEXT:  LBB58_36: ## %else34
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_38
; AVX1-NEXT:  ## BB#37: ## %cond.store35
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $2, %xmm2, 18(%rax)
; AVX1-NEXT:  LBB58_38: ## %else36
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_40
; AVX1-NEXT:  ## BB#39: ## %cond.store37
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $3, %xmm2, 19(%rax)
; AVX1-NEXT:  LBB58_40: ## %else38
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_42
; AVX1-NEXT:  ## BB#41: ## %cond.store39
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $4, %xmm2, 20(%rax)
; AVX1-NEXT:  LBB58_42: ## %else40
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_44
; AVX1-NEXT:  ## BB#43: ## %cond.store41
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $5, %xmm2, 21(%rax)
; AVX1-NEXT:  LBB58_44: ## %else42
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_46
; AVX1-NEXT:  ## BB#45: ## %cond.store43
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $6, %xmm2, 22(%rax)
; AVX1-NEXT:  LBB58_46: ## %else44
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_48
; AVX1-NEXT:  ## BB#47: ## %cond.store45
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $7, %xmm2, 23(%rax)
; AVX1-NEXT:  LBB58_48: ## %else46
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_50
; AVX1-NEXT:  ## BB#49: ## %cond.store47
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $8, %xmm2, 24(%rax)
; AVX1-NEXT:  LBB58_50: ## %else48
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_52
; AVX1-NEXT:  ## BB#51: ## %cond.store49
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $9, %xmm2, 25(%rax)
; AVX1-NEXT:  LBB58_52: ## %else50
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $10, %xmm2, 26(%rax)
; AVX1-NEXT:  LBB58_54: ## %else52
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $11, %xmm2, 27(%rax)
; AVX1-NEXT:  LBB58_56: ## %else54
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $12, %xmm2, 28(%rax)
; AVX1-NEXT:  LBB58_58: ## %else56
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $13, %xmm2, 29(%rax)
; AVX1-NEXT:  LBB58_60: ## %else58
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpextrb $14, %xmm2, 30(%rax)
; AVX1-NEXT:  LBB58_62: ## %else60
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 31(%rax)
; AVX1-NEXT:  LBB58_64: ## %else62
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_66
; AVX1-NEXT:  ## BB#65: ## %cond.store63
; AVX1-NEXT:    vpextrb $0, %xmm1, 32(%rax)
; AVX1-NEXT:  LBB58_66: ## %else64
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_68
; AVX1-NEXT:  ## BB#67: ## %cond.store65
; AVX1-NEXT:    vpextrb $1, %xmm1, 33(%rax)
; AVX1-NEXT:  LBB58_68: ## %else66
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_70
; AVX1-NEXT:  ## BB#69: ## %cond.store67
; AVX1-NEXT:    vpextrb $2, %xmm1, 34(%rax)
; AVX1-NEXT:  LBB58_70: ## %else68
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_72
; AVX1-NEXT:  ## BB#71: ## %cond.store69
; AVX1-NEXT:    vpextrb $3, %xmm1, 35(%rax)
; AVX1-NEXT:  LBB58_72: ## %else70
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_74
; AVX1-NEXT:  ## BB#73: ## %cond.store71
; AVX1-NEXT:    vpextrb $4, %xmm1, 36(%rax)
; AVX1-NEXT:  LBB58_74: ## %else72
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_76
; AVX1-NEXT:  ## BB#75: ## %cond.store73
; AVX1-NEXT:    vpextrb $5, %xmm1, 37(%rax)
; AVX1-NEXT:  LBB58_76: ## %else74
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_78
; AVX1-NEXT:  ## BB#77: ## %cond.store75
; AVX1-NEXT:    vpextrb $6, %xmm1, 38(%rax)
; AVX1-NEXT:  LBB58_78: ## %else76
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_80
; AVX1-NEXT:  ## BB#79: ## %cond.store77
; AVX1-NEXT:    vpextrb $7, %xmm1, 39(%rax)
; AVX1-NEXT:  LBB58_80: ## %else78
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_82
; AVX1-NEXT:  ## BB#81: ## %cond.store79
; AVX1-NEXT:    vpextrb $8, %xmm1, 40(%rax)
; AVX1-NEXT:  LBB58_82: ## %else80
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_84
; AVX1-NEXT:  ## BB#83: ## %cond.store81
; AVX1-NEXT:    vpextrb $9, %xmm1, 41(%rax)
; AVX1-NEXT:  LBB58_84: ## %else82
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_86
; AVX1-NEXT:  ## BB#85: ## %cond.store83
; AVX1-NEXT:    vpextrb $10, %xmm1, 42(%rax)
; AVX1-NEXT:  LBB58_86: ## %else84
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_88
; AVX1-NEXT:  ## BB#87: ## %cond.store85
; AVX1-NEXT:    vpextrb $11, %xmm1, 43(%rax)
; AVX1-NEXT:  LBB58_88: ## %else86
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_90
; AVX1-NEXT:  ## BB#89: ## %cond.store87
; AVX1-NEXT:    vpextrb $12, %xmm1, 44(%rax)
; AVX1-NEXT:  LBB58_90: ## %else88
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_92
; AVX1-NEXT:  ## BB#91: ## %cond.store89
; AVX1-NEXT:    vpextrb $13, %xmm1, 45(%rax)
; AVX1-NEXT:  LBB58_92: ## %else90
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_94
; AVX1-NEXT:  ## BB#93: ## %cond.store91
; AVX1-NEXT:    vpextrb $14, %xmm1, 46(%rax)
; AVX1-NEXT:  LBB58_94: ## %else92
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_96
; AVX1-NEXT:  ## BB#95: ## %cond.store93
; AVX1-NEXT:    vpextrb $15, %xmm1, 47(%rax)
; AVX1-NEXT:  LBB58_96: ## %else94
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_98
; AVX1-NEXT:  ## BB#97: ## %cond.store95
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, 48(%rax)
; AVX1-NEXT:  LBB58_98: ## %else96
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_100
; AVX1-NEXT:  ## BB#99: ## %cond.store97
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $1, %xmm0, 49(%rax)
; AVX1-NEXT:  LBB58_100: ## %else98
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_102
; AVX1-NEXT:  ## BB#101: ## %cond.store99
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $2, %xmm0, 50(%rax)
; AVX1-NEXT:  LBB58_102: ## %else100
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_104
; AVX1-NEXT:  ## BB#103: ## %cond.store101
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $3, %xmm0, 51(%rax)
; AVX1-NEXT:  LBB58_104: ## %else102
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_106
; AVX1-NEXT:  ## BB#105: ## %cond.store103
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $4, %xmm0, 52(%rax)
; AVX1-NEXT:  LBB58_106: ## %else104
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_108
; AVX1-NEXT:  ## BB#107: ## %cond.store105
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $5, %xmm0, 53(%rax)
; AVX1-NEXT:  LBB58_108: ## %else106
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_110
; AVX1-NEXT:  ## BB#109: ## %cond.store107
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $6, %xmm0, 54(%rax)
; AVX1-NEXT:  LBB58_110: ## %else108
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_112
; AVX1-NEXT:  ## BB#111: ## %cond.store109
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $7, %xmm0, 55(%rax)
; AVX1-NEXT:  LBB58_112: ## %else110
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_114
; AVX1-NEXT:  ## BB#113: ## %cond.store111
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $8, %xmm0, 56(%rax)
; AVX1-NEXT:  LBB58_114: ## %else112
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_116
; AVX1-NEXT:  ## BB#115: ## %cond.store113
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $9, %xmm0, 57(%rax)
; AVX1-NEXT:  LBB58_116: ## %else114
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_118
; AVX1-NEXT:  ## BB#117: ## %cond.store115
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $10, %xmm0, 58(%rax)
; AVX1-NEXT:  LBB58_118: ## %else116
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_120
; AVX1-NEXT:  ## BB#119: ## %cond.store117
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $11, %xmm0, 59(%rax)
; AVX1-NEXT:  LBB58_120: ## %else118
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_122
; AVX1-NEXT:  ## BB#121: ## %cond.store119
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $12, %xmm0, 60(%rax)
; AVX1-NEXT:  LBB58_122: ## %else120
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_124
; AVX1-NEXT:  ## BB#123: ## %cond.store121
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $13, %xmm0, 61(%rax)
; AVX1-NEXT:  LBB58_124: ## %else122
; AVX1-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX1-NEXT:    testb $1, %cl
; AVX1-NEXT:    je LBB58_126
; AVX1-NEXT:  ## BB#125: ## %cond.store123
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $14, %xmm0, 62(%rax)
; AVX1-NEXT:  LBB58_126: ## %else124
; AVX1-NEXT:    testb $1, %dl
; AVX1-NEXT:    je LBB58_128
; AVX1-NEXT:  ## BB#127: ## %cond.store125
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpextrb $15, %xmm0, 63(%rax)
; AVX1-NEXT:  LBB58_128: ## %else126
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_64xi8:
; AVX2:       ## BB#0:
; AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    testb $1, %dil
; AVX2-NEXT:    je LBB58_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vpextrb $0, %xmm0, (%rax)
; AVX2-NEXT:  LBB58_2: ## %else
; AVX2-NEXT:    testb $1, %sil
; AVX2-NEXT:    je LBB58_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrb $1, %xmm0, 1(%rax)
; AVX2-NEXT:  LBB58_4: ## %else2
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrb $2, %xmm0, 2(%rax)
; AVX2-NEXT:  LBB58_6: ## %else4
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrb $3, %xmm0, 3(%rax)
; AVX2-NEXT:  LBB58_8: ## %else6
; AVX2-NEXT:    testb $1, %r8b
; AVX2-NEXT:    je LBB58_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrb $4, %xmm0, 4(%rax)
; AVX2-NEXT:  LBB58_10: ## %else8
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %r9b
; AVX2-NEXT:    je LBB58_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrb $5, %xmm0, 5(%rax)
; AVX2-NEXT:  LBB58_12: ## %else10
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrb $6, %xmm0, 6(%rax)
; AVX2-NEXT:  LBB58_14: ## %else12
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrb $7, %xmm0, 7(%rax)
; AVX2-NEXT:  LBB58_16: ## %else14
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vpextrb $8, %xmm0, 8(%rax)
; AVX2-NEXT:  LBB58_18: ## %else16
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vpextrb $9, %xmm0, 9(%rax)
; AVX2-NEXT:  LBB58_20: ## %else18
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vpextrb $10, %xmm0, 10(%rax)
; AVX2-NEXT:  LBB58_22: ## %else20
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vpextrb $11, %xmm0, 11(%rax)
; AVX2-NEXT:  LBB58_24: ## %else22
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vpextrb $12, %xmm0, 12(%rax)
; AVX2-NEXT:  LBB58_26: ## %else24
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vpextrb $13, %xmm0, 13(%rax)
; AVX2-NEXT:  LBB58_28: ## %else26
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vpextrb $14, %xmm0, 14(%rax)
; AVX2-NEXT:  LBB58_30: ## %else28
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vpextrb $15, %xmm0, 15(%rax)
; AVX2-NEXT:  LBB58_32: ## %else30
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $0, %xmm2, 16(%rax)
; AVX2-NEXT:  LBB58_34: ## %else32
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $1, %xmm2, 17(%rax)
; AVX2-NEXT:  LBB58_36: ## %else34
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $2, %xmm2, 18(%rax)
; AVX2-NEXT:  LBB58_38: ## %else36
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $3, %xmm2, 19(%rax)
; AVX2-NEXT:  LBB58_40: ## %else38
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $4, %xmm2, 20(%rax)
; AVX2-NEXT:  LBB58_42: ## %else40
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $5, %xmm2, 21(%rax)
; AVX2-NEXT:  LBB58_44: ## %else42
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $6, %xmm2, 22(%rax)
; AVX2-NEXT:  LBB58_46: ## %else44
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $7, %xmm2, 23(%rax)
; AVX2-NEXT:  LBB58_48: ## %else46
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $8, %xmm2, 24(%rax)
; AVX2-NEXT:  LBB58_50: ## %else48
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $9, %xmm2, 25(%rax)
; AVX2-NEXT:  LBB58_52: ## %else50
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $10, %xmm2, 26(%rax)
; AVX2-NEXT:  LBB58_54: ## %else52
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $11, %xmm2, 27(%rax)
; AVX2-NEXT:  LBB58_56: ## %else54
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $12, %xmm2, 28(%rax)
; AVX2-NEXT:  LBB58_58: ## %else56
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $13, %xmm2, 29(%rax)
; AVX2-NEXT:  LBB58_60: ## %else58
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT:    vpextrb $14, %xmm2, 30(%rax)
; AVX2-NEXT:  LBB58_62: ## %else60
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 31(%rax)
; AVX2-NEXT:  LBB58_64: ## %else62
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_66
; AVX2-NEXT:  ## BB#65: ## %cond.store63
; AVX2-NEXT:    vpextrb $0, %xmm1, 32(%rax)
; AVX2-NEXT:  LBB58_66: ## %else64
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_68
; AVX2-NEXT:  ## BB#67: ## %cond.store65
; AVX2-NEXT:    vpextrb $1, %xmm1, 33(%rax)
; AVX2-NEXT:  LBB58_68: ## %else66
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_70
; AVX2-NEXT:  ## BB#69: ## %cond.store67
; AVX2-NEXT:    vpextrb $2, %xmm1, 34(%rax)
; AVX2-NEXT:  LBB58_70: ## %else68
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_72
; AVX2-NEXT:  ## BB#71: ## %cond.store69
; AVX2-NEXT:    vpextrb $3, %xmm1, 35(%rax)
; AVX2-NEXT:  LBB58_72: ## %else70
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_74
; AVX2-NEXT:  ## BB#73: ## %cond.store71
; AVX2-NEXT:    vpextrb $4, %xmm1, 36(%rax)
; AVX2-NEXT:  LBB58_74: ## %else72
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_76
; AVX2-NEXT:  ## BB#75: ## %cond.store73
; AVX2-NEXT:    vpextrb $5, %xmm1, 37(%rax)
; AVX2-NEXT:  LBB58_76: ## %else74
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_78
; AVX2-NEXT:  ## BB#77: ## %cond.store75
; AVX2-NEXT:    vpextrb $6, %xmm1, 38(%rax)
; AVX2-NEXT:  LBB58_78: ## %else76
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_80
; AVX2-NEXT:  ## BB#79: ## %cond.store77
; AVX2-NEXT:    vpextrb $7, %xmm1, 39(%rax)
; AVX2-NEXT:  LBB58_80: ## %else78
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_82
; AVX2-NEXT:  ## BB#81: ## %cond.store79
; AVX2-NEXT:    vpextrb $8, %xmm1, 40(%rax)
; AVX2-NEXT:  LBB58_82: ## %else80
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_84
; AVX2-NEXT:  ## BB#83: ## %cond.store81
; AVX2-NEXT:    vpextrb $9, %xmm1, 41(%rax)
; AVX2-NEXT:  LBB58_84: ## %else82
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_86
; AVX2-NEXT:  ## BB#85: ## %cond.store83
; AVX2-NEXT:    vpextrb $10, %xmm1, 42(%rax)
; AVX2-NEXT:  LBB58_86: ## %else84
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_88
; AVX2-NEXT:  ## BB#87: ## %cond.store85
; AVX2-NEXT:    vpextrb $11, %xmm1, 43(%rax)
; AVX2-NEXT:  LBB58_88: ## %else86
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_90
; AVX2-NEXT:  ## BB#89: ## %cond.store87
; AVX2-NEXT:    vpextrb $12, %xmm1, 44(%rax)
; AVX2-NEXT:  LBB58_90: ## %else88
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_92
; AVX2-NEXT:  ## BB#91: ## %cond.store89
; AVX2-NEXT:    vpextrb $13, %xmm1, 45(%rax)
; AVX2-NEXT:  LBB58_92: ## %else90
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_94
; AVX2-NEXT:  ## BB#93: ## %cond.store91
; AVX2-NEXT:    vpextrb $14, %xmm1, 46(%rax)
; AVX2-NEXT:  LBB58_94: ## %else92
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_96
; AVX2-NEXT:  ## BB#95: ## %cond.store93
; AVX2-NEXT:    vpextrb $15, %xmm1, 47(%rax)
; AVX2-NEXT:  LBB58_96: ## %else94
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_98
; AVX2-NEXT:  ## BB#97: ## %cond.store95
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, 48(%rax)
; AVX2-NEXT:  LBB58_98: ## %else96
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_100
; AVX2-NEXT:  ## BB#99: ## %cond.store97
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $1, %xmm0, 49(%rax)
; AVX2-NEXT:  LBB58_100: ## %else98
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_102
; AVX2-NEXT:  ## BB#101: ## %cond.store99
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $2, %xmm0, 50(%rax)
; AVX2-NEXT:  LBB58_102: ## %else100
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_104
; AVX2-NEXT:  ## BB#103: ## %cond.store101
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $3, %xmm0, 51(%rax)
; AVX2-NEXT:  LBB58_104: ## %else102
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_106
; AVX2-NEXT:  ## BB#105: ## %cond.store103
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $4, %xmm0, 52(%rax)
; AVX2-NEXT:  LBB58_106: ## %else104
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_108
; AVX2-NEXT:  ## BB#107: ## %cond.store105
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $5, %xmm0, 53(%rax)
; AVX2-NEXT:  LBB58_108: ## %else106
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_110
; AVX2-NEXT:  ## BB#109: ## %cond.store107
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $6, %xmm0, 54(%rax)
; AVX2-NEXT:  LBB58_110: ## %else108
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_112
; AVX2-NEXT:  ## BB#111: ## %cond.store109
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $7, %xmm0, 55(%rax)
; AVX2-NEXT:  LBB58_112: ## %else110
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_114
; AVX2-NEXT:  ## BB#113: ## %cond.store111
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $8, %xmm0, 56(%rax)
; AVX2-NEXT:  LBB58_114: ## %else112
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_116
; AVX2-NEXT:  ## BB#115: ## %cond.store113
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $9, %xmm0, 57(%rax)
; AVX2-NEXT:  LBB58_116: ## %else114
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_118
; AVX2-NEXT:  ## BB#117: ## %cond.store115
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $10, %xmm0, 58(%rax)
; AVX2-NEXT:  LBB58_118: ## %else116
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_120
; AVX2-NEXT:  ## BB#119: ## %cond.store117
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $11, %xmm0, 59(%rax)
; AVX2-NEXT:  LBB58_120: ## %else118
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_122
; AVX2-NEXT:  ## BB#121: ## %cond.store119
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $12, %xmm0, 60(%rax)
; AVX2-NEXT:  LBB58_122: ## %else120
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %cl
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_124
; AVX2-NEXT:  ## BB#123: ## %cond.store121
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $13, %xmm0, 61(%rax)
; AVX2-NEXT:  LBB58_124: ## %else122
; AVX2-NEXT:    movb {{[0-9]+}}(%rsp), %dl
; AVX2-NEXT:    testb $1, %cl
; AVX2-NEXT:    je LBB58_126
; AVX2-NEXT:  ## BB#125: ## %cond.store123
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $14, %xmm0, 62(%rax)
; AVX2-NEXT:  LBB58_126: ## %else124
; AVX2-NEXT:    testb $1, %dl
; AVX2-NEXT:    je LBB58_128
; AVX2-NEXT:  ## BB#127: ## %cond.store125
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
; AVX2-NEXT:    vpextrb $15, %xmm0, 63(%rax)
; AVX2-NEXT:  LBB58_128: ## %else126
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_64xi8:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vpextrb $0, %xmm4, (%rdi)
; AVX512F-NEXT:  LBB58_2: ## %else
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrb $1, %xmm4, 1(%rdi)
; AVX512F-NEXT:  LBB58_4: ## %else2
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrb $2, %xmm4, 2(%rdi)
; AVX512F-NEXT:  LBB58_6: ## %else4
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrb $3, %xmm4, 3(%rdi)
; AVX512F-NEXT:  LBB58_8: ## %else6
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrb $4, %xmm4, 4(%rdi)
; AVX512F-NEXT:  LBB58_10: ## %else8
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrb $5, %xmm4, 5(%rdi)
; AVX512F-NEXT:  LBB58_12: ## %else10
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrb $6, %xmm4, 6(%rdi)
; AVX512F-NEXT:  LBB58_14: ## %else12
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrb $7, %xmm4, 7(%rdi)
; AVX512F-NEXT:  LBB58_16: ## %else14
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    testb %al, %al
; AVX512F-NEXT:    je LBB58_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vpextrb $8, %xmm4, 8(%rdi)
; AVX512F-NEXT:  LBB58_18: ## %else16
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
8733; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8734; AVX512F-NEXT:    kmovw %k1, %eax
8735; AVX512F-NEXT:    testb %al, %al
8736; AVX512F-NEXT:    je LBB58_20
8737; AVX512F-NEXT:  ## BB#19: ## %cond.store17
8738; AVX512F-NEXT:    vpextrb $9, %xmm4, 9(%rdi)
8739; AVX512F-NEXT:  LBB58_20: ## %else18
8740; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
8741; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8742; AVX512F-NEXT:    kmovw %k1, %eax
8743; AVX512F-NEXT:    testb %al, %al
8744; AVX512F-NEXT:    je LBB58_22
8745; AVX512F-NEXT:  ## BB#21: ## %cond.store19
8746; AVX512F-NEXT:    vpextrb $10, %xmm4, 10(%rdi)
8747; AVX512F-NEXT:  LBB58_22: ## %else20
8748; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
8749; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8750; AVX512F-NEXT:    kmovw %k1, %eax
8751; AVX512F-NEXT:    testb %al, %al
8752; AVX512F-NEXT:    je LBB58_24
8753; AVX512F-NEXT:  ## BB#23: ## %cond.store21
8754; AVX512F-NEXT:    vpextrb $11, %xmm4, 11(%rdi)
8755; AVX512F-NEXT:  LBB58_24: ## %else22
8756; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
8757; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8758; AVX512F-NEXT:    kmovw %k1, %eax
8759; AVX512F-NEXT:    testb %al, %al
8760; AVX512F-NEXT:    je LBB58_26
8761; AVX512F-NEXT:  ## BB#25: ## %cond.store23
8762; AVX512F-NEXT:    vpextrb $12, %xmm4, 12(%rdi)
8763; AVX512F-NEXT:  LBB58_26: ## %else24
8764; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm0
8765; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
8766; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8767; AVX512F-NEXT:    kmovw %k1, %eax
8768; AVX512F-NEXT:    testb %al, %al
8769; AVX512F-NEXT:    je LBB58_28
8770; AVX512F-NEXT:  ## BB#27: ## %cond.store25
8771; AVX512F-NEXT:    vpextrb $13, %xmm4, 13(%rdi)
8772; AVX512F-NEXT:  LBB58_28: ## %else26
8773; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
8774; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
8775; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8776; AVX512F-NEXT:    kmovw %k1, %eax
8777; AVX512F-NEXT:    testb %al, %al
8778; AVX512F-NEXT:    je LBB58_30
8779; AVX512F-NEXT:  ## BB#29: ## %cond.store27
8780; AVX512F-NEXT:    vpextrb $14, %xmm4, 14(%rdi)
8781; AVX512F-NEXT:  LBB58_30: ## %else28
8782; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
8783; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
8784; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8785; AVX512F-NEXT:    kmovw %k0, %eax
8786; AVX512F-NEXT:    testb %al, %al
8787; AVX512F-NEXT:    je LBB58_32
8788; AVX512F-NEXT:  ## BB#31: ## %cond.store29
8789; AVX512F-NEXT:    vpextrb $15, %xmm4, 15(%rdi)
8790; AVX512F-NEXT:  LBB58_32: ## %else30
8791; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
8792; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8793; AVX512F-NEXT:    kmovw %k0, %eax
8794; AVX512F-NEXT:    testb %al, %al
8795; AVX512F-NEXT:    je LBB58_34
8796; AVX512F-NEXT:  ## BB#33: ## %cond.store31
8797; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8798; AVX512F-NEXT:    vpextrb $0, %xmm0, 16(%rdi)
8799; AVX512F-NEXT:  LBB58_34: ## %else32
8800; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
8801; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8802; AVX512F-NEXT:    kmovw %k0, %eax
8803; AVX512F-NEXT:    testb %al, %al
8804; AVX512F-NEXT:    je LBB58_36
8805; AVX512F-NEXT:  ## BB#35: ## %cond.store33
8806; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8807; AVX512F-NEXT:    vpextrb $1, %xmm0, 17(%rdi)
8808; AVX512F-NEXT:  LBB58_36: ## %else34
8809; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
8810; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8811; AVX512F-NEXT:    kmovw %k0, %eax
8812; AVX512F-NEXT:    testb %al, %al
8813; AVX512F-NEXT:    je LBB58_38
8814; AVX512F-NEXT:  ## BB#37: ## %cond.store35
8815; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8816; AVX512F-NEXT:    vpextrb $2, %xmm0, 18(%rdi)
8817; AVX512F-NEXT:  LBB58_38: ## %else36
8818; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
8819; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8820; AVX512F-NEXT:    kmovw %k0, %eax
8821; AVX512F-NEXT:    testb %al, %al
8822; AVX512F-NEXT:    je LBB58_40
8823; AVX512F-NEXT:  ## BB#39: ## %cond.store37
8824; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8825; AVX512F-NEXT:    vpextrb $3, %xmm0, 19(%rdi)
8826; AVX512F-NEXT:  LBB58_40: ## %else38
8827; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
8828; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8829; AVX512F-NEXT:    kmovw %k0, %eax
8830; AVX512F-NEXT:    testb %al, %al
8831; AVX512F-NEXT:    je LBB58_42
8832; AVX512F-NEXT:  ## BB#41: ## %cond.store39
8833; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8834; AVX512F-NEXT:    vpextrb $4, %xmm0, 20(%rdi)
8835; AVX512F-NEXT:  LBB58_42: ## %else40
8836; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
8837; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8838; AVX512F-NEXT:    kmovw %k0, %eax
8839; AVX512F-NEXT:    testb %al, %al
8840; AVX512F-NEXT:    je LBB58_44
8841; AVX512F-NEXT:  ## BB#43: ## %cond.store41
8842; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8843; AVX512F-NEXT:    vpextrb $5, %xmm0, 21(%rdi)
8844; AVX512F-NEXT:  LBB58_44: ## %else42
8845; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
8846; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8847; AVX512F-NEXT:    kmovw %k0, %eax
8848; AVX512F-NEXT:    testb %al, %al
8849; AVX512F-NEXT:    je LBB58_46
8850; AVX512F-NEXT:  ## BB#45: ## %cond.store43
8851; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8852; AVX512F-NEXT:    vpextrb $6, %xmm0, 22(%rdi)
8853; AVX512F-NEXT:  LBB58_46: ## %else44
8854; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
8855; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8856; AVX512F-NEXT:    kmovw %k0, %eax
8857; AVX512F-NEXT:    testb %al, %al
8858; AVX512F-NEXT:    je LBB58_48
8859; AVX512F-NEXT:  ## BB#47: ## %cond.store45
8860; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8861; AVX512F-NEXT:    vpextrb $7, %xmm0, 23(%rdi)
8862; AVX512F-NEXT:  LBB58_48: ## %else46
8863; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
8864; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8865; AVX512F-NEXT:    kmovw %k0, %eax
8866; AVX512F-NEXT:    testb %al, %al
8867; AVX512F-NEXT:    je LBB58_50
8868; AVX512F-NEXT:  ## BB#49: ## %cond.store47
8869; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8870; AVX512F-NEXT:    vpextrb $8, %xmm0, 24(%rdi)
8871; AVX512F-NEXT:  LBB58_50: ## %else48
8872; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
8873; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8874; AVX512F-NEXT:    kmovw %k0, %eax
8875; AVX512F-NEXT:    testb %al, %al
8876; AVX512F-NEXT:    je LBB58_52
8877; AVX512F-NEXT:  ## BB#51: ## %cond.store49
8878; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8879; AVX512F-NEXT:    vpextrb $9, %xmm0, 25(%rdi)
8880; AVX512F-NEXT:  LBB58_52: ## %else50
8881; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
8882; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8883; AVX512F-NEXT:    kmovw %k0, %eax
8884; AVX512F-NEXT:    testb %al, %al
8885; AVX512F-NEXT:    je LBB58_54
8886; AVX512F-NEXT:  ## BB#53: ## %cond.store51
8887; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8888; AVX512F-NEXT:    vpextrb $10, %xmm0, 26(%rdi)
8889; AVX512F-NEXT:  LBB58_54: ## %else52
8890; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
8891; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8892; AVX512F-NEXT:    kmovw %k0, %eax
8893; AVX512F-NEXT:    testb %al, %al
8894; AVX512F-NEXT:    je LBB58_56
8895; AVX512F-NEXT:  ## BB#55: ## %cond.store53
8896; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8897; AVX512F-NEXT:    vpextrb $11, %xmm0, 27(%rdi)
8898; AVX512F-NEXT:  LBB58_56: ## %else54
8899; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
8900; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8901; AVX512F-NEXT:    kmovw %k0, %eax
8902; AVX512F-NEXT:    testb %al, %al
8903; AVX512F-NEXT:    je LBB58_58
8904; AVX512F-NEXT:  ## BB#57: ## %cond.store55
8905; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8906; AVX512F-NEXT:    vpextrb $12, %xmm0, 28(%rdi)
8907; AVX512F-NEXT:  LBB58_58: ## %else56
8908; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm0
8909; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
8910; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8911; AVX512F-NEXT:    kmovw %k0, %eax
8912; AVX512F-NEXT:    testb %al, %al
8913; AVX512F-NEXT:    je LBB58_60
8914; AVX512F-NEXT:  ## BB#59: ## %cond.store57
8915; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
8916; AVX512F-NEXT:    vpextrb $13, %xmm1, 29(%rdi)
8917; AVX512F-NEXT:  LBB58_60: ## %else58
8918; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
8919; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
8920; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
8921; AVX512F-NEXT:    kmovw %k0, %eax
8922; AVX512F-NEXT:    testb %al, %al
8923; AVX512F-NEXT:    je LBB58_62
8924; AVX512F-NEXT:  ## BB#61: ## %cond.store59
8925; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
8926; AVX512F-NEXT:    vpextrb $14, %xmm1, 30(%rdi)
8927; AVX512F-NEXT:  LBB58_62: ## %else60
8928; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
8929; AVX512F-NEXT:    kshiftlw $0, %k1, %k1
8930; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8931; AVX512F-NEXT:    kmovw %k1, %eax
8932; AVX512F-NEXT:    testb %al, %al
8933; AVX512F-NEXT:    je LBB58_64
8934; AVX512F-NEXT:  ## BB#63: ## %cond.store61
8935; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
8936; AVX512F-NEXT:    vpextrb $15, %xmm0, 31(%rdi)
8937; AVX512F-NEXT:  LBB58_64: ## %else62
8938; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
8939; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8940; AVX512F-NEXT:    kmovw %k1, %eax
8941; AVX512F-NEXT:    testb %al, %al
8942; AVX512F-NEXT:    je LBB58_66
8943; AVX512F-NEXT:  ## BB#65: ## %cond.store63
8944; AVX512F-NEXT:    vpextrb $0, %xmm5, 32(%rdi)
8945; AVX512F-NEXT:  LBB58_66: ## %else64
8946; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
8947; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8948; AVX512F-NEXT:    kmovw %k1, %eax
8949; AVX512F-NEXT:    testb %al, %al
8950; AVX512F-NEXT:    je LBB58_68
8951; AVX512F-NEXT:  ## BB#67: ## %cond.store65
8952; AVX512F-NEXT:    vpextrb $1, %xmm5, 33(%rdi)
8953; AVX512F-NEXT:  LBB58_68: ## %else66
8954; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
8955; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8956; AVX512F-NEXT:    kmovw %k1, %eax
8957; AVX512F-NEXT:    testb %al, %al
8958; AVX512F-NEXT:    je LBB58_70
8959; AVX512F-NEXT:  ## BB#69: ## %cond.store67
8960; AVX512F-NEXT:    vpextrb $2, %xmm5, 34(%rdi)
8961; AVX512F-NEXT:  LBB58_70: ## %else68
8962; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
8963; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8964; AVX512F-NEXT:    kmovw %k1, %eax
8965; AVX512F-NEXT:    testb %al, %al
8966; AVX512F-NEXT:    je LBB58_72
8967; AVX512F-NEXT:  ## BB#71: ## %cond.store69
8968; AVX512F-NEXT:    vpextrb $3, %xmm5, 35(%rdi)
8969; AVX512F-NEXT:  LBB58_72: ## %else70
8970; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
8971; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8972; AVX512F-NEXT:    kmovw %k1, %eax
8973; AVX512F-NEXT:    testb %al, %al
8974; AVX512F-NEXT:    je LBB58_74
8975; AVX512F-NEXT:  ## BB#73: ## %cond.store71
8976; AVX512F-NEXT:    vpextrb $4, %xmm5, 36(%rdi)
8977; AVX512F-NEXT:  LBB58_74: ## %else72
8978; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
8979; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8980; AVX512F-NEXT:    kmovw %k1, %eax
8981; AVX512F-NEXT:    testb %al, %al
8982; AVX512F-NEXT:    je LBB58_76
8983; AVX512F-NEXT:  ## BB#75: ## %cond.store73
8984; AVX512F-NEXT:    vpextrb $5, %xmm5, 37(%rdi)
8985; AVX512F-NEXT:  LBB58_76: ## %else74
8986; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
8987; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8988; AVX512F-NEXT:    kmovw %k1, %eax
8989; AVX512F-NEXT:    testb %al, %al
8990; AVX512F-NEXT:    je LBB58_78
8991; AVX512F-NEXT:  ## BB#77: ## %cond.store75
8992; AVX512F-NEXT:    vpextrb $6, %xmm5, 38(%rdi)
8993; AVX512F-NEXT:  LBB58_78: ## %else76
8994; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
8995; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
8996; AVX512F-NEXT:    kmovw %k1, %eax
8997; AVX512F-NEXT:    testb %al, %al
8998; AVX512F-NEXT:    je LBB58_80
8999; AVX512F-NEXT:  ## BB#79: ## %cond.store77
9000; AVX512F-NEXT:    vpextrb $7, %xmm5, 39(%rdi)
9001; AVX512F-NEXT:  LBB58_80: ## %else78
9002; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
9003; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9004; AVX512F-NEXT:    kmovw %k1, %eax
9005; AVX512F-NEXT:    testb %al, %al
9006; AVX512F-NEXT:    je LBB58_82
9007; AVX512F-NEXT:  ## BB#81: ## %cond.store79
9008; AVX512F-NEXT:    vpextrb $8, %xmm5, 40(%rdi)
9009; AVX512F-NEXT:  LBB58_82: ## %else80
9010; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
9011; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9012; AVX512F-NEXT:    kmovw %k1, %eax
9013; AVX512F-NEXT:    testb %al, %al
9014; AVX512F-NEXT:    je LBB58_84
9015; AVX512F-NEXT:  ## BB#83: ## %cond.store81
9016; AVX512F-NEXT:    vpextrb $9, %xmm5, 41(%rdi)
9017; AVX512F-NEXT:  LBB58_84: ## %else82
9018; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
9019; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9020; AVX512F-NEXT:    kmovw %k1, %eax
9021; AVX512F-NEXT:    testb %al, %al
9022; AVX512F-NEXT:    je LBB58_86
9023; AVX512F-NEXT:  ## BB#85: ## %cond.store83
9024; AVX512F-NEXT:    vpextrb $10, %xmm5, 42(%rdi)
9025; AVX512F-NEXT:  LBB58_86: ## %else84
9026; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
9027; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9028; AVX512F-NEXT:    kmovw %k1, %eax
9029; AVX512F-NEXT:    testb %al, %al
9030; AVX512F-NEXT:    je LBB58_88
9031; AVX512F-NEXT:  ## BB#87: ## %cond.store85
9032; AVX512F-NEXT:    vpextrb $11, %xmm5, 43(%rdi)
9033; AVX512F-NEXT:  LBB58_88: ## %else86
9034; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
9035; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9036; AVX512F-NEXT:    kmovw %k1, %eax
9037; AVX512F-NEXT:    testb %al, %al
9038; AVX512F-NEXT:    je LBB58_90
9039; AVX512F-NEXT:  ## BB#89: ## %cond.store87
9040; AVX512F-NEXT:    vpextrb $12, %xmm5, 44(%rdi)
9041; AVX512F-NEXT:  LBB58_90: ## %else88
9042; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm0
9043; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
9044; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9045; AVX512F-NEXT:    kmovw %k1, %eax
9046; AVX512F-NEXT:    testb %al, %al
9047; AVX512F-NEXT:    je LBB58_92
9048; AVX512F-NEXT:  ## BB#91: ## %cond.store89
9049; AVX512F-NEXT:    vpextrb $13, %xmm5, 45(%rdi)
9050; AVX512F-NEXT:  LBB58_92: ## %else90
9051; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
9052; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
9053; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9054; AVX512F-NEXT:    kmovw %k1, %eax
9055; AVX512F-NEXT:    testb %al, %al
9056; AVX512F-NEXT:    je LBB58_94
9057; AVX512F-NEXT:  ## BB#93: ## %cond.store91
9058; AVX512F-NEXT:    vpextrb $14, %xmm5, 46(%rdi)
9059; AVX512F-NEXT:  LBB58_94: ## %else92
9060; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
9061; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
9062; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9063; AVX512F-NEXT:    kmovw %k0, %eax
9064; AVX512F-NEXT:    testb %al, %al
9065; AVX512F-NEXT:    je LBB58_96
9066; AVX512F-NEXT:  ## BB#95: ## %cond.store93
9067; AVX512F-NEXT:    vpextrb $15, %xmm5, 47(%rdi)
9068; AVX512F-NEXT:  LBB58_96: ## %else94
9069; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
9070; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9071; AVX512F-NEXT:    kmovw %k0, %eax
9072; AVX512F-NEXT:    testb %al, %al
9073; AVX512F-NEXT:    je LBB58_98
9074; AVX512F-NEXT:  ## BB#97: ## %cond.store95
9075; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9076; AVX512F-NEXT:    vpextrb $0, %xmm0, 48(%rdi)
9077; AVX512F-NEXT:  LBB58_98: ## %else96
9078; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
9079; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9080; AVX512F-NEXT:    kmovw %k0, %eax
9081; AVX512F-NEXT:    testb %al, %al
9082; AVX512F-NEXT:    je LBB58_100
9083; AVX512F-NEXT:  ## BB#99: ## %cond.store97
9084; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9085; AVX512F-NEXT:    vpextrb $1, %xmm0, 49(%rdi)
9086; AVX512F-NEXT:  LBB58_100: ## %else98
9087; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
9088; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9089; AVX512F-NEXT:    kmovw %k0, %eax
9090; AVX512F-NEXT:    testb %al, %al
9091; AVX512F-NEXT:    je LBB58_102
9092; AVX512F-NEXT:  ## BB#101: ## %cond.store99
9093; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9094; AVX512F-NEXT:    vpextrb $2, %xmm0, 50(%rdi)
9095; AVX512F-NEXT:  LBB58_102: ## %else100
9096; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
9097; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9098; AVX512F-NEXT:    kmovw %k0, %eax
9099; AVX512F-NEXT:    testb %al, %al
9100; AVX512F-NEXT:    je LBB58_104
9101; AVX512F-NEXT:  ## BB#103: ## %cond.store101
9102; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9103; AVX512F-NEXT:    vpextrb $3, %xmm0, 51(%rdi)
9104; AVX512F-NEXT:  LBB58_104: ## %else102
9105; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
9106; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9107; AVX512F-NEXT:    kmovw %k0, %eax
9108; AVX512F-NEXT:    testb %al, %al
9109; AVX512F-NEXT:    je LBB58_106
9110; AVX512F-NEXT:  ## BB#105: ## %cond.store103
9111; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9112; AVX512F-NEXT:    vpextrb $4, %xmm0, 52(%rdi)
9113; AVX512F-NEXT:  LBB58_106: ## %else104
9114; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
9115; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9116; AVX512F-NEXT:    kmovw %k0, %eax
9117; AVX512F-NEXT:    testb %al, %al
9118; AVX512F-NEXT:    je LBB58_108
9119; AVX512F-NEXT:  ## BB#107: ## %cond.store105
9120; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9121; AVX512F-NEXT:    vpextrb $5, %xmm0, 53(%rdi)
9122; AVX512F-NEXT:  LBB58_108: ## %else106
9123; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
9124; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9125; AVX512F-NEXT:    kmovw %k0, %eax
9126; AVX512F-NEXT:    testb %al, %al
9127; AVX512F-NEXT:    je LBB58_110
9128; AVX512F-NEXT:  ## BB#109: ## %cond.store107
9129; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9130; AVX512F-NEXT:    vpextrb $6, %xmm0, 54(%rdi)
9131; AVX512F-NEXT:  LBB58_110: ## %else108
9132; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
9133; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9134; AVX512F-NEXT:    kmovw %k0, %eax
9135; AVX512F-NEXT:    testb %al, %al
9136; AVX512F-NEXT:    je LBB58_112
9137; AVX512F-NEXT:  ## BB#111: ## %cond.store109
9138; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9139; AVX512F-NEXT:    vpextrb $7, %xmm0, 55(%rdi)
9140; AVX512F-NEXT:  LBB58_112: ## %else110
9141; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
9142; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9143; AVX512F-NEXT:    kmovw %k0, %eax
9144; AVX512F-NEXT:    testb %al, %al
9145; AVX512F-NEXT:    je LBB58_114
9146; AVX512F-NEXT:  ## BB#113: ## %cond.store111
9147; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9148; AVX512F-NEXT:    vpextrb $8, %xmm0, 56(%rdi)
9149; AVX512F-NEXT:  LBB58_114: ## %else112
9150; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
9151; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9152; AVX512F-NEXT:    kmovw %k0, %eax
9153; AVX512F-NEXT:    testb %al, %al
9154; AVX512F-NEXT:    je LBB58_116
9155; AVX512F-NEXT:  ## BB#115: ## %cond.store113
9156; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9157; AVX512F-NEXT:    vpextrb $9, %xmm0, 57(%rdi)
9158; AVX512F-NEXT:  LBB58_116: ## %else114
9159; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
9160; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9161; AVX512F-NEXT:    kmovw %k0, %eax
9162; AVX512F-NEXT:    testb %al, %al
9163; AVX512F-NEXT:    je LBB58_118
9164; AVX512F-NEXT:  ## BB#117: ## %cond.store115
9165; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9166; AVX512F-NEXT:    vpextrb $10, %xmm0, 58(%rdi)
9167; AVX512F-NEXT:  LBB58_118: ## %else116
9168; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
9169; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9170; AVX512F-NEXT:    kmovw %k0, %eax
9171; AVX512F-NEXT:    testb %al, %al
9172; AVX512F-NEXT:    je LBB58_120
9173; AVX512F-NEXT:  ## BB#119: ## %cond.store117
9174; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9175; AVX512F-NEXT:    vpextrb $11, %xmm0, 59(%rdi)
9176; AVX512F-NEXT:  LBB58_120: ## %else118
9177; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
9178; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9179; AVX512F-NEXT:    kmovw %k0, %eax
9180; AVX512F-NEXT:    testb %al, %al
9181; AVX512F-NEXT:    je LBB58_122
9182; AVX512F-NEXT:  ## BB#121: ## %cond.store119
9183; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9184; AVX512F-NEXT:    vpextrb $12, %xmm0, 60(%rdi)
9185; AVX512F-NEXT:  LBB58_122: ## %else120
9186; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
9187; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9188; AVX512F-NEXT:    kmovw %k0, %eax
9189; AVX512F-NEXT:    testb %al, %al
9190; AVX512F-NEXT:    je LBB58_124
9191; AVX512F-NEXT:  ## BB#123: ## %cond.store121
9192; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9193; AVX512F-NEXT:    vpextrb $13, %xmm0, 61(%rdi)
9194; AVX512F-NEXT:  LBB58_124: ## %else122
9195; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
9196; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9197; AVX512F-NEXT:    kmovw %k0, %eax
9198; AVX512F-NEXT:    testb %al, %al
9199; AVX512F-NEXT:    je LBB58_126
9200; AVX512F-NEXT:  ## BB#125: ## %cond.store123
9201; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9202; AVX512F-NEXT:    vpextrb $14, %xmm0, 62(%rdi)
9203; AVX512F-NEXT:  LBB58_126: ## %else124
9204; AVX512F-NEXT:    kshiftlw $0, %k1, %k0
9205; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9206; AVX512F-NEXT:    kmovw %k0, %eax
9207; AVX512F-NEXT:    testb %al, %al
9208; AVX512F-NEXT:    je LBB58_128
9209; AVX512F-NEXT:  ## BB#127: ## %cond.store125
9210; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
9211; AVX512F-NEXT:    vpextrb $15, %xmm0, 63(%rdi)
9212; AVX512F-NEXT:  LBB58_128: ## %else126
9213; AVX512F-NEXT:    retq
9214;
9215; SKX-LABEL: test_mask_store_64xi8:
9216; SKX:       ## BB#0:
9217; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
9218; SKX-NEXT:    vpmovb2m %zmm0, %k1
9219; SKX-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
9220; SKX-NEXT:    retq
9221  call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %val, <64 x i8>* %addr, i32 4, <64 x i1>%mask)
9222  ret void
9223}
9224declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)
9225
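; Note on the patterns checked above: with AVX512BW (SKX), the <64 x i1>
; mask is materialized by shifting each byte's low bit into its sign bit
; (vpsllw $7) and gathering the sign bits into a mask register with
; vpmovb2m, so the whole operation is a single vmovdqu8 {%k1} store.
; Plain AVX512F has no byte-granularity masked store, so each lane's mask
; bit is isolated with a kshiftlw/kshiftrw pair and a test-and-branch
; guards an individual vpextrb store.
; For reference, a call like the one above can be built from any i1
; vector. A minimal sketch (illustrative only, not part of the test;
; %pred, %v and %p are placeholder values):
;   %m = trunc <64 x i8> %pred to <64 x i1>
;   call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> %v, <64 x i8>* %p, i32 4, <64 x i1> %m)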
9226define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
9227; AVX-LABEL: test_mask_store_8xi16:
9228; AVX:       ## BB#0:
9229; AVX-NEXT:    vpextrb $0, %xmm0, %eax
9230; AVX-NEXT:    testb $1, %al
9231; AVX-NEXT:    je LBB59_2
9232; AVX-NEXT:  ## BB#1: ## %cond.store
9233; AVX-NEXT:    vmovd %xmm1, %eax
9234; AVX-NEXT:    movw %ax, (%rdi)
9235; AVX-NEXT:  LBB59_2: ## %else
9236; AVX-NEXT:    vpextrb $2, %xmm0, %eax
9237; AVX-NEXT:    testb $1, %al
9238; AVX-NEXT:    je LBB59_4
9239; AVX-NEXT:  ## BB#3: ## %cond.store1
9240; AVX-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9241; AVX-NEXT:  LBB59_4: ## %else2
9242; AVX-NEXT:    vpextrb $4, %xmm0, %eax
9243; AVX-NEXT:    testb $1, %al
9244; AVX-NEXT:    je LBB59_6
9245; AVX-NEXT:  ## BB#5: ## %cond.store3
9246; AVX-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9247; AVX-NEXT:  LBB59_6: ## %else4
9248; AVX-NEXT:    vpextrb $6, %xmm0, %eax
9249; AVX-NEXT:    testb $1, %al
9250; AVX-NEXT:    je LBB59_8
9251; AVX-NEXT:  ## BB#7: ## %cond.store5
9252; AVX-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9253; AVX-NEXT:  LBB59_8: ## %else6
9254; AVX-NEXT:    vpextrb $8, %xmm0, %eax
9255; AVX-NEXT:    testb $1, %al
9256; AVX-NEXT:    je LBB59_10
9257; AVX-NEXT:  ## BB#9: ## %cond.store7
9258; AVX-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9259; AVX-NEXT:  LBB59_10: ## %else8
9260; AVX-NEXT:    vpextrb $10, %xmm0, %eax
9261; AVX-NEXT:    testb $1, %al
9262; AVX-NEXT:    je LBB59_12
9263; AVX-NEXT:  ## BB#11: ## %cond.store9
9264; AVX-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9265; AVX-NEXT:  LBB59_12: ## %else10
9266; AVX-NEXT:    vpextrb $12, %xmm0, %eax
9267; AVX-NEXT:    testb $1, %al
9268; AVX-NEXT:    je LBB59_14
9269; AVX-NEXT:  ## BB#13: ## %cond.store11
9270; AVX-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9271; AVX-NEXT:  LBB59_14: ## %else12
9272; AVX-NEXT:    vpextrb $14, %xmm0, %eax
9273; AVX-NEXT:    testb $1, %al
9274; AVX-NEXT:    je LBB59_16
9275; AVX-NEXT:  ## BB#15: ## %cond.store13
9276; AVX-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9277; AVX-NEXT:  LBB59_16: ## %else14
9278; AVX-NEXT:    retq
9279;
9280; AVX512F-LABEL: test_mask_store_8xi16:
9281; AVX512F:       ## BB#0:
9282; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
9283; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
9284; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
9285; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
9286; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9287; AVX512F-NEXT:    kmovw %k1, %eax
9288; AVX512F-NEXT:    testb %al, %al
9289; AVX512F-NEXT:    je LBB59_2
9290; AVX512F-NEXT:  ## BB#1: ## %cond.store
9291; AVX512F-NEXT:    vmovd %xmm1, %eax
9292; AVX512F-NEXT:    movw %ax, (%rdi)
9293; AVX512F-NEXT:  LBB59_2: ## %else
9294; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
9295; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9296; AVX512F-NEXT:    kmovw %k1, %eax
9297; AVX512F-NEXT:    testb %al, %al
9298; AVX512F-NEXT:    je LBB59_4
9299; AVX512F-NEXT:  ## BB#3: ## %cond.store1
9300; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9301; AVX512F-NEXT:  LBB59_4: ## %else2
9302; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
9303; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9304; AVX512F-NEXT:    kmovw %k1, %eax
9305; AVX512F-NEXT:    testb %al, %al
9306; AVX512F-NEXT:    je LBB59_6
9307; AVX512F-NEXT:  ## BB#5: ## %cond.store3
9308; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9309; AVX512F-NEXT:  LBB59_6: ## %else4
9310; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
9311; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9312; AVX512F-NEXT:    kmovw %k1, %eax
9313; AVX512F-NEXT:    testb %al, %al
9314; AVX512F-NEXT:    je LBB59_8
9315; AVX512F-NEXT:  ## BB#7: ## %cond.store5
9316; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9317; AVX512F-NEXT:  LBB59_8: ## %else6
9318; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
9319; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9320; AVX512F-NEXT:    kmovw %k1, %eax
9321; AVX512F-NEXT:    testb %al, %al
9322; AVX512F-NEXT:    je LBB59_10
9323; AVX512F-NEXT:  ## BB#9: ## %cond.store7
9324; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9325; AVX512F-NEXT:  LBB59_10: ## %else8
9326; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
9327; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9328; AVX512F-NEXT:    kmovw %k1, %eax
9329; AVX512F-NEXT:    testb %al, %al
9330; AVX512F-NEXT:    je LBB59_12
9331; AVX512F-NEXT:  ## BB#11: ## %cond.store9
9332; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9333; AVX512F-NEXT:  LBB59_12: ## %else10
9334; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
9335; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9336; AVX512F-NEXT:    kmovw %k1, %eax
9337; AVX512F-NEXT:    testb %al, %al
9338; AVX512F-NEXT:    je LBB59_14
9339; AVX512F-NEXT:  ## BB#13: ## %cond.store11
9340; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9341; AVX512F-NEXT:  LBB59_14: ## %else12
9342; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
9343; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9344; AVX512F-NEXT:    kmovw %k0, %eax
9345; AVX512F-NEXT:    testb %al, %al
9346; AVX512F-NEXT:    je LBB59_16
9347; AVX512F-NEXT:  ## BB#15: ## %cond.store13
9348; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9349; AVX512F-NEXT:  LBB59_16: ## %else14
9350; AVX512F-NEXT:    retq
9351;
9352; SKX-LABEL: test_mask_store_8xi16:
9353; SKX:       ## BB#0:
9354; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
9355; SKX-NEXT:    vpmovw2m %xmm0, %k1
9356; SKX-NEXT:    vmovdqu16 %xmm1, (%rdi) {%k1}
9357; SKX-NEXT:    retq
9358  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %addr, i32 4, <8 x i1>%mask)
9359  ret void
9360}
9361declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
9362
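; Note: the v8i16 case above follows the same scheme one element size up.
; SKX shifts each 16-bit lane's low bit into the sign bit (vpsllw $15),
; converts the sign bits to a mask register with vpmovw2m, and issues one
; masked vmovdqu16. AVX512F, lacking AVX512BW, scalarizes: each
; kshiftlw/kshiftrw pair extracts mask bit i into %k1, and a branch
; guards the corresponding vpextrw store. On plain AVX there is no
; word-granularity maskmov at all, so the mask lanes are read back with
; vpextrb and every element is stored conditionally.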
9363define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
9364; AVX1-LABEL: test_mask_store_16xi16:
9365; AVX1:       ## BB#0:
9366; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
9367; AVX1-NEXT:    testb $1, %al
9368; AVX1-NEXT:    je LBB60_2
9369; AVX1-NEXT:  ## BB#1: ## %cond.store
9370; AVX1-NEXT:    vmovd %xmm1, %eax
9371; AVX1-NEXT:    movw %ax, (%rdi)
9372; AVX1-NEXT:  LBB60_2: ## %else
9373; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
9374; AVX1-NEXT:    testb $1, %al
9375; AVX1-NEXT:    je LBB60_4
9376; AVX1-NEXT:  ## BB#3: ## %cond.store1
9377; AVX1-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9378; AVX1-NEXT:  LBB60_4: ## %else2
9379; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
9380; AVX1-NEXT:    testb $1, %al
9381; AVX1-NEXT:    je LBB60_6
9382; AVX1-NEXT:  ## BB#5: ## %cond.store3
9383; AVX1-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9384; AVX1-NEXT:  LBB60_6: ## %else4
9385; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
9386; AVX1-NEXT:    testb $1, %al
9387; AVX1-NEXT:    je LBB60_8
9388; AVX1-NEXT:  ## BB#7: ## %cond.store5
9389; AVX1-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9390; AVX1-NEXT:  LBB60_8: ## %else6
9391; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
9392; AVX1-NEXT:    testb $1, %al
9393; AVX1-NEXT:    je LBB60_10
9394; AVX1-NEXT:  ## BB#9: ## %cond.store7
9395; AVX1-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9396; AVX1-NEXT:  LBB60_10: ## %else8
9397; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
9398; AVX1-NEXT:    testb $1, %al
9399; AVX1-NEXT:    je LBB60_12
9400; AVX1-NEXT:  ## BB#11: ## %cond.store9
9401; AVX1-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9402; AVX1-NEXT:  LBB60_12: ## %else10
9403; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
9404; AVX1-NEXT:    testb $1, %al
9405; AVX1-NEXT:    je LBB60_14
9406; AVX1-NEXT:  ## BB#13: ## %cond.store11
9407; AVX1-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9408; AVX1-NEXT:  LBB60_14: ## %else12
9409; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
9410; AVX1-NEXT:    testb $1, %al
9411; AVX1-NEXT:    je LBB60_16
9412; AVX1-NEXT:  ## BB#15: ## %cond.store13
9413; AVX1-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9414; AVX1-NEXT:  LBB60_16: ## %else14
9415; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
9416; AVX1-NEXT:    testb $1, %al
9417; AVX1-NEXT:    je LBB60_18
9418; AVX1-NEXT:  ## BB#17: ## %cond.store15
9419; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9420; AVX1-NEXT:    vmovd %xmm2, %eax
9421; AVX1-NEXT:    movw %ax, 16(%rdi)
9422; AVX1-NEXT:  LBB60_18: ## %else16
9423; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
9424; AVX1-NEXT:    testb $1, %al
9425; AVX1-NEXT:    je LBB60_20
9426; AVX1-NEXT:  ## BB#19: ## %cond.store17
9427; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9428; AVX1-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
9429; AVX1-NEXT:  LBB60_20: ## %else18
9430; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
9431; AVX1-NEXT:    testb $1, %al
9432; AVX1-NEXT:    je LBB60_22
9433; AVX1-NEXT:  ## BB#21: ## %cond.store19
9434; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9435; AVX1-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
9436; AVX1-NEXT:  LBB60_22: ## %else20
9437; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
9438; AVX1-NEXT:    testb $1, %al
9439; AVX1-NEXT:    je LBB60_24
9440; AVX1-NEXT:  ## BB#23: ## %cond.store21
9441; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9442; AVX1-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
9443; AVX1-NEXT:  LBB60_24: ## %else22
9444; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
9445; AVX1-NEXT:    testb $1, %al
9446; AVX1-NEXT:    je LBB60_26
9447; AVX1-NEXT:  ## BB#25: ## %cond.store23
9448; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9449; AVX1-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
9450; AVX1-NEXT:  LBB60_26: ## %else24
9451; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
9452; AVX1-NEXT:    testb $1, %al
9453; AVX1-NEXT:    je LBB60_28
9454; AVX1-NEXT:  ## BB#27: ## %cond.store25
9455; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9456; AVX1-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
9457; AVX1-NEXT:  LBB60_28: ## %else26
9458; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
9459; AVX1-NEXT:    testb $1, %al
9460; AVX1-NEXT:    je LBB60_30
9461; AVX1-NEXT:  ## BB#29: ## %cond.store27
9462; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
9463; AVX1-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
9464; AVX1-NEXT:  LBB60_30: ## %else28
9465; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
9466; AVX1-NEXT:    testb $1, %al
9467; AVX1-NEXT:    je LBB60_32
9468; AVX1-NEXT:  ## BB#31: ## %cond.store29
9469; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
9470; AVX1-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
9471; AVX1-NEXT:  LBB60_32: ## %else30
9472; AVX1-NEXT:    vzeroupper
9473; AVX1-NEXT:    retq
9474;
9475; AVX2-LABEL: test_mask_store_16xi16:
9476; AVX2:       ## BB#0:
9477; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
9478; AVX2-NEXT:    testb $1, %al
9479; AVX2-NEXT:    je LBB60_2
9480; AVX2-NEXT:  ## BB#1: ## %cond.store
9481; AVX2-NEXT:    vmovd %xmm1, %eax
9482; AVX2-NEXT:    movw %ax, (%rdi)
9483; AVX2-NEXT:  LBB60_2: ## %else
9484; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
9485; AVX2-NEXT:    testb $1, %al
9486; AVX2-NEXT:    je LBB60_4
9487; AVX2-NEXT:  ## BB#3: ## %cond.store1
9488; AVX2-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9489; AVX2-NEXT:  LBB60_4: ## %else2
9490; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
9491; AVX2-NEXT:    testb $1, %al
9492; AVX2-NEXT:    je LBB60_6
9493; AVX2-NEXT:  ## BB#5: ## %cond.store3
9494; AVX2-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9495; AVX2-NEXT:  LBB60_6: ## %else4
9496; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
9497; AVX2-NEXT:    testb $1, %al
9498; AVX2-NEXT:    je LBB60_8
9499; AVX2-NEXT:  ## BB#7: ## %cond.store5
9500; AVX2-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9501; AVX2-NEXT:  LBB60_8: ## %else6
9502; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
9503; AVX2-NEXT:    testb $1, %al
9504; AVX2-NEXT:    je LBB60_10
9505; AVX2-NEXT:  ## BB#9: ## %cond.store7
9506; AVX2-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9507; AVX2-NEXT:  LBB60_10: ## %else8
9508; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
9509; AVX2-NEXT:    testb $1, %al
9510; AVX2-NEXT:    je LBB60_12
9511; AVX2-NEXT:  ## BB#11: ## %cond.store9
9512; AVX2-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9513; AVX2-NEXT:  LBB60_12: ## %else10
9514; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
9515; AVX2-NEXT:    testb $1, %al
9516; AVX2-NEXT:    je LBB60_14
9517; AVX2-NEXT:  ## BB#13: ## %cond.store11
9518; AVX2-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9519; AVX2-NEXT:  LBB60_14: ## %else12
9520; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
9521; AVX2-NEXT:    testb $1, %al
9522; AVX2-NEXT:    je LBB60_16
9523; AVX2-NEXT:  ## BB#15: ## %cond.store13
9524; AVX2-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9525; AVX2-NEXT:  LBB60_16: ## %else14
9526; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
9527; AVX2-NEXT:    testb $1, %al
9528; AVX2-NEXT:    je LBB60_18
9529; AVX2-NEXT:  ## BB#17: ## %cond.store15
9530; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9531; AVX2-NEXT:    vmovd %xmm2, %eax
9532; AVX2-NEXT:    movw %ax, 16(%rdi)
9533; AVX2-NEXT:  LBB60_18: ## %else16
9534; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
9535; AVX2-NEXT:    testb $1, %al
9536; AVX2-NEXT:    je LBB60_20
9537; AVX2-NEXT:  ## BB#19: ## %cond.store17
9538; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9539; AVX2-NEXT:    vpextrw $1, %xmm2, 18(%rdi)
9540; AVX2-NEXT:  LBB60_20: ## %else18
9541; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
9542; AVX2-NEXT:    testb $1, %al
9543; AVX2-NEXT:    je LBB60_22
9544; AVX2-NEXT:  ## BB#21: ## %cond.store19
9545; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9546; AVX2-NEXT:    vpextrw $2, %xmm2, 20(%rdi)
9547; AVX2-NEXT:  LBB60_22: ## %else20
9548; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
9549; AVX2-NEXT:    testb $1, %al
9550; AVX2-NEXT:    je LBB60_24
9551; AVX2-NEXT:  ## BB#23: ## %cond.store21
9552; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9553; AVX2-NEXT:    vpextrw $3, %xmm2, 22(%rdi)
9554; AVX2-NEXT:  LBB60_24: ## %else22
9555; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
9556; AVX2-NEXT:    testb $1, %al
9557; AVX2-NEXT:    je LBB60_26
9558; AVX2-NEXT:  ## BB#25: ## %cond.store23
9559; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9560; AVX2-NEXT:    vpextrw $4, %xmm2, 24(%rdi)
9561; AVX2-NEXT:  LBB60_26: ## %else24
9562; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
9563; AVX2-NEXT:    testb $1, %al
9564; AVX2-NEXT:    je LBB60_28
9565; AVX2-NEXT:  ## BB#27: ## %cond.store25
9566; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9567; AVX2-NEXT:    vpextrw $5, %xmm2, 26(%rdi)
9568; AVX2-NEXT:  LBB60_28: ## %else26
9569; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
9570; AVX2-NEXT:    testb $1, %al
9571; AVX2-NEXT:    je LBB60_30
9572; AVX2-NEXT:  ## BB#29: ## %cond.store27
9573; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
9574; AVX2-NEXT:    vpextrw $6, %xmm2, 28(%rdi)
9575; AVX2-NEXT:  LBB60_30: ## %else28
9576; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
9577; AVX2-NEXT:    testb $1, %al
9578; AVX2-NEXT:    je LBB60_32
9579; AVX2-NEXT:  ## BB#31: ## %cond.store29
9580; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
9581; AVX2-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
9582; AVX2-NEXT:  LBB60_32: ## %else30
9583; AVX2-NEXT:    vzeroupper
9584; AVX2-NEXT:    retq
9585;
9586; AVX512F-LABEL: test_mask_store_16xi16:
9587; AVX512F:       ## BB#0:
9588; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
9589; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
9590; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
9591; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
9592; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9593; AVX512F-NEXT:    kmovw %k1, %eax
9594; AVX512F-NEXT:    testb %al, %al
9595; AVX512F-NEXT:    je LBB60_2
9596; AVX512F-NEXT:  ## BB#1: ## %cond.store
9597; AVX512F-NEXT:    vmovd %xmm1, %eax
9598; AVX512F-NEXT:    movw %ax, (%rdi)
9599; AVX512F-NEXT:  LBB60_2: ## %else
9600; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
9601; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9602; AVX512F-NEXT:    kmovw %k1, %eax
9603; AVX512F-NEXT:    testb %al, %al
9604; AVX512F-NEXT:    je LBB60_4
9605; AVX512F-NEXT:  ## BB#3: ## %cond.store1
9606; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9607; AVX512F-NEXT:  LBB60_4: ## %else2
9608; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
9609; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9610; AVX512F-NEXT:    kmovw %k1, %eax
9611; AVX512F-NEXT:    testb %al, %al
9612; AVX512F-NEXT:    je LBB60_6
9613; AVX512F-NEXT:  ## BB#5: ## %cond.store3
9614; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9615; AVX512F-NEXT:  LBB60_6: ## %else4
9616; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
9617; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9618; AVX512F-NEXT:    kmovw %k1, %eax
9619; AVX512F-NEXT:    testb %al, %al
9620; AVX512F-NEXT:    je LBB60_8
9621; AVX512F-NEXT:  ## BB#7: ## %cond.store5
9622; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9623; AVX512F-NEXT:  LBB60_8: ## %else6
9624; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
9625; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9626; AVX512F-NEXT:    kmovw %k1, %eax
9627; AVX512F-NEXT:    testb %al, %al
9628; AVX512F-NEXT:    je LBB60_10
9629; AVX512F-NEXT:  ## BB#9: ## %cond.store7
9630; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9631; AVX512F-NEXT:  LBB60_10: ## %else8
9632; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
9633; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9634; AVX512F-NEXT:    kmovw %k1, %eax
9635; AVX512F-NEXT:    testb %al, %al
9636; AVX512F-NEXT:    je LBB60_12
9637; AVX512F-NEXT:  ## BB#11: ## %cond.store9
9638; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9639; AVX512F-NEXT:  LBB60_12: ## %else10
9640; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
9641; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9642; AVX512F-NEXT:    kmovw %k1, %eax
9643; AVX512F-NEXT:    testb %al, %al
9644; AVX512F-NEXT:    je LBB60_14
9645; AVX512F-NEXT:  ## BB#13: ## %cond.store11
9646; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9647; AVX512F-NEXT:  LBB60_14: ## %else12
9648; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
9649; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9650; AVX512F-NEXT:    kmovw %k1, %eax
9651; AVX512F-NEXT:    testb %al, %al
9652; AVX512F-NEXT:    je LBB60_16
9653; AVX512F-NEXT:  ## BB#15: ## %cond.store13
9654; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9655; AVX512F-NEXT:  LBB60_16: ## %else14
9656; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
9657; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9658; AVX512F-NEXT:    kmovw %k1, %eax
9659; AVX512F-NEXT:    testb %al, %al
9660; AVX512F-NEXT:    je LBB60_18
9661; AVX512F-NEXT:  ## BB#17: ## %cond.store15
9662; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9663; AVX512F-NEXT:    vmovd %xmm0, %eax
9664; AVX512F-NEXT:    movw %ax, 16(%rdi)
9665; AVX512F-NEXT:  LBB60_18: ## %else16
9666; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
9667; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9668; AVX512F-NEXT:    kmovw %k1, %eax
9669; AVX512F-NEXT:    testb %al, %al
9670; AVX512F-NEXT:    je LBB60_20
9671; AVX512F-NEXT:  ## BB#19: ## %cond.store17
9672; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9673; AVX512F-NEXT:    vpextrw $1, %xmm0, 18(%rdi)
9674; AVX512F-NEXT:  LBB60_20: ## %else18
9675; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
9676; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9677; AVX512F-NEXT:    kmovw %k1, %eax
9678; AVX512F-NEXT:    testb %al, %al
9679; AVX512F-NEXT:    je LBB60_22
9680; AVX512F-NEXT:  ## BB#21: ## %cond.store19
9681; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9682; AVX512F-NEXT:    vpextrw $2, %xmm0, 20(%rdi)
9683; AVX512F-NEXT:  LBB60_22: ## %else20
9684; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
9685; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9686; AVX512F-NEXT:    kmovw %k1, %eax
9687; AVX512F-NEXT:    testb %al, %al
9688; AVX512F-NEXT:    je LBB60_24
9689; AVX512F-NEXT:  ## BB#23: ## %cond.store21
9690; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9691; AVX512F-NEXT:    vpextrw $3, %xmm0, 22(%rdi)
9692; AVX512F-NEXT:  LBB60_24: ## %else22
9693; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
9694; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9695; AVX512F-NEXT:    kmovw %k1, %eax
9696; AVX512F-NEXT:    testb %al, %al
9697; AVX512F-NEXT:    je LBB60_26
9698; AVX512F-NEXT:  ## BB#25: ## %cond.store23
9699; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9700; AVX512F-NEXT:    vpextrw $4, %xmm0, 24(%rdi)
9701; AVX512F-NEXT:  LBB60_26: ## %else24
9702; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
9703; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9704; AVX512F-NEXT:    kmovw %k1, %eax
9705; AVX512F-NEXT:    testb %al, %al
9706; AVX512F-NEXT:    je LBB60_28
9707; AVX512F-NEXT:  ## BB#27: ## %cond.store25
9708; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9709; AVX512F-NEXT:    vpextrw $5, %xmm0, 26(%rdi)
9710; AVX512F-NEXT:  LBB60_28: ## %else26
9711; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
9712; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
9713; AVX512F-NEXT:    kmovw %k1, %eax
9714; AVX512F-NEXT:    testb %al, %al
9715; AVX512F-NEXT:    je LBB60_30
9716; AVX512F-NEXT:  ## BB#29: ## %cond.store27
9717; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9718; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
9719; AVX512F-NEXT:  LBB60_30: ## %else28
9720; AVX512F-NEXT:    kshiftlw $0, %k0, %k0
9721; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
9722; AVX512F-NEXT:    kmovw %k0, %eax
9723; AVX512F-NEXT:    testb %al, %al
9724; AVX512F-NEXT:    je LBB60_32
9725; AVX512F-NEXT:  ## BB#31: ## %cond.store29
9726; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
9727; AVX512F-NEXT:    vpextrw $7, %xmm0, 30(%rdi)
9728; AVX512F-NEXT:  LBB60_32: ## %else30
9729; AVX512F-NEXT:    retq
9730;
9731; SKX-LABEL: test_mask_store_16xi16:
9732; SKX:       ## BB#0:
9733; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
9734; SKX-NEXT:    vpmovb2m %xmm0, %k1
9735; SKX-NEXT:    vmovdqu16 %ymm1, (%rdi) {%k1}
9736; SKX-NEXT:    retq
9737  call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)
9738  ret void
9739}
9740declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>)
9741
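; Note: for v32i16 without AVX-512 the store is fully scalarized. The AVX
; maskmov family (vmaskmovps/vpmaskmovd) only covers 32- and 64-bit
; elements, so for word elements each of the 32 mask lanes is tested
; (vpextrb + testb $1) and the matching vpextrw store is branched around
; individually, for both ymm halves of the value.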
9742define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
9743; AVX1-LABEL: test_mask_store_32xi16:
9744; AVX1:       ## BB#0:
9745; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
9746; AVX1-NEXT:    testb $1, %al
9747; AVX1-NEXT:    je LBB61_2
9748; AVX1-NEXT:  ## BB#1: ## %cond.store
9749; AVX1-NEXT:    vmovd %xmm1, %eax
9750; AVX1-NEXT:    movw %ax, (%rdi)
9751; AVX1-NEXT:  LBB61_2: ## %else
9752; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
9753; AVX1-NEXT:    testb $1, %al
9754; AVX1-NEXT:    je LBB61_4
9755; AVX1-NEXT:  ## BB#3: ## %cond.store1
9756; AVX1-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
9757; AVX1-NEXT:  LBB61_4: ## %else2
9758; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
9759; AVX1-NEXT:    testb $1, %al
9760; AVX1-NEXT:    je LBB61_6
9761; AVX1-NEXT:  ## BB#5: ## %cond.store3
9762; AVX1-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
9763; AVX1-NEXT:  LBB61_6: ## %else4
9764; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
9765; AVX1-NEXT:    testb $1, %al
9766; AVX1-NEXT:    je LBB61_8
9767; AVX1-NEXT:  ## BB#7: ## %cond.store5
9768; AVX1-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
9769; AVX1-NEXT:  LBB61_8: ## %else6
9770; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
9771; AVX1-NEXT:    testb $1, %al
9772; AVX1-NEXT:    je LBB61_10
9773; AVX1-NEXT:  ## BB#9: ## %cond.store7
9774; AVX1-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
9775; AVX1-NEXT:  LBB61_10: ## %else8
9776; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
9777; AVX1-NEXT:    testb $1, %al
9778; AVX1-NEXT:    je LBB61_12
9779; AVX1-NEXT:  ## BB#11: ## %cond.store9
9780; AVX1-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
9781; AVX1-NEXT:  LBB61_12: ## %else10
9782; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
9783; AVX1-NEXT:    testb $1, %al
9784; AVX1-NEXT:    je LBB61_14
9785; AVX1-NEXT:  ## BB#13: ## %cond.store11
9786; AVX1-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
9787; AVX1-NEXT:  LBB61_14: ## %else12
9788; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
9789; AVX1-NEXT:    testb $1, %al
9790; AVX1-NEXT:    je LBB61_16
9791; AVX1-NEXT:  ## BB#15: ## %cond.store13
9792; AVX1-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
9793; AVX1-NEXT:  LBB61_16: ## %else14
9794; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
9795; AVX1-NEXT:    testb $1, %al
9796; AVX1-NEXT:    je LBB61_18
9797; AVX1-NEXT:  ## BB#17: ## %cond.store15
9798; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9799; AVX1-NEXT:    vmovd %xmm3, %eax
9800; AVX1-NEXT:    movw %ax, 16(%rdi)
9801; AVX1-NEXT:  LBB61_18: ## %else16
9802; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
9803; AVX1-NEXT:    testb $1, %al
9804; AVX1-NEXT:    je LBB61_20
9805; AVX1-NEXT:  ## BB#19: ## %cond.store17
9806; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9807; AVX1-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
9808; AVX1-NEXT:  LBB61_20: ## %else18
9809; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
9810; AVX1-NEXT:    testb $1, %al
9811; AVX1-NEXT:    je LBB61_22
9812; AVX1-NEXT:  ## BB#21: ## %cond.store19
9813; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9814; AVX1-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
9815; AVX1-NEXT:  LBB61_22: ## %else20
9816; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
9817; AVX1-NEXT:    testb $1, %al
9818; AVX1-NEXT:    je LBB61_24
9819; AVX1-NEXT:  ## BB#23: ## %cond.store21
9820; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9821; AVX1-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
9822; AVX1-NEXT:  LBB61_24: ## %else22
9823; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
9824; AVX1-NEXT:    testb $1, %al
9825; AVX1-NEXT:    je LBB61_26
9826; AVX1-NEXT:  ## BB#25: ## %cond.store23
9827; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9828; AVX1-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
9829; AVX1-NEXT:  LBB61_26: ## %else24
9830; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
9831; AVX1-NEXT:    testb $1, %al
9832; AVX1-NEXT:    je LBB61_28
9833; AVX1-NEXT:  ## BB#27: ## %cond.store25
9834; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9835; AVX1-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
9836; AVX1-NEXT:  LBB61_28: ## %else26
9837; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
9838; AVX1-NEXT:    testb $1, %al
9839; AVX1-NEXT:    je LBB61_30
9840; AVX1-NEXT:  ## BB#29: ## %cond.store27
9841; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
9842; AVX1-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
9843; AVX1-NEXT:  LBB61_30: ## %else28
9844; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
9845; AVX1-NEXT:    testb $1, %al
9846; AVX1-NEXT:    je LBB61_32
9847; AVX1-NEXT:  ## BB#31: ## %cond.store29
9848; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
9849; AVX1-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
9850; AVX1-NEXT:  LBB61_32: ## %else30
9851; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
9852; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
9853; AVX1-NEXT:    testb $1, %al
9854; AVX1-NEXT:    je LBB61_34
9855; AVX1-NEXT:  ## BB#33: ## %cond.store31
9856; AVX1-NEXT:    vmovd %xmm2, %eax
9857; AVX1-NEXT:    movw %ax, 32(%rdi)
9858; AVX1-NEXT:  LBB61_34: ## %else32
9859; AVX1-NEXT:    vpextrb $1, %xmm0, %eax
9860; AVX1-NEXT:    testb $1, %al
9861; AVX1-NEXT:    je LBB61_36
9862; AVX1-NEXT:  ## BB#35: ## %cond.store33
9863; AVX1-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
9864; AVX1-NEXT:  LBB61_36: ## %else34
9865; AVX1-NEXT:    vpextrb $2, %xmm0, %eax
9866; AVX1-NEXT:    testb $1, %al
9867; AVX1-NEXT:    je LBB61_38
9868; AVX1-NEXT:  ## BB#37: ## %cond.store35
9869; AVX1-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
9870; AVX1-NEXT:  LBB61_38: ## %else36
9871; AVX1-NEXT:    vpextrb $3, %xmm0, %eax
9872; AVX1-NEXT:    testb $1, %al
9873; AVX1-NEXT:    je LBB61_40
9874; AVX1-NEXT:  ## BB#39: ## %cond.store37
9875; AVX1-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
9876; AVX1-NEXT:  LBB61_40: ## %else38
9877; AVX1-NEXT:    vpextrb $4, %xmm0, %eax
9878; AVX1-NEXT:    testb $1, %al
9879; AVX1-NEXT:    je LBB61_42
9880; AVX1-NEXT:  ## BB#41: ## %cond.store39
9881; AVX1-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
9882; AVX1-NEXT:  LBB61_42: ## %else40
9883; AVX1-NEXT:    vpextrb $5, %xmm0, %eax
9884; AVX1-NEXT:    testb $1, %al
9885; AVX1-NEXT:    je LBB61_44
9886; AVX1-NEXT:  ## BB#43: ## %cond.store41
9887; AVX1-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
9888; AVX1-NEXT:  LBB61_44: ## %else42
9889; AVX1-NEXT:    vpextrb $6, %xmm0, %eax
9890; AVX1-NEXT:    testb $1, %al
9891; AVX1-NEXT:    je LBB61_46
9892; AVX1-NEXT:  ## BB#45: ## %cond.store43
9893; AVX1-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
9894; AVX1-NEXT:  LBB61_46: ## %else44
9895; AVX1-NEXT:    vpextrb $7, %xmm0, %eax
9896; AVX1-NEXT:    testb $1, %al
9897; AVX1-NEXT:    je LBB61_48
9898; AVX1-NEXT:  ## BB#47: ## %cond.store45
9899; AVX1-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
9900; AVX1-NEXT:  LBB61_48: ## %else46
9901; AVX1-NEXT:    vpextrb $8, %xmm0, %eax
9902; AVX1-NEXT:    testb $1, %al
9903; AVX1-NEXT:    je LBB61_50
9904; AVX1-NEXT:  ## BB#49: ## %cond.store47
9905; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
9906; AVX1-NEXT:    vmovd %xmm1, %eax
9907; AVX1-NEXT:    movw %ax, 48(%rdi)
9908; AVX1-NEXT:  LBB61_50: ## %else48
9909; AVX1-NEXT:    vpextrb $9, %xmm0, %eax
9910; AVX1-NEXT:    testb $1, %al
9911; AVX1-NEXT:    je LBB61_52
9912; AVX1-NEXT:  ## BB#51: ## %cond.store49
9913; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX1-NEXT:  LBB61_52: ## %else50
; AVX1-NEXT:    vpextrb $10, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_54
; AVX1-NEXT:  ## BB#53: ## %cond.store51
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX1-NEXT:  LBB61_54: ## %else52
; AVX1-NEXT:    vpextrb $11, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_56
; AVX1-NEXT:  ## BB#55: ## %cond.store53
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX1-NEXT:  LBB61_56: ## %else54
; AVX1-NEXT:    vpextrb $12, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_58
; AVX1-NEXT:  ## BB#57: ## %cond.store55
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX1-NEXT:  LBB61_58: ## %else56
; AVX1-NEXT:    vpextrb $13, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_60
; AVX1-NEXT:  ## BB#59: ## %cond.store57
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX1-NEXT:  LBB61_60: ## %else58
; AVX1-NEXT:    vpextrb $14, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_62
; AVX1-NEXT:  ## BB#61: ## %cond.store59
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX1-NEXT:  LBB61_62: ## %else60
; AVX1-NEXT:    vpextrb $15, %xmm0, %eax
; AVX1-NEXT:    testb $1, %al
; AVX1-NEXT:    je LBB61_64
; AVX1-NEXT:  ## BB#63: ## %cond.store61
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm0
; AVX1-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX1-NEXT:  LBB61_64: ## %else62
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_mask_store_32xi16:
; AVX2:       ## BB#0:
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_2
; AVX2-NEXT:  ## BB#1: ## %cond.store
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:  LBB61_2: ## %else
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_4
; AVX2-NEXT:  ## BB#3: ## %cond.store1
; AVX2-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX2-NEXT:  LBB61_4: ## %else2
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_6
; AVX2-NEXT:  ## BB#5: ## %cond.store3
; AVX2-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX2-NEXT:  LBB61_6: ## %else4
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_8
; AVX2-NEXT:  ## BB#7: ## %cond.store5
; AVX2-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX2-NEXT:  LBB61_8: ## %else6
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_10
; AVX2-NEXT:  ## BB#9: ## %cond.store7
; AVX2-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX2-NEXT:  LBB61_10: ## %else8
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_12
; AVX2-NEXT:  ## BB#11: ## %cond.store9
; AVX2-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX2-NEXT:  LBB61_12: ## %else10
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_14
; AVX2-NEXT:  ## BB#13: ## %cond.store11
; AVX2-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX2-NEXT:  LBB61_14: ## %else12
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_16
; AVX2-NEXT:  ## BB#15: ## %cond.store13
; AVX2-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX2-NEXT:  LBB61_16: ## %else14
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_18
; AVX2-NEXT:  ## BB#17: ## %cond.store15
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vmovd %xmm3, %eax
; AVX2-NEXT:    movw %ax, 16(%rdi)
; AVX2-NEXT:  LBB61_18: ## %else16
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_20
; AVX2-NEXT:  ## BB#19: ## %cond.store17
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
; AVX2-NEXT:  LBB61_20: ## %else18
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_22
; AVX2-NEXT:  ## BB#21: ## %cond.store19
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
; AVX2-NEXT:  LBB61_22: ## %else20
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_24
; AVX2-NEXT:  ## BB#23: ## %cond.store21
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
; AVX2-NEXT:  LBB61_24: ## %else22
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_26
; AVX2-NEXT:  ## BB#25: ## %cond.store23
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
; AVX2-NEXT:  LBB61_26: ## %else24
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_28
; AVX2-NEXT:  ## BB#27: ## %cond.store25
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
; AVX2-NEXT:  LBB61_28: ## %else26
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_30
; AVX2-NEXT:  ## BB#29: ## %cond.store27
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
; AVX2-NEXT:  LBB61_30: ## %else28
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_32
; AVX2-NEXT:  ## BB#31: ## %cond.store29
; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
; AVX2-NEXT:  LBB61_32: ## %else30
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_34
; AVX2-NEXT:  ## BB#33: ## %cond.store31
; AVX2-NEXT:    vmovd %xmm2, %eax
; AVX2-NEXT:    movw %ax, 32(%rdi)
; AVX2-NEXT:  LBB61_34: ## %else32
; AVX2-NEXT:    vpextrb $1, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_36
; AVX2-NEXT:  ## BB#35: ## %cond.store33
; AVX2-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
; AVX2-NEXT:  LBB61_36: ## %else34
; AVX2-NEXT:    vpextrb $2, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_38
; AVX2-NEXT:  ## BB#37: ## %cond.store35
; AVX2-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
; AVX2-NEXT:  LBB61_38: ## %else36
; AVX2-NEXT:    vpextrb $3, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_40
; AVX2-NEXT:  ## BB#39: ## %cond.store37
; AVX2-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
; AVX2-NEXT:  LBB61_40: ## %else38
; AVX2-NEXT:    vpextrb $4, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_42
; AVX2-NEXT:  ## BB#41: ## %cond.store39
; AVX2-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
; AVX2-NEXT:  LBB61_42: ## %else40
; AVX2-NEXT:    vpextrb $5, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_44
; AVX2-NEXT:  ## BB#43: ## %cond.store41
; AVX2-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
; AVX2-NEXT:  LBB61_44: ## %else42
; AVX2-NEXT:    vpextrb $6, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_46
; AVX2-NEXT:  ## BB#45: ## %cond.store43
; AVX2-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
; AVX2-NEXT:  LBB61_46: ## %else44
; AVX2-NEXT:    vpextrb $7, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_48
; AVX2-NEXT:  ## BB#47: ## %cond.store45
; AVX2-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
; AVX2-NEXT:  LBB61_48: ## %else46
; AVX2-NEXT:    vpextrb $8, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_50
; AVX2-NEXT:  ## BB#49: ## %cond.store47
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, 48(%rdi)
; AVX2-NEXT:  LBB61_50: ## %else48
; AVX2-NEXT:    vpextrb $9, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_52
; AVX2-NEXT:  ## BB#51: ## %cond.store49
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX2-NEXT:  LBB61_52: ## %else50
; AVX2-NEXT:    vpextrb $10, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_54
; AVX2-NEXT:  ## BB#53: ## %cond.store51
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX2-NEXT:  LBB61_54: ## %else52
; AVX2-NEXT:    vpextrb $11, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_56
; AVX2-NEXT:  ## BB#55: ## %cond.store53
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX2-NEXT:  LBB61_56: ## %else54
; AVX2-NEXT:    vpextrb $12, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_58
; AVX2-NEXT:  ## BB#57: ## %cond.store55
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX2-NEXT:  LBB61_58: ## %else56
; AVX2-NEXT:    vpextrb $13, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_60
; AVX2-NEXT:  ## BB#59: ## %cond.store57
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX2-NEXT:  LBB61_60: ## %else58
; AVX2-NEXT:    vpextrb $14, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_62
; AVX2-NEXT:  ## BB#61: ## %cond.store59
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX2-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX2-NEXT:  LBB61_62: ## %else60
; AVX2-NEXT:    vpextrb $15, %xmm0, %eax
; AVX2-NEXT:    testb $1, %al
; AVX2-NEXT:    je LBB61_64
; AVX2-NEXT:  ## BB#63: ## %cond.store61
; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm0
; AVX2-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX2-NEXT:  LBB61_64: ## %else62
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: test_mask_store_32xi16:
; AVX512F:       ## BB#0:
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_2
; AVX512F-NEXT:  ## BB#1: ## %cond.store
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, (%rdi)
; AVX512F-NEXT:  LBB61_2: ## %else
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_4
; AVX512F-NEXT:  ## BB#3: ## %cond.store1
; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
; AVX512F-NEXT:  LBB61_4: ## %else2
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_6
; AVX512F-NEXT:  ## BB#5: ## %cond.store3
; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
; AVX512F-NEXT:  LBB61_6: ## %else4
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_8
; AVX512F-NEXT:  ## BB#7: ## %cond.store5
; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
; AVX512F-NEXT:  LBB61_8: ## %else6
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_10
; AVX512F-NEXT:  ## BB#9: ## %cond.store7
; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
; AVX512F-NEXT:  LBB61_10: ## %else8
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_12
; AVX512F-NEXT:  ## BB#11: ## %cond.store9
; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
; AVX512F-NEXT:  LBB61_12: ## %else10
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_14
; AVX512F-NEXT:  ## BB#13: ## %cond.store11
; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
; AVX512F-NEXT:  LBB61_14: ## %else12
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_16
; AVX512F-NEXT:  ## BB#15: ## %cond.store13
; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
; AVX512F-NEXT:  LBB61_16: ## %else14
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_18
; AVX512F-NEXT:  ## BB#17: ## %cond.store15
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    movw %ax, 16(%rdi)
; AVX512F-NEXT:  LBB61_18: ## %else16
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_20
; AVX512F-NEXT:  ## BB#19: ## %cond.store17
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $1, %xmm3, 18(%rdi)
; AVX512F-NEXT:  LBB61_20: ## %else18
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_22
; AVX512F-NEXT:  ## BB#21: ## %cond.store19
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $2, %xmm3, 20(%rdi)
; AVX512F-NEXT:  LBB61_22: ## %else20
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_24
; AVX512F-NEXT:  ## BB#23: ## %cond.store21
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $3, %xmm3, 22(%rdi)
; AVX512F-NEXT:  LBB61_24: ## %else22
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_26
; AVX512F-NEXT:  ## BB#25: ## %cond.store23
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $4, %xmm3, 24(%rdi)
; AVX512F-NEXT:  LBB61_26: ## %else24
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_28
; AVX512F-NEXT:  ## BB#27: ## %cond.store25
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $5, %xmm3, 26(%rdi)
; AVX512F-NEXT:  LBB61_28: ## %else26
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_30
; AVX512F-NEXT:  ## BB#29: ## %cond.store27
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm3
; AVX512F-NEXT:    vpextrw $6, %xmm3, 28(%rdi)
; AVX512F-NEXT:  LBB61_30: ## %else28
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_32
; AVX512F-NEXT:  ## BB#31: ## %cond.store29
; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm1
; AVX512F-NEXT:    vpextrw $7, %xmm1, 30(%rdi)
; AVX512F-NEXT:  LBB61_32: ## %else30
; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_34
; AVX512F-NEXT:  ## BB#33: ## %cond.store31
; AVX512F-NEXT:    vmovd %xmm2, %eax
; AVX512F-NEXT:    movw %ax, 32(%rdi)
; AVX512F-NEXT:  LBB61_34: ## %else32
; AVX512F-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_36
; AVX512F-NEXT:  ## BB#35: ## %cond.store33
; AVX512F-NEXT:    vpextrw $1, %xmm2, 34(%rdi)
; AVX512F-NEXT:  LBB61_36: ## %else34
; AVX512F-NEXT:    vpextrb $2, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_38
; AVX512F-NEXT:  ## BB#37: ## %cond.store35
; AVX512F-NEXT:    vpextrw $2, %xmm2, 36(%rdi)
; AVX512F-NEXT:  LBB61_38: ## %else36
; AVX512F-NEXT:    vpextrb $3, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_40
; AVX512F-NEXT:  ## BB#39: ## %cond.store37
; AVX512F-NEXT:    vpextrw $3, %xmm2, 38(%rdi)
; AVX512F-NEXT:  LBB61_40: ## %else38
; AVX512F-NEXT:    vpextrb $4, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_42
; AVX512F-NEXT:  ## BB#41: ## %cond.store39
; AVX512F-NEXT:    vpextrw $4, %xmm2, 40(%rdi)
; AVX512F-NEXT:  LBB61_42: ## %else40
; AVX512F-NEXT:    vpextrb $5, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_44
; AVX512F-NEXT:  ## BB#43: ## %cond.store41
; AVX512F-NEXT:    vpextrw $5, %xmm2, 42(%rdi)
; AVX512F-NEXT:  LBB61_44: ## %else42
; AVX512F-NEXT:    vpextrb $6, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_46
; AVX512F-NEXT:  ## BB#45: ## %cond.store43
; AVX512F-NEXT:    vpextrw $6, %xmm2, 44(%rdi)
; AVX512F-NEXT:  LBB61_46: ## %else44
; AVX512F-NEXT:    vpextrb $7, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_48
; AVX512F-NEXT:  ## BB#47: ## %cond.store45
; AVX512F-NEXT:    vpextrw $7, %xmm2, 46(%rdi)
; AVX512F-NEXT:  LBB61_48: ## %else46
; AVX512F-NEXT:    vpextrb $8, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_50
; AVX512F-NEXT:  ## BB#49: ## %cond.store47
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, 48(%rdi)
; AVX512F-NEXT:  LBB61_50: ## %else48
; AVX512F-NEXT:    vpextrb $9, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_52
; AVX512F-NEXT:  ## BB#51: ## %cond.store49
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $1, %xmm1, 50(%rdi)
; AVX512F-NEXT:  LBB61_52: ## %else50
; AVX512F-NEXT:    vpextrb $10, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_54
; AVX512F-NEXT:  ## BB#53: ## %cond.store51
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $2, %xmm1, 52(%rdi)
; AVX512F-NEXT:  LBB61_54: ## %else52
; AVX512F-NEXT:    vpextrb $11, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_56
; AVX512F-NEXT:  ## BB#55: ## %cond.store53
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $3, %xmm1, 54(%rdi)
; AVX512F-NEXT:  LBB61_56: ## %else54
; AVX512F-NEXT:    vpextrb $12, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_58
; AVX512F-NEXT:  ## BB#57: ## %cond.store55
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $4, %xmm1, 56(%rdi)
; AVX512F-NEXT:  LBB61_58: ## %else56
; AVX512F-NEXT:    vpextrb $13, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_60
; AVX512F-NEXT:  ## BB#59: ## %cond.store57
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $5, %xmm1, 58(%rdi)
; AVX512F-NEXT:  LBB61_60: ## %else58
; AVX512F-NEXT:    vpextrb $14, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_62
; AVX512F-NEXT:  ## BB#61: ## %cond.store59
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
; AVX512F-NEXT:    vpextrw $6, %xmm1, 60(%rdi)
; AVX512F-NEXT:  LBB61_62: ## %else60
; AVX512F-NEXT:    vpextrb $15, %xmm0, %eax
; AVX512F-NEXT:    testb $1, %al
; AVX512F-NEXT:    je LBB61_64
; AVX512F-NEXT:  ## BB#63: ## %cond.store61
; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm0
; AVX512F-NEXT:    vpextrw $7, %xmm0, 62(%rdi)
; AVX512F-NEXT:  LBB61_64: ## %else62
; AVX512F-NEXT:    retq
;
; SKX-LABEL: test_mask_store_32xi16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k1
; SKX-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; SKX-NEXT:    retq
  call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1>%mask)
  ret void
}
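; Note (summary of the checks above, not autogenerated): a masked store of
; <32 x i16> is only legal as a single instruction when AVX512BW is available,
; so the SKX run selects one masked vmovdqu16. The AVX1/AVX2/AVX512F runs have
; no 16-bit-element maskmov, so the store is scalarized: each mask bit is
; extracted with vpextrb, tested, and branched around a per-element
; vpextrw/movw store (the slow case the comment at the top of this file
; warns about).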

declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)
