; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

;
; Unary shuffle indices from registers
;

; Builds a <4 x double> whose four lanes are elements of %x selected by the
; runtime i64 indices %i0..%i3. The checks below expect %x to be stored to a
; 32-byte-aligned stack slot and the lanes to be gathered back with scalar
; loads before the two halves are joined with vinsertf128.
define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_xxxx_i64:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
  ret <4 x double> %r3
}
35*9880d681SAndroid Build Coastguard Worker
; Result lanes are: undef, %x[%i1], %x[%i2], 0.0. %x0 and %x3 are still
; extracted but do not feed the result, so %i0 and %i3 are unused by the
; returned value. The checks expect a stack spill of %x followed by a
; vmovddup load for the low half and a zero-extending vmovsd load for the
; high half.
define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_uxx0_i64:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double undef, i32 0
  %r1 = insertelement <4 x double>   %r0, double   %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double   %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double   0.0, i32 3
  ret <4 x double> %r3
}
60*9880d681SAndroid Build Coastguard Worker
; Widening variant: the four result lanes are picked from a <2 x double>
; source by the runtime i64 indices %i0..%i3. The checks expect the 128-bit
; source to be stored at a negative offset from %rsp with no frame setup,
; then gathered with scalar loads and joined with vinsertf128.
define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <2 x double> %x, i64 %i0
  %x1 = extractelement <2 x double> %x, i64 %i1
  %x2 = extractelement <2 x double> %x, i64 %i2
  %x3 = extractelement <2 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
  %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
  %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
  ret <4 x double> %r3
}
81*9880d681SAndroid Build Coastguard Worker
; Integer counterpart of the v4f64 test: each lane of the <4 x i64> result is
; %x indexed by one of the runtime i64 indices %i0..%i3. The two run
; configurations have separate check blocks; the expected sequences differ
; only in the final 128-bit insert (vinsertf128 for the +avx run, vinserti128
; for the +avx2 run).
define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}
128*9880d681SAndroid Build Coastguard Worker
; Result lanes are: %x[%i0], %x[%i1], 0, 0. %x2 and %x3 are still extracted
; but do not feed the result. The checks expect the low half to be gathered
; from a stack copy of %x and the high half to come from a register zeroed
; with vpxor; the +avx and +avx2 expectations differ only in vinsertf128
; versus vinserti128.
define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64   0, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64   0, i32 3
  ret <4 x i64> %r3
}
171*9880d681SAndroid Build Coastguard Worker
; Widening integer variant: the four i64 result lanes are picked from a
; <2 x i64> source by the runtime indices %i0..%i3. The checks expect the
; source to be stored at a negative offset from %rsp with no frame setup; the
; +avx and +avx2 expectations differ only in vinsertf128 versus vinserti128.
define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; AVX1-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}
206*9880d681SAndroid Build Coastguard Worker
; Builds an <8 x float> whose lanes are elements of %x selected by the eight
; runtime i32 indices %i0..%i7. The two run configurations expect different
; lowerings: the +avx checks sign-extend the indices with movslq and gather
; the lanes from a stack copy of %x, while the +avx2 checks move each index
; into a vector register with vmovd and use one vpermps per lane before
; assembling the result with vinsertps/vinsertf128.
define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; AVX1-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    movslq %edi, %rax
; AVX1-NEXT:    movslq %esi, %rsi
; AVX1-NEXT:    movslq %edx, %rdx
; AVX1-NEXT:    movslq %ecx, %r11
; AVX1-NEXT:    movslq %r8d, %r10
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    movslq %r9d, %r8
; AVX1-NEXT:    movslq 16(%rbp), %rdi
; AVX1-NEXT:    movslq 24(%rbp), %rcx
; AVX1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX1-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; AVX1-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; AVX1-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; AVX1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm1
; AVX2-NEXT:    vmovd %esi, %xmm2
; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm2
; AVX2-NEXT:    vmovd %edx, %xmm3
; AVX2-NEXT:    vpermps %ymm0, %ymm3, %ymm3
; AVX2-NEXT:    vmovd %ecx, %xmm4
; AVX2-NEXT:    vpermps %ymm0, %ymm4, %ymm4
; AVX2-NEXT:    vmovd %r8d, %xmm5
; AVX2-NEXT:    vpermps %ymm0, %ymm5, %ymm5
; AVX2-NEXT:    vmovd %r9d, %xmm6
; AVX2-NEXT:    vpermps %ymm0, %ymm6, %ymm6
; AVX2-NEXT:    vmovd {{.*#+}} xmm7 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpermps %ymm0, %ymm7, %ymm7
; AVX2-NEXT:    vmovd {{.*#+}} xmm8 = mem[0],zero,zero,zero
; AVX2-NEXT:    vpermps %ymm0, %ymm8, %ymm0
; AVX2-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[2,3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm5 = xmm5[0,1],xmm7[0],xmm5[3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm5[0,1,2],xmm0[0]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; AVX2-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %x0 = extractelement <8 x float> %x, i32 %i0
  %x1 = extractelement <8 x float> %x, i32 %i1
  %x2 = extractelement <8 x float> %x, i32 %i2
  %x3 = extractelement <8 x float> %x, i32 %i3
  %x4 = extractelement <8 x float> %x, i32 %i4
  %x5 = extractelement <8 x float> %x, i32 %i5
  %x6 = extractelement <8 x float> %x, i32 %i6
  %x7 = extractelement <8 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
  ret <8 x float> %r7
}
282*9880d681SAndroid Build Coastguard Worker
; Widening float variant: the eight result lanes are picked from a
; <4 x float> source by the runtime i32 indices %i0..%i7. Both run
; configurations share one check block, which expects the indices to be
; sign-extended with movslq, the source to be stored at a negative offset
; from %rsp, and the lanes to be gathered with vmovss/vinsertps before the
; halves are joined with vinsertf128.
define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
; ALL:       # BB#0:
; ALL-NEXT:    movslq %edi, %rax
; ALL-NEXT:    movslq %esi, %rsi
; ALL-NEXT:    movslq %edx, %rdx
; ALL-NEXT:    movslq %ecx, %r11
; ALL-NEXT:    movslq %r8d, %r10
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    movslq %r9d, %r8
; ALL-NEXT:    movslq {{[0-9]+}}(%rsp), %rdi
; ALL-NEXT:    movslq {{[0-9]+}}(%rsp), %rcx
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; ALL-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; ALL-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1],xmm0[0],xmm3[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <4 x float> %x, i32 %i0
  %x1 = extractelement <4 x float> %x, i32 %i1
  %x2 = extractelement <4 x float> %x, i32 %i2
  %x3 = extractelement <4 x float> %x, i32 %i3
  %x4 = extractelement <4 x float> %x, i32 %i4
  %x5 = extractelement <4 x float> %x, i32 %i5
  %x6 = extractelement <4 x float> %x, i32 %i6
  %x7 = extractelement <4 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
  %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
  %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
  %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
  %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
  %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
  %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
  ret <8 x float> %r7
}
325*9880d681SAndroid Build Coastguard Worker
326*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
327*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
328*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
329*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    pushq %rbp
330*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rsp, %rbp
331*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    andq $-32, %rsp
332*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    subq $64, %rsp
333*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
334*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 32(%rbp), %rax
335*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
336*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %eax, %xmm0
337*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 40(%rbp), %rax
338*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
339*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
340*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 48(%rbp), %rax
341*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
342*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
343*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 56(%rbp), %rax
344*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
345*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
346*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 64(%rbp), %rax
347*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
348*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
349*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 72(%rbp), %rax
350*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
351*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
352*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 80(%rbp), %rax
353*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
354*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
355*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 88(%rbp), %rax
356*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
357*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
358*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %edi, %rax
359*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
360*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %eax, %xmm1
361*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %esi, %rax
362*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
363*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %edx, %rax
364*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
365*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %ecx, %rax
366*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
367*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %r8d, %rax
368*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
369*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq %r9d, %rax
370*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
371*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 16(%rbp), %rax
372*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
373*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
374*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movslq 24(%rbp), %rax
375*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
376*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
377*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
378*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    movq %rbp, %rsp
379*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    popq %rbp
380*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
381*9880d681SAndroid Build Coastguard Worker;
382*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
383*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
384*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    pushq %rbp
385*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rsp, %rbp
386*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    andq $-32, %rsp
387*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    subq $64, %rsp
388*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
389*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 32(%rbp), %rax
390*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
391*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %eax, %xmm0
392*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 40(%rbp), %rax
393*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
394*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
395*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 48(%rbp), %rax
396*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
397*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
398*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 56(%rbp), %rax
399*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
400*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
401*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 64(%rbp), %rax
402*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
403*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
404*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 72(%rbp), %rax
405*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
406*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
407*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 80(%rbp), %rax
408*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
409*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
410*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 88(%rbp), %rax
411*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
412*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
413*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %edi, %rax
414*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
415*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vmovd %eax, %xmm1
416*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %esi, %rax
417*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
418*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %edx, %rax
419*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
420*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %ecx, %rax
421*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
422*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %r8d, %rax
423*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
424*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq %r9d, %rax
425*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
426*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 16(%rbp), %rax
427*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
428*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
429*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movslq 24(%rbp), %rax
430*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
431*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
432*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
433*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    movq %rbp, %rsp
434*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    popq %rbp
435*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
436*9880d681SAndroid Build Coastguard Worker  %x0  = extractelement <16 x i16> %x, i32 %i0
437*9880d681SAndroid Build Coastguard Worker  %x1  = extractelement <16 x i16> %x, i32 %i1
438*9880d681SAndroid Build Coastguard Worker  %x2  = extractelement <16 x i16> %x, i32 %i2
439*9880d681SAndroid Build Coastguard Worker  %x3  = extractelement <16 x i16> %x, i32 %i3
440*9880d681SAndroid Build Coastguard Worker  %x4  = extractelement <16 x i16> %x, i32 %i4
441*9880d681SAndroid Build Coastguard Worker  %x5  = extractelement <16 x i16> %x, i32 %i5
442*9880d681SAndroid Build Coastguard Worker  %x6  = extractelement <16 x i16> %x, i32 %i6
443*9880d681SAndroid Build Coastguard Worker  %x7  = extractelement <16 x i16> %x, i32 %i7
444*9880d681SAndroid Build Coastguard Worker  %x8  = extractelement <16 x i16> %x, i32 %i8
445*9880d681SAndroid Build Coastguard Worker  %x9  = extractelement <16 x i16> %x, i32 %i9
446*9880d681SAndroid Build Coastguard Worker  %x10 = extractelement <16 x i16> %x, i32 %i10
447*9880d681SAndroid Build Coastguard Worker  %x11 = extractelement <16 x i16> %x, i32 %i11
448*9880d681SAndroid Build Coastguard Worker  %x12 = extractelement <16 x i16> %x, i32 %i12
449*9880d681SAndroid Build Coastguard Worker  %x13 = extractelement <16 x i16> %x, i32 %i13
450*9880d681SAndroid Build Coastguard Worker  %x14 = extractelement <16 x i16> %x, i32 %i14
451*9880d681SAndroid Build Coastguard Worker  %x15 = extractelement <16 x i16> %x, i32 %i15
452*9880d681SAndroid Build Coastguard Worker  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
453*9880d681SAndroid Build Coastguard Worker  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
454*9880d681SAndroid Build Coastguard Worker  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
455*9880d681SAndroid Build Coastguard Worker  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
456*9880d681SAndroid Build Coastguard Worker  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
457*9880d681SAndroid Build Coastguard Worker  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
458*9880d681SAndroid Build Coastguard Worker  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
459*9880d681SAndroid Build Coastguard Worker  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
460*9880d681SAndroid Build Coastguard Worker  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
461*9880d681SAndroid Build Coastguard Worker  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
462*9880d681SAndroid Build Coastguard Worker  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
463*9880d681SAndroid Build Coastguard Worker  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
464*9880d681SAndroid Build Coastguard Worker  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
465*9880d681SAndroid Build Coastguard Worker  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
466*9880d681SAndroid Build Coastguard Worker  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
467*9880d681SAndroid Build Coastguard Worker  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
468*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %r15
469*9880d681SAndroid Build Coastguard Worker}
470*9880d681SAndroid Build Coastguard Worker
; Unary v16i16 shuffle of a 128-bit source driven by sixteen run-time i32
; indices.
;
; Result lane k is `extractelement <8 x i16> %x, i32 %i<k>` inserted at the
; constant position k of a <16 x i16> that starts out as undef.
;
; Expected codegen has the same shape for the AVX1 and AVX2 runs:
;   * %x is spilled below %rsp with vmovaps and no frame is set up;
;   * each index is sign-extended with movslq and the selected word is read
;     back through a -24(%rsp,%rax,2) address;
;   * the low result half is assembled in %xmm1, starting with the indices
;     held in %edi, %esi, %edx, %ecx, %r8d and %r9d, the rest coming from
;     stack slots;
;   * the high result half is assembled in %xmm0 from stack-passed indices;
;   * the halves are joined by vinsertf128 (AVX1 run) or vinserti128 (AVX2
;     run); that instruction is the only difference between the two runs.
; No masking or range check of the indices appears in the expected output.
define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX1-NEXT:    movslq %edi, %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    movslq %esi, %rax
; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %edx, %rax
; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %ecx, %rax
; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r8d, %rax
; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq %r9d, %rax
; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX1-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX2-NEXT:    movslq %edi, %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    movslq %esi, %rax
; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %edx, %rax
; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %ecx, %rax
; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r8d, %rax
; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq %r9d, %rax
; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $6, %eax, %xmm1, %xmm1
; AVX2-NEXT:    movslq {{[0-9]+}}(%rsp), %rax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  ; Pick one source element per run-time index.
  %x0  = extractelement <8 x i16> %x, i32 %i0
  %x1  = extractelement <8 x i16> %x, i32 %i1
  %x2  = extractelement <8 x i16> %x, i32 %i2
  %x3  = extractelement <8 x i16> %x, i32 %i3
  %x4  = extractelement <8 x i16> %x, i32 %i4
  %x5  = extractelement <8 x i16> %x, i32 %i5
  %x6  = extractelement <8 x i16> %x, i32 %i6
  %x7  = extractelement <8 x i16> %x, i32 %i7
  %x8  = extractelement <8 x i16> %x, i32 %i8
  %x9  = extractelement <8 x i16> %x, i32 %i9
  %x10 = extractelement <8 x i16> %x, i32 %i10
  %x11 = extractelement <8 x i16> %x, i32 %i11
  %x12 = extractelement <8 x i16> %x, i32 %i12
  %x13 = extractelement <8 x i16> %x, i32 %i13
  %x14 = extractelement <8 x i16> %x, i32 %i14
  %x15 = extractelement <8 x i16> %x, i32 %i15
  ; Rebuild the 256-bit result one lane at a time, lane k taking %x<k>.
  %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
  %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
  %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
  %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
  %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
  %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
  %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
  %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
  %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
  %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
  %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
  %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
  %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
  %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
  %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
  %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
  ret <16 x i16> %r15
}
603*9880d681SAndroid Build Coastguard Worker
;
; Unary shuffle indices from memory
;
607*9880d681SAndroid Build Coastguard Worker
; Unary v4i64 shuffle of a 256-bit source whose four i64 indices are loaded
; from memory.
;
; The indices are read from four consecutive i64 slots starting at %i; result
; lane k is `extractelement <4 x i64> %x, i64 %i<k>` inserted at the constant
; position k of a <4 x i64> that starts out as undef.
;
; Expected codegen has the same shape for the AVX1 and AVX2 runs:
;   * a frame with a 32-byte-aligned stack pointer is set up (andq $-32, %rsp);
;   * the indices are loaded from (%rdi), 8(%rdi), 16(%rdi) and 24(%rdi);
;   * %x is spilled to (%rsp) with vmovaps, then four vmovq loads fetch the
;     selected quadwords;
;   * vpunpcklqdq pairs them into two 128-bit halves, which are joined by
;     vinsertf128 (AVX1 run) or vinserti128 (AVX2 run).
; No masking or range check of the indices appears in the expected output.
define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    movq 8(%rdi), %rcx
; AVX1-NEXT:    movq 16(%rdi), %rdx
; AVX1-NEXT:    movq 24(%rdi), %rsi
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    movq 8(%rdi), %rcx
; AVX2-NEXT:    movq 16(%rdi), %rdx
; AVX2-NEXT:    movq 24(%rdi), %rsi
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  ; Addresses of the four consecutive index slots.
  %p0  = getelementptr inbounds i64, i64* %i, i32 0
  %p1  = getelementptr inbounds i64, i64* %i, i32 1
  %p2  = getelementptr inbounds i64, i64* %i, i32 2
  %p3  = getelementptr inbounds i64, i64* %i, i32 3
  ; Load the lane indices.
  %i0  = load i64, i64* %p0, align 4
  %i1  = load i64, i64* %p1, align 4
  %i2  = load i64, i64* %p2, align 4
  %i3  = load i64, i64* %p3, align 4
  ; Pick one source element per loaded index.
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  ; Rebuild the result, lane k taking %x<k>.
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}
670*9880d681SAndroid Build Coastguard Worker
; Unary v4i64 shuffle of a 128-bit source whose four i64 indices are loaded
; from memory.
;
; The indices are read from four consecutive i64 slots starting at %i; result
; lane k is `extractelement <2 x i64> %x, i64 %i<k>` inserted at the constant
; position k of a <4 x i64> that starts out as undef.
;
; Expected codegen has the same shape for the AVX1 and AVX2 runs:
;   * no frame is set up; %x is spilled to a negative offset from %rsp with
;     vmovaps;
;   * the indices are loaded from (%rdi), 8(%rdi), 16(%rdi) and 24(%rdi);
;   * four vmovq loads fetch the selected quadwords and vpunpcklqdq pairs them
;     into two 128-bit halves;
;   * the halves are joined by vinsertf128 (AVX1 run) or vinserti128 (AVX2
;     run).
; No masking or range check of the indices appears in the expected output.
define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
; AVX1-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    movq 8(%rdi), %rcx
; AVX1-NEXT:    movq 16(%rdi), %rdx
; AVX1-NEXT:    movq 24(%rdi), %rsi
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    movq 8(%rdi), %rcx
; AVX2-NEXT:    movq 16(%rdi), %rdx
; AVX2-NEXT:    movq 24(%rdi), %rsi
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  ; Addresses of the four consecutive index slots.
  %p0  = getelementptr inbounds i64, i64* %i, i32 0
  %p1  = getelementptr inbounds i64, i64* %i, i32 1
  %p2  = getelementptr inbounds i64, i64* %i, i32 2
  %p3  = getelementptr inbounds i64, i64* %i, i32 3
  ; Load the lane indices.
  %i0  = load i64, i64* %p0, align 4
  %i1  = load i64, i64* %p1, align 4
  %i2  = load i64, i64* %p2, align 4
  %i3  = load i64, i64* %p3, align 4
  ; Pick one source element per loaded index.
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  ; Rebuild the result, lane k taking %x<k>.
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}
721