; xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/haddsub-2.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; hadd_ps_test1 builds the vector <A0+A1, A2+A3, B0+B1, B2+B3> one lane at a
; time with extractelement / fadd / insertelement, filling lanes 0, 1, 2, 3 in
; that order. The autogenerated assertions expect the whole chain to be
; selected as a single haddps (vhaddps in the AVX runs).
define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test1:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_ps_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 0
  %vecext1 = extractelement <4 x float> %A, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %A, i32 2
  %vecext3 = extractelement <4 x float> %A, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  %vecext6 = extractelement <4 x float> %B, i32 0
  %vecext7 = extractelement <4 x float> %B, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
  %vecext10 = extractelement <4 x float> %B, i32 2
  %vecext11 = extractelement <4 x float> %B, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
  ret <4 x float> %vecinit13
}

; hadd_ps_test2 produces the same result vector as a horizontal add,
; <A0+A1, A2+A3, B0+B1, B2+B3>, but the insertelement chain fills the lanes in
; the order 1, 0, 3, 2. The assertions expect the out-of-order build to still
; be selected as a single haddps (vhaddps in the AVX runs).
define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test2:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_ps_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 2
  %vecext1 = extractelement <4 x float> %A, i32 3
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 1
  %vecext2 = extractelement <4 x float> %A, i32 0
  %vecext3 = extractelement <4 x float> %A, i32 1
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 0
  %vecext6 = extractelement <4 x float> %B, i32 2
  %vecext7 = extractelement <4 x float> %B, i32 3
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 3
  %vecext10 = extractelement <4 x float> %B, i32 0
  %vecext11 = extractelement <4 x float> %B, i32 1
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 2
  ret <4 x float> %vecinit13
}

; hsub_ps_test1 builds <A0-A1, A2-A3, B0-B1, B2-B3> one lane at a time with
; extractelement / fsub / insertelement, filling lanes 0, 1, 2, 3 in that
; order. The assertions expect a single hsubps (vhsubps in the AVX runs).
define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hsub_ps_test1:
; SSE:       # BB#0:
; SSE-NEXT:    hsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_ps_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 0
  %vecext1 = extractelement <4 x float> %A, i32 1
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %sub, i32 0
  %vecext2 = extractelement <4 x float> %A, i32 2
  %vecext3 = extractelement <4 x float> %A, i32 3
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 1
  %vecext6 = extractelement <4 x float> %B, i32 0
  %vecext7 = extractelement <4 x float> %B, i32 1
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 2
  %vecext10 = extractelement <4 x float> %B, i32 2
  %vecext11 = extractelement <4 x float> %B, i32 3
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 3
  ret <4 x float> %vecinit13
}

; hsub_ps_test2 produces <A0-A1, A2-A3, B0-B1, B2-B3>, the same result as
; hsub_ps_test1, but the insertelement chain fills the lanes in the order
; 1, 0, 3, 2. The assertions expect the out-of-order build to still be
; selected as a single hsubps (vhsubps in the AVX runs).
define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hsub_ps_test2:
; SSE:       # BB#0:
; SSE-NEXT:    hsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_ps_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 2
  %vecext1 = extractelement <4 x float> %A, i32 3
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
  %vecext2 = extractelement <4 x float> %A, i32 0
  %vecext3 = extractelement <4 x float> %A, i32 1
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
  %vecext6 = extractelement <4 x float> %B, i32 2
  %vecext7 = extractelement <4 x float> %B, i32 3
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
  %vecext10 = extractelement <4 x float> %B, i32 0
  %vecext11 = extractelement <4 x float> %B, i32 1
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
  ret <4 x float> %vecinit13
}

; phadd_d_test1 is the i32 counterpart of the float horizontal-add tests. It
; builds <A0+A1, A2+A3, B0+B1, B2+B3> with extractelement / add /
; insertelement, filling lanes 0, 1, 2, 3 in that order.
; The assertions differ per run line. The run with only +sse2,+sse3 expects a
; scalarized movd / pshufd / addl / punpckldq sequence, while the +ssse3 run
; expects a single phaddd and the AVX runs a single vphaddd.
define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phadd_d_test1:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    addl %eax, %edx
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    addl %eax, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edi
; SSE3-NEXT:    addl %eax, %edi
; SSE3-NEXT:    movd %edi, %xmm0
; SSE3-NEXT:    movd %edx, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    movd %esi, %xmm2
; SSE3-NEXT:    movd %ecx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phadd_d_test1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phadd_d_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 0
  %vecext1 = extractelement <4 x i32> %A, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <4 x i32> %A, i32 2
  %vecext3 = extractelement <4 x i32> %A, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 1
  %vecext6 = extractelement <4 x i32> %B, i32 0
  %vecext7 = extractelement <4 x i32> %B, i32 1
  %add8 = add i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 2
  %vecext10 = extractelement <4 x i32> %B, i32 2
  %vecext11 = extractelement <4 x i32> %B, i32 3
  %add12 = add i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 3
  ret <4 x i32> %vecinit13
}

; phadd_d_test2 computes the same integer horizontal add as phadd_d_test1 but
; varies the pattern in two ways. The insertelement chain fills the lanes in
; the order 1, 0, 3, 2, and the %B pairs are added with their operands
; commuted (B3+B2 and B1+B0).
; The assertions expect the +ssse3 run to still select a single phaddd and the
; AVX runs a single vphaddd; the run with only +sse2,+sse3 expects a
; scalarized sequence.
define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phadd_d_test2:
; SSE3:       # BB#0:
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    addl %eax, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    addl %eax, %esi
; SSE3-NEXT:    movd %esi, %xmm0
; SSE3-NEXT:    movd %ecx, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    movd %xmm1, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movd %ecx, %xmm1
; SSE3-NEXT:    movd %edx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phadd_d_test2:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phadd_d_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 2
  %vecext1 = extractelement <4 x i32> %A, i32 3
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 1
  %vecext2 = extractelement <4 x i32> %A, i32 0
  %vecext3 = extractelement <4 x i32> %A, i32 1
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 0
  %vecext6 = extractelement <4 x i32> %B, i32 3
  %vecext7 = extractelement <4 x i32> %B, i32 2
  %add8 = add i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 3
  %vecext10 = extractelement <4 x i32> %B, i32 1
  %vecext11 = extractelement <4 x i32> %B, i32 0
  %add12 = add i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 2
  ret <4 x i32> %vecinit13
}

; phsub_d_test1 builds <A0-A1, A2-A3, B0-B1, B2-B3> on i32 lanes with
; extractelement / sub / insertelement, filling lanes 0, 1, 2, 3 in that
; order.
; The assertions differ per run line. The run with only +sse2,+sse3 expects a
; scalarized movd / pshufd / subl / punpckldq sequence, while the +ssse3 run
; expects a single phsubd and the AVX runs a single vphsubd.
define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phsub_d_test1:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    subl %ecx, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %ecx
; SSE3-NEXT:    movd %xmm1, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    subl %esi, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edi
; SSE3-NEXT:    subl %edi, %esi
; SSE3-NEXT:    movd %esi, %xmm0
; SSE3-NEXT:    movd %ecx, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    movd %edx, %xmm2
; SSE3-NEXT:    movd %eax, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phsub_d_test1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phsubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsub_d_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 0
  %vecext1 = extractelement <4 x i32> %A, i32 1
  %sub = sub i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
  %vecext2 = extractelement <4 x i32> %A, i32 2
  %vecext3 = extractelement <4 x i32> %A, i32 3
  %sub4 = sub i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
  %vecext6 = extractelement <4 x i32> %B, i32 0
  %vecext7 = extractelement <4 x i32> %B, i32 1
  %sub8 = sub i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
  %vecext10 = extractelement <4 x i32> %B, i32 2
  %vecext11 = extractelement <4 x i32> %B, i32 3
  %sub12 = sub i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
  ret <4 x i32> %vecinit13
}

; phsub_d_test2 produces <A0-A1, A2-A3, B0-B1, B2-B3>, the same result as
; phsub_d_test1, but the insertelement chain fills the lanes in the order
; 1, 0, 3, 2. Operand order inside each sub is unchanged.
; The assertions expect the +ssse3 run to still select a single phsubd and the
; AVX runs a single vphsubd; the run with only +sse2,+sse3 expects a
; scalarized sequence.
define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phsub_d_test2:
; SSE3:       # BB#0:
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    subl %ecx, %eax
; SSE3-NEXT:    movd %xmm0, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    subl %esi, %edx
; SSE3-NEXT:    movd %edx, %xmm0
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    movd %ecx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phsub_d_test2:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phsubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsub_d_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 2
  %vecext1 = extractelement <4 x i32> %A, i32 3
  %sub = sub i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 1
  %vecext2 = extractelement <4 x i32> %A, i32 0
  %vecext3 = extractelement <4 x i32> %A, i32 1
  %sub4 = sub i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 0
  %vecext6 = extractelement <4 x i32> %B, i32 2
  %vecext7 = extractelement <4 x i32> %B, i32 3
  %sub8 = sub i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 3
  %vecext10 = extractelement <4 x i32> %B, i32 0
  %vecext11 = extractelement <4 x i32> %B, i32 1
  %sub12 = sub i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 2
  ret <4 x i32> %vecinit13
}

; hadd_pd_test1 builds the <2 x double> vector <A0+A1, B0+B1> with
; extractelement / fadd / insertelement. The assertions expect a single
; haddpd (vhaddpd in the AVX runs).
define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hadd_pd_test1:
; SSE:       # BB#0:
; SSE-NEXT:    haddpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_pd_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %A, i32 0
  %vecext1 = extractelement <2 x double> %A, i32 1
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %add, i32 0
  %vecext2 = extractelement <2 x double> %B, i32 0
  %vecext3 = extractelement <2 x double> %B, i32 1
  %add2 = fadd double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
  ret <2 x double> %vecinit2
}

; hadd_pd_test2 is hadd_pd_test1 with the fadd operands commuted, so the
; result is built as <A1+A0, B1+B0>. The assertions expect the commuted form
; to still be selected as a single haddpd (vhaddpd in the AVX runs).
define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hadd_pd_test2:
; SSE:       # BB#0:
; SSE-NEXT:    haddpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_pd_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %A, i32 1
  %vecext1 = extractelement <2 x double> %A, i32 0
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %add, i32 0
  %vecext2 = extractelement <2 x double> %B, i32 1
  %vecext3 = extractelement <2 x double> %B, i32 0
  %add2 = fadd double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
  ret <2 x double> %vecinit2
}

; Horizontal subtract of two <2 x double> inputs written as scalar operations.
; Result lane 0 = A[0] - A[1], result lane 1 = B[0] - B[1].
; The check lines below expect one hsubpd (SSE runs) or one vhsubpd (AVX runs).
397*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
398*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: hsub_pd_test1:
399*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
400*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm1, %xmm0
401*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
402*9880d681SAndroid Build Coastguard Worker;
403*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: hsub_pd_test1:
404*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
405*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
406*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
; Lane 0 of the result: element 0 of %A minus element 1 of %A.
407*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <2 x double> %A, i32 0
408*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <2 x double> %A, i32 1
409*9880d681SAndroid Build Coastguard Worker  %sub = fsub double %vecext, %vecext1
410*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <2 x double> undef, double %sub, i32 0
; Lane 1 of the result: element 0 of %B minus element 1 of %B.
411*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <2 x double> %B, i32 0
412*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <2 x double> %B, i32 1
413*9880d681SAndroid Build Coastguard Worker  %sub2 = fsub double %vecext2, %vecext3
414*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 1
415*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %vecinit2
416*9880d681SAndroid Build Coastguard Worker}
417*9880d681SAndroid Build Coastguard Worker
; Horizontal subtract of two <2 x double> inputs where the result vector is
; filled in reverse order: B[0] - B[1] is inserted into lane 1 first, and
; A[0] - A[1] is inserted into lane 0 afterwards.
; The final value is still <A[0] - A[1], B[0] - B[1]>, and the check lines
; below expect one hsubpd (SSE runs) or one vhsubpd (AVX runs).
418*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
419*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: hsub_pd_test2:
420*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
421*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm1, %xmm0
422*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
423*9880d681SAndroid Build Coastguard Worker;
424*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: hsub_pd_test2:
425*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
426*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
427*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
; The %B difference is computed first and placed in lane 1.
428*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <2 x double> %B, i32 0
429*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <2 x double> %B, i32 1
430*9880d681SAndroid Build Coastguard Worker  %sub = fsub double %vecext, %vecext1
431*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
; The %A difference is computed second and placed in lane 0.
432*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <2 x double> %A, i32 0
433*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <2 x double> %A, i32 1
434*9880d681SAndroid Build Coastguard Worker  %sub2 = fsub double %vecext2, %vecext3
435*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
436*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %vecinit2
437*9880d681SAndroid Build Coastguard Worker}
438*9880d681SAndroid Build Coastguard Worker
; Pairwise add over two <4 x double> inputs written as scalar operations.
; Result = <A[0]+A[1], A[2]+A[3], B[0]+B[1], B[2]+B[3]>, i.e. the two low
; result lanes come entirely from %A and the two high lanes entirely from %B.
; Expected code per the check lines below:
;   - SSE runs: two haddpd instructions followed by a movapd.
;   - AVX runs: the upper 128 bits of each input are extracted with
;     vextractf128, a 128-bit (xmm) vhaddpd is applied per input, and the two
;     halves are recombined with vinsertf128.
439*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
440*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_vhadd_pd_test:
441*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
442*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddpd %xmm1, %xmm0
443*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddpd %xmm3, %xmm2
444*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm2, %xmm1
445*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
446*9880d681SAndroid Build Coastguard Worker;
447*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_vhadd_pd_test:
448*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
449*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
450*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddpd %xmm2, %xmm1, %xmm1
451*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
452*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddpd %xmm2, %xmm0, %xmm0
453*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
454*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
; Result lanes 0 and 1: adjacent-pair sums of %A.
455*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x double> %A, i32 0
456*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x double> %A, i32 1
457*9880d681SAndroid Build Coastguard Worker  %add = fadd double %vecext, %vecext1
458*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x double> undef, double %add, i32 0
459*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x double> %A, i32 2
460*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x double> %A, i32 3
461*9880d681SAndroid Build Coastguard Worker  %add4 = fadd double %vecext2, %vecext3
462*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
; Result lanes 2 and 3: adjacent-pair sums of %B.
463*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x double> %B, i32 0
464*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x double> %B, i32 1
465*9880d681SAndroid Build Coastguard Worker  %add8 = fadd double %vecext6, %vecext7
466*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
467*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x double> %B, i32 2
468*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x double> %B, i32 3
469*9880d681SAndroid Build Coastguard Worker  %add12 = fadd double %vecext10, %vecext11
470*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3
471*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %vecinit13
472*9880d681SAndroid Build Coastguard Worker}
473*9880d681SAndroid Build Coastguard Worker
; Pairwise subtract over two <4 x double> inputs written as scalar operations.
; Result = <A[0]-A[1], A[2]-A[3], B[0]-B[1], B[2]-B[3]>, i.e. the two low
; result lanes come entirely from %A and the two high lanes entirely from %B.
; Expected code per the check lines below:
;   - SSE runs: two hsubpd instructions followed by a movapd.
;   - AVX runs: the upper 128 bits of each input are extracted with
;     vextractf128, a 128-bit (xmm) vhsubpd is applied per input, and the two
;     halves are recombined with vinsertf128.
474*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) {
475*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_vhsub_pd_test:
476*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
477*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm1, %xmm0
478*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm3, %xmm2
479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm2, %xmm1
480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
481*9880d681SAndroid Build Coastguard Worker;
482*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_vhsub_pd_test:
483*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
484*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
485*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubpd %xmm2, %xmm1, %xmm1
486*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubpd %xmm2, %xmm0, %xmm0
488*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
489*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
; Result lanes 0 and 1: adjacent-pair differences of %A.
490*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x double> %A, i32 0
491*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x double> %A, i32 1
492*9880d681SAndroid Build Coastguard Worker  %sub = fsub double %vecext, %vecext1
493*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x double> undef, double %sub, i32 0
494*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x double> %A, i32 2
495*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x double> %A, i32 3
496*9880d681SAndroid Build Coastguard Worker  %sub4 = fsub double %vecext2, %vecext3
497*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1
; Result lanes 2 and 3: adjacent-pair differences of %B.
498*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x double> %B, i32 0
499*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x double> %B, i32 1
500*9880d681SAndroid Build Coastguard Worker  %sub8 = fsub double %vecext6, %vecext7
501*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2
502*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x double> %B, i32 2
503*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x double> %B, i32 3
504*9880d681SAndroid Build Coastguard Worker  %sub12 = fsub double %vecext10, %vecext11
505*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3
506*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %vecinit13
507*9880d681SAndroid Build Coastguard Worker}
508*9880d681SAndroid Build Coastguard Worker
; Pairwise integer add over two <8 x i32> inputs written as scalar operations.
; Result lanes 0-3 are the four adjacent-pair sums of %A
; (A[0]+A[1], A[2]+A[3], A[4]+A[5], A[6]+A[7]); result lanes 4-7 are the same
; four adjacent-pair sums taken from %B.
; Expected code per the check lines below, by check prefix:
;   - SSE3 prefix (run without ssse3): no horizontal-add instruction appears;
;     the expected sequence extracts elements with movd/pshufd, adds them with
;     scalar addl, and rebuilds the vectors with movd/punpckldq.
;   - SSSE3 prefix: two phaddd instructions followed by a movdqa.
;   - AVX1 prefix: vextractf128 of each input's upper half, a 128-bit (xmm)
;     vphaddd per input, then vinsertf128.
;   - AVX2 prefix: the same shape as AVX1 but with vextracti128/vinserti128;
;     the vphaddd instructions are still 128-bit (xmm).
509*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) {
510*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_vphadd_d_test:
511*9880d681SAndroid Build Coastguard Worker; SSE3:       # BB#0:
512*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %ecx
513*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
514*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm4, %r8d
515*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %ecx, %r8d
516*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
517*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm4, %edx
518*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
519*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r9d
520*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %edx, %r9d
521*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm1, %esi
522*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
523*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r10d
524*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %r10d
525*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
526*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %esi
527*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
528*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %edi
529*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %edi
530*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm2, %eax
531*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
532*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r11d
533*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r11d
534*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
535*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
536*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
537*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %ecx
538*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
539*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm3, %eax
540*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
541*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %edx
542*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %edx
543*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
544*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
545*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
546*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %esi
547*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %esi
548*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edi, %xmm0
549*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r9d, %xmm1
550*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
551*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r10d, %xmm2
552*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r8d, %xmm0
553*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
554*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
555*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %esi, %xmm1
556*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ecx, %xmm2
557*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
558*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edx, %xmm3
559*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r11d, %xmm1
560*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
561*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
562*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    retq
563*9880d681SAndroid Build Coastguard Worker;
564*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_vphadd_d_test:
565*9880d681SAndroid Build Coastguard Worker; SSSE3:       # BB#0:
566*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddd %xmm1, %xmm0
567*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddd %xmm3, %xmm2
568*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqa %xmm2, %xmm1
569*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
570*9880d681SAndroid Build Coastguard Worker;
571*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_vphadd_d_test:
572*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
573*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
574*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm2, %xmm1, %xmm1
575*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
576*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm2, %xmm0, %xmm0
577*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
578*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
579*9880d681SAndroid Build Coastguard Worker;
580*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_vphadd_d_test:
581*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
582*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
583*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %xmm2, %xmm1, %xmm1
584*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
585*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %xmm2, %xmm0, %xmm0
586*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
587*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
; Result lanes 0-3: adjacent-pair sums over all eight elements of %A.
588*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %A, i32 0
589*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %A, i32 1
590*9880d681SAndroid Build Coastguard Worker  %add = add i32 %vecext, %vecext1
591*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
592*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %A, i32 2
593*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %A, i32 3
594*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext2, %vecext3
595*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
596*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x i32> %A, i32 4
597*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x i32> %A, i32 5
598*9880d681SAndroid Build Coastguard Worker  %add8 = add i32 %vecext6, %vecext7
599*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2
600*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <8 x i32> %A, i32 6
601*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <8 x i32> %A, i32 7
602*9880d681SAndroid Build Coastguard Worker  %add12 = add i32 %vecext10, %vecext11
603*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3
; Result lanes 4-7: adjacent-pair sums over all eight elements of %B.
604*9880d681SAndroid Build Coastguard Worker  %vecext14 = extractelement <8 x i32> %B, i32 0
605*9880d681SAndroid Build Coastguard Worker  %vecext15 = extractelement <8 x i32> %B, i32 1
606*9880d681SAndroid Build Coastguard Worker  %add16 = add i32 %vecext14, %vecext15
607*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4
608*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <8 x i32> %B, i32 2
609*9880d681SAndroid Build Coastguard Worker  %vecext19 = extractelement <8 x i32> %B, i32 3
610*9880d681SAndroid Build Coastguard Worker  %add20 = add i32 %vecext18, %vecext19
611*9880d681SAndroid Build Coastguard Worker  %vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5
612*9880d681SAndroid Build Coastguard Worker  %vecext22 = extractelement <8 x i32> %B, i32 4
613*9880d681SAndroid Build Coastguard Worker  %vecext23 = extractelement <8 x i32> %B, i32 5
614*9880d681SAndroid Build Coastguard Worker  %add24 = add i32 %vecext22, %vecext23
615*9880d681SAndroid Build Coastguard Worker  %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6
616*9880d681SAndroid Build Coastguard Worker  %vecext26 = extractelement <8 x i32> %B, i32 6
617*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <8 x i32> %B, i32 7
618*9880d681SAndroid Build Coastguard Worker  %add28 = add i32 %vecext26, %vecext27
619*9880d681SAndroid Build Coastguard Worker  %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7
620*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit29
621*9880d681SAndroid Build Coastguard Worker}
622*9880d681SAndroid Build Coastguard Worker
623*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) {
624*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_vphadd_w_test:
625*9880d681SAndroid Build Coastguard Worker; SSE3:       # BB#0:
626*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %rbp
627*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp0:
628*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 16
629*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r15
630*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp1:
631*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 24
632*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r14
633*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp2:
634*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 32
635*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r13
636*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp3:
637*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 40
638*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r12
639*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp4:
640*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 48
641*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %rbx
642*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp5:
643*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 56
644*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp6:
645*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %rbx, -56
646*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp7:
647*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r12, -48
648*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp8:
649*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r13, -40
650*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp9:
651*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r14, -32
652*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp10:
653*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r15, -24
654*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp11:
655*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %rbp, -16
656*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
657*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm0, %ecx
658*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
659*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
660*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm0, %eax
661*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm0, %r11d
662*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r11d
663*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm0, %eax
664*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm0, %r10d
665*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r10d
666*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm0, %eax
667*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm0, %r13d
668*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r13d
669*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm1, %eax
670*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm1, %r14d
671*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r14d
672*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm1, %eax
673*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm1, %ebp
674*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ebp
675*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm1, %eax
676*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm1, %ebx
677*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ebx
678*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm1, %eax
679*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm1, %edx
680*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %edx
681*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm2, %eax
682*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm2, %ecx
683*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
684*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
685*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm2, %eax
686*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm2, %r12d
687*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r12d
688*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm2, %eax
689*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm2, %r15d
690*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r15d
691*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm2, %eax
692*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm2, %r8d
693*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r8d
694*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm3, %eax
695*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
696*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r9d
697*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm3, %eax
698*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm3, %esi
699*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %esi
700*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm3, %eax
701*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm3, %edi
702*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %edi
703*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm3, %ecx
704*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm3, %eax
705*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %ecx, %eax
706*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edx, %xmm8
707*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r13d, %xmm3
708*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ebp, %xmm9
709*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r11d, %xmm4
710*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ebx, %xmm10
711*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r10d, %xmm7
712*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r14d, %xmm11
713*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
714*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    # xmm0 = mem[0],zero,zero,zero
715*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %eax, %xmm12
716*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r8d, %xmm6
717*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %esi, %xmm13
718*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r12d, %xmm5
719*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edi, %xmm14
720*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r15d, %xmm2
721*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r9d, %xmm15
722*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
723*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    # xmm1 = mem[0],zero,zero,zero
724*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
725*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
726*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
727*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
728*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
729*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
730*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
731*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
732*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
733*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
734*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
735*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
736*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
737*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
738*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %rbx
739*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r12
740*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r13
741*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r14
742*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r15
743*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %rbp
744*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    retq
745*9880d681SAndroid Build Coastguard Worker;
746*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_vphadd_w_test:
747*9880d681SAndroid Build Coastguard Worker; SSSE3:       # BB#0:
748*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddw %xmm1, %xmm0
749*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddw %xmm3, %xmm2
750*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    movdqa %xmm2, %xmm1
751*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
752*9880d681SAndroid Build Coastguard Worker;
753*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_vphadd_w_test:
754*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
755*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
756*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddw %xmm2, %xmm1, %xmm1
757*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
758*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
759*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
760*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
761*9880d681SAndroid Build Coastguard Worker;
762*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_vphadd_w_test:
763*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
764*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
765*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddw %xmm2, %xmm1, %xmm1
766*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
767*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddw %xmm2, %xmm0, %xmm0
768*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
769*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
770*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <16 x i16> %a, i32 0
771*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <16 x i16> %a, i32 1
772*9880d681SAndroid Build Coastguard Worker  %add = add i16 %vecext, %vecext1
773*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0
774*9880d681SAndroid Build Coastguard Worker  %vecext4 = extractelement <16 x i16> %a, i32 2
775*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <16 x i16> %a, i32 3
776*9880d681SAndroid Build Coastguard Worker  %add8 = add i16 %vecext4, %vecext6
777*9880d681SAndroid Build Coastguard Worker  %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1
778*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <16 x i16> %a, i32 4
779*9880d681SAndroid Build Coastguard Worker  %vecext13 = extractelement <16 x i16> %a, i32 5
780*9880d681SAndroid Build Coastguard Worker  %add15 = add i16 %vecext11, %vecext13
781*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2
782*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <16 x i16> %a, i32 6
783*9880d681SAndroid Build Coastguard Worker  %vecext20 = extractelement <16 x i16> %a, i32 7
784*9880d681SAndroid Build Coastguard Worker  %add22 = add i16 %vecext18, %vecext20
785*9880d681SAndroid Build Coastguard Worker  %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3
786*9880d681SAndroid Build Coastguard Worker  %vecext25 = extractelement <16 x i16> %a, i32 8
787*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <16 x i16> %a, i32 9
788*9880d681SAndroid Build Coastguard Worker  %add29 = add i16 %vecext25, %vecext27
789*9880d681SAndroid Build Coastguard Worker  %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 4
790*9880d681SAndroid Build Coastguard Worker  %vecext32 = extractelement <16 x i16> %a, i32 10
791*9880d681SAndroid Build Coastguard Worker  %vecext34 = extractelement <16 x i16> %a, i32 11
792*9880d681SAndroid Build Coastguard Worker  %add36 = add i16 %vecext32, %vecext34
793*9880d681SAndroid Build Coastguard Worker  %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 5
794*9880d681SAndroid Build Coastguard Worker  %vecext39 = extractelement <16 x i16> %a, i32 12
795*9880d681SAndroid Build Coastguard Worker  %vecext41 = extractelement <16 x i16> %a, i32 13
796*9880d681SAndroid Build Coastguard Worker  %add43 = add i16 %vecext39, %vecext41
797*9880d681SAndroid Build Coastguard Worker  %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 6
798*9880d681SAndroid Build Coastguard Worker  %vecext46 = extractelement <16 x i16> %a, i32 14
799*9880d681SAndroid Build Coastguard Worker  %vecext48 = extractelement <16 x i16> %a, i32 15
800*9880d681SAndroid Build Coastguard Worker  %add50 = add i16 %vecext46, %vecext48
801*9880d681SAndroid Build Coastguard Worker  %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 7
802*9880d681SAndroid Build Coastguard Worker  %vecext53 = extractelement <16 x i16> %b, i32 0
803*9880d681SAndroid Build Coastguard Worker  %vecext55 = extractelement <16 x i16> %b, i32 1
804*9880d681SAndroid Build Coastguard Worker  %add57 = add i16 %vecext53, %vecext55
805*9880d681SAndroid Build Coastguard Worker  %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 8
806*9880d681SAndroid Build Coastguard Worker  %vecext60 = extractelement <16 x i16> %b, i32 2
807*9880d681SAndroid Build Coastguard Worker  %vecext62 = extractelement <16 x i16> %b, i32 3
808*9880d681SAndroid Build Coastguard Worker  %add64 = add i16 %vecext60, %vecext62
809*9880d681SAndroid Build Coastguard Worker  %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 9
810*9880d681SAndroid Build Coastguard Worker  %vecext67 = extractelement <16 x i16> %b, i32 4
811*9880d681SAndroid Build Coastguard Worker  %vecext69 = extractelement <16 x i16> %b, i32 5
812*9880d681SAndroid Build Coastguard Worker  %add71 = add i16 %vecext67, %vecext69
813*9880d681SAndroid Build Coastguard Worker  %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 10
814*9880d681SAndroid Build Coastguard Worker  %vecext74 = extractelement <16 x i16> %b, i32 6
815*9880d681SAndroid Build Coastguard Worker  %vecext76 = extractelement <16 x i16> %b, i32 7
816*9880d681SAndroid Build Coastguard Worker  %add78 = add i16 %vecext74, %vecext76
817*9880d681SAndroid Build Coastguard Worker  %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 11
818*9880d681SAndroid Build Coastguard Worker  %vecext81 = extractelement <16 x i16> %b, i32 8
819*9880d681SAndroid Build Coastguard Worker  %vecext83 = extractelement <16 x i16> %b, i32 9
820*9880d681SAndroid Build Coastguard Worker  %add85 = add i16 %vecext81, %vecext83
821*9880d681SAndroid Build Coastguard Worker  %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12
822*9880d681SAndroid Build Coastguard Worker  %vecext88 = extractelement <16 x i16> %b, i32 10
823*9880d681SAndroid Build Coastguard Worker  %vecext90 = extractelement <16 x i16> %b, i32 11
824*9880d681SAndroid Build Coastguard Worker  %add92 = add i16 %vecext88, %vecext90
825*9880d681SAndroid Build Coastguard Worker  %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13
826*9880d681SAndroid Build Coastguard Worker  %vecext95 = extractelement <16 x i16> %b, i32 12
827*9880d681SAndroid Build Coastguard Worker  %vecext97 = extractelement <16 x i16> %b, i32 13
828*9880d681SAndroid Build Coastguard Worker  %add99 = add i16 %vecext95, %vecext97
829*9880d681SAndroid Build Coastguard Worker  %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14
830*9880d681SAndroid Build Coastguard Worker  %vecext102 = extractelement <16 x i16> %b, i32 14
831*9880d681SAndroid Build Coastguard Worker  %vecext104 = extractelement <16 x i16> %b, i32 15
832*9880d681SAndroid Build Coastguard Worker  %add106 = add i16 %vecext102, %vecext104
833*9880d681SAndroid Build Coastguard Worker  %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15
834*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %vecinit108
835*9880d681SAndroid Build Coastguard Worker}
836*9880d681SAndroid Build Coastguard Worker
837*9880d681SAndroid Build Coastguard Worker; Verify that we don't select horizontal subs in the following functions.
838*9880d681SAndroid Build Coastguard Worker
; Negative test on <4 x i32>. The result is built from four pairwise scalar
; subtractions, but both pairs taken from %B are subtracted in reverse order
; (element 1 - element 0 and element 3 - element 2), so the pattern is not a
; horizontal subtract. The expected output below consists of scalar extracts,
; subl instructions and re-insertion; no phsubd appears in it.
839*9880d681SAndroid Build Coastguard Workerdefine <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
840*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: not_a_hsub_1:
841*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
842*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %eax
843*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
844*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm2, %ecx
845*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subl %ecx, %eax
846*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
847*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm2, %ecx
848*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
849*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %edx
850*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subl %edx, %ecx
851*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
852*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %edx
853*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm1, %esi
854*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subl %esi, %edx
855*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
856*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %esi
857*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
858*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %edi
859*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subl %edi, %esi
860*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %esi, %xmm0
861*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %ecx, %xmm1
862*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
863*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %edx, %xmm2
864*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %eax, %xmm0
865*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
866*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
867*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
868*9880d681SAndroid Build Coastguard Worker;
869*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: not_a_hsub_1:
870*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
871*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd %xmm0, %eax
872*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $1, %xmm0, %ecx
873*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subl %ecx, %eax
874*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $2, %xmm0, %ecx
875*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $3, %xmm0, %edx
876*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subl %edx, %ecx
877*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $1, %xmm1, %edx
878*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd %xmm1, %esi
879*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subl %esi, %edx
880*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $3, %xmm1, %esi
881*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpextrd $2, %xmm1, %edi
882*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    subl %edi, %esi
883*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovd %eax, %xmm0
884*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
885*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0
886*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpinsrd $3, %esi, %xmm0, %xmm0
887*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lanes 0 and 1: %A[0] - %A[1] and %A[2] - %A[3] (low index minus high index).
888*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x i32> %A, i32 0
889*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x i32> %A, i32 1
890*9880d681SAndroid Build Coastguard Worker  %sub = sub i32 %vecext, %vecext1
891*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
892*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x i32> %A, i32 2
893*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x i32> %A, i32 3
894*9880d681SAndroid Build Coastguard Worker  %sub4 = sub i32 %vecext2, %vecext3
895*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
  ; Result lanes 2 and 3: %B[1] - %B[0] and %B[3] - %B[2]; the operand order is
  ; reversed relative to the %A pairs above.
896*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x i32> %B, i32 1
897*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x i32> %B, i32 0
898*9880d681SAndroid Build Coastguard Worker  %sub8 = sub i32 %vecext6, %vecext7
899*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
900*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x i32> %B, i32 3
901*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x i32> %B, i32 2
902*9880d681SAndroid Build Coastguard Worker  %sub12 = sub i32 %vecext10, %vecext11
903*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
904*9880d681SAndroid Build Coastguard Worker  ret <4 x i32> %vecinit13
905*9880d681SAndroid Build Coastguard Worker}
906*9880d681SAndroid Build Coastguard Worker
; Negative test on <4 x float>. Three of the four pairwise fsubs are
; (low index - high index), but the pair written to result lane 3 is
; %B[3] - %B[2], i.e. reversed, so the pattern is not a horizontal subtract.
; The expected output below uses scalar subss / vsubss plus shuffles and
; inserts; no hsubps appears in it.
907*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
908*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: not_a_hsub_2:
909*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
910*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm0, %xmm2
911*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
912*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm0, %xmm3
913*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
914*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subss %xmm3, %xmm2
915*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
916*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subss %xmm3, %xmm0
917*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm1, %xmm3
918*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
919*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm1, %xmm4
920*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm4 = xmm4[1,0]
921*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subss %xmm4, %xmm3
922*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
923*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
924*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subss %xmm3, %xmm1
925*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
926*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
927*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
928*9880d681SAndroid Build Coastguard Worker;
929*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: not_a_hsub_2:
930*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
931*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
932*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
933*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubss %xmm3, %xmm2, %xmm2
934*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
935*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubss %xmm3, %xmm0, %xmm0
936*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3]
937*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
938*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubss %xmm4, %xmm3, %xmm3
939*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
940*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
941*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm1
942*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
943*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
944*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lane 1 = %A[2] - %A[3]; result lane 0 = %A[0] - %A[1]. The lanes are
  ; filled out of order (1, 0, 3, 2).
945*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %A, i32 2
946*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %A, i32 3
947*9880d681SAndroid Build Coastguard Worker  %sub = fsub float %vecext, %vecext1
948*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
949*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %A, i32 0
950*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %A, i32 1
951*9880d681SAndroid Build Coastguard Worker  %sub4 = fsub float %vecext2, %vecext3
952*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
  ; Result lane 3 = %B[3] - %B[2]: this is the reversed pair.
953*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x float> %B, i32 3
954*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x float> %B, i32 2
955*9880d681SAndroid Build Coastguard Worker  %sub8 = fsub float %vecext6, %vecext7
956*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
  ; Result lane 2 = %B[0] - %B[1].
957*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x float> %B, i32 0
958*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x float> %B, i32 1
959*9880d681SAndroid Build Coastguard Worker  %sub12 = fsub float %vecext10, %vecext11
960*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
961*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit13
962*9880d681SAndroid Build Coastguard Worker}
963*9880d681SAndroid Build Coastguard Worker
; Negative test on <2 x double>. Result lane 1 is %B[0] - %B[1], but result
; lane 0 is %A[1] - %A[0] (reversed), so the pattern is not a horizontal
; subtract. The expected output below uses scalar subsd / vsubsd followed by
; an unpack; no hsubpd appears in it.
964*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
965*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: not_a_hsub_3:
966*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
967*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm1, %xmm2
968*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
969*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subsd %xmm2, %xmm1
970*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm0, %xmm2
971*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
972*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    subsd %xmm0, %xmm2
973*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
974*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm2, %xmm0
975*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
976*9880d681SAndroid Build Coastguard Worker;
977*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: not_a_hsub_3:
978*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
979*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
980*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubsd %xmm2, %xmm1, %xmm1
981*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
982*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vsubsd %xmm0, %xmm2, %xmm0
983*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
984*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lane 1 = %B[0] - %B[1].
985*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <2 x double> %B, i32 0
986*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <2 x double> %B, i32 1
987*9880d681SAndroid Build Coastguard Worker  %sub = fsub double %vecext, %vecext1
988*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
  ; Result lane 0 = %A[1] - %A[0]: this is the reversed pair.
989*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <2 x double> %A, i32 1
990*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <2 x double> %A, i32 0
991*9880d681SAndroid Build Coastguard Worker  %sub2 = fsub double %vecext2, %vecext3
992*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
993*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %vecinit2
994*9880d681SAndroid Build Coastguard Worker}
995*9880d681SAndroid Build Coastguard Worker
996*9880d681SAndroid Build Coastguard Worker; Test AVX horizontal add/sub of packed single/double precision
997*9880d681SAndroid Build Coastguard Worker; floating point values from 256-bit vectors.
998*9880d681SAndroid Build Coastguard Worker
; Positive test on <8 x float>: every result lane is the fadd of one adjacent
; (even, odd) element pair. Result lanes 0-1 come from %a[0..3], lanes 2-3
; from %b[0..3], lanes 4-5 from %a[4..7] and lanes 6-7 from %b[4..7].
; The expected output below is two 128-bit haddps for the SSE run lines and a
; single 256-bit vhaddps for the AVX run lines.
999*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @avx_vhadd_ps(<8 x float> %a, <8 x float> %b) {
1000*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_vhadd_ps:
1001*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1002*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm2, %xmm0
1003*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm3, %xmm1
1004*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1005*9880d681SAndroid Build Coastguard Worker;
1006*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_vhadd_ps:
1007*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1008*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
1009*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lanes 0-3: pairs from the low four elements of %a, then of %b.
1010*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
1011*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
1012*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
1013*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x float> undef, float %add, i32 0
1014*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %a, i32 2
1015*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %a, i32 3
1016*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
1017*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
1018*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x float> %b, i32 0
1019*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x float> %b, i32 1
1020*9880d681SAndroid Build Coastguard Worker  %add8 = fadd float %vecext6, %vecext7
1021*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <8 x float> %vecinit5, float %add8, i32 2
1022*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <8 x float> %b, i32 2
1023*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <8 x float> %b, i32 3
1024*9880d681SAndroid Build Coastguard Worker  %add12 = fadd float %vecext10, %vecext11
1025*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <8 x float> %vecinit9, float %add12, i32 3
  ; Result lanes 4-7: pairs from the high four elements of %a, then of %b.
1026*9880d681SAndroid Build Coastguard Worker  %vecext14 = extractelement <8 x float> %a, i32 4
1027*9880d681SAndroid Build Coastguard Worker  %vecext15 = extractelement <8 x float> %a, i32 5
1028*9880d681SAndroid Build Coastguard Worker  %add16 = fadd float %vecext14, %vecext15
1029*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <8 x float> %vecinit13, float %add16, i32 4
1030*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <8 x float> %a, i32 6
1031*9880d681SAndroid Build Coastguard Worker  %vecext19 = extractelement <8 x float> %a, i32 7
1032*9880d681SAndroid Build Coastguard Worker  %add20 = fadd float %vecext18, %vecext19
1033*9880d681SAndroid Build Coastguard Worker  %vecinit21 = insertelement <8 x float> %vecinit17, float %add20, i32 5
1034*9880d681SAndroid Build Coastguard Worker  %vecext22 = extractelement <8 x float> %b, i32 4
1035*9880d681SAndroid Build Coastguard Worker  %vecext23 = extractelement <8 x float> %b, i32 5
1036*9880d681SAndroid Build Coastguard Worker  %add24 = fadd float %vecext22, %vecext23
1037*9880d681SAndroid Build Coastguard Worker  %vecinit25 = insertelement <8 x float> %vecinit21, float %add24, i32 6
1038*9880d681SAndroid Build Coastguard Worker  %vecext26 = extractelement <8 x float> %b, i32 6
1039*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <8 x float> %b, i32 7
1040*9880d681SAndroid Build Coastguard Worker  %add28 = fadd float %vecext26, %vecext27
1041*9880d681SAndroid Build Coastguard Worker  %vecinit29 = insertelement <8 x float> %vecinit25, float %add28, i32 7
1042*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit29
1043*9880d681SAndroid Build Coastguard Worker}
1044*9880d681SAndroid Build Coastguard Worker
; Positive test on <8 x float>: every result lane is the fsub of one adjacent
; element pair, always (even index - odd index). Result lanes 0-1 come from
; %a[0..3], lanes 2-3 from %b[0..3], lanes 4-5 from %a[4..7] and lanes 6-7
; from %b[4..7]. The expected output below is two 128-bit hsubps for the SSE
; run lines and a single 256-bit vhsubps for the AVX run lines.
1045*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @avx_vhsub_ps(<8 x float> %a, <8 x float> %b) {
1046*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_vhsub_ps:
1047*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1048*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubps %xmm2, %xmm0
1049*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubps %xmm3, %xmm1
1050*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1051*9880d681SAndroid Build Coastguard Worker;
1052*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_vhsub_ps:
1053*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1054*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
1055*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lanes 0-3: pairs from the low four elements of %a, then of %b.
1056*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
1057*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
1058*9880d681SAndroid Build Coastguard Worker  %sub = fsub float %vecext, %vecext1
1059*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x float> undef, float %sub, i32 0
1060*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %a, i32 2
1061*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %a, i32 3
1062*9880d681SAndroid Build Coastguard Worker  %sub4 = fsub float %vecext2, %vecext3
1063*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x float> %vecinit, float %sub4, i32 1
1064*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x float> %b, i32 0
1065*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x float> %b, i32 1
1066*9880d681SAndroid Build Coastguard Worker  %sub8 = fsub float %vecext6, %vecext7
1067*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <8 x float> %vecinit5, float %sub8, i32 2
1068*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <8 x float> %b, i32 2
1069*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <8 x float> %b, i32 3
1070*9880d681SAndroid Build Coastguard Worker  %sub12 = fsub float %vecext10, %vecext11
1071*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <8 x float> %vecinit9, float %sub12, i32 3
  ; Result lanes 4-7: pairs from the high four elements of %a, then of %b.
1072*9880d681SAndroid Build Coastguard Worker  %vecext14 = extractelement <8 x float> %a, i32 4
1073*9880d681SAndroid Build Coastguard Worker  %vecext15 = extractelement <8 x float> %a, i32 5
1074*9880d681SAndroid Build Coastguard Worker  %sub16 = fsub float %vecext14, %vecext15
1075*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <8 x float> %vecinit13, float %sub16, i32 4
1076*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <8 x float> %a, i32 6
1077*9880d681SAndroid Build Coastguard Worker  %vecext19 = extractelement <8 x float> %a, i32 7
1078*9880d681SAndroid Build Coastguard Worker  %sub20 = fsub float %vecext18, %vecext19
1079*9880d681SAndroid Build Coastguard Worker  %vecinit21 = insertelement <8 x float> %vecinit17, float %sub20, i32 5
1080*9880d681SAndroid Build Coastguard Worker  %vecext22 = extractelement <8 x float> %b, i32 4
1081*9880d681SAndroid Build Coastguard Worker  %vecext23 = extractelement <8 x float> %b, i32 5
1082*9880d681SAndroid Build Coastguard Worker  %sub24 = fsub float %vecext22, %vecext23
1083*9880d681SAndroid Build Coastguard Worker  %vecinit25 = insertelement <8 x float> %vecinit21, float %sub24, i32 6
1084*9880d681SAndroid Build Coastguard Worker  %vecext26 = extractelement <8 x float> %b, i32 6
1085*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <8 x float> %b, i32 7
1086*9880d681SAndroid Build Coastguard Worker  %sub28 = fsub float %vecext26, %vecext27
1087*9880d681SAndroid Build Coastguard Worker  %vecinit29 = insertelement <8 x float> %vecinit25, float %sub28, i32 7
1088*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit29
1089*9880d681SAndroid Build Coastguard Worker}
1090*9880d681SAndroid Build Coastguard Worker
; Positive test on <4 x double>: result lanes are, in order,
; %a[0]+%a[1], %b[0]+%b[1], %a[2]+%a[3], %b[2]+%b[3].
; The expected output below is two 128-bit haddpd for the SSE run lines and a
; single 256-bit vhaddpd for the AVX run lines.
1091*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @avx_hadd_pd(<4 x double> %a, <4 x double> %b) {
1092*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_hadd_pd:
1093*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1094*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddpd %xmm2, %xmm0
1095*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddpd %xmm3, %xmm1
1096*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1097*9880d681SAndroid Build Coastguard Worker;
1098*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_hadd_pd:
1099*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1100*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
1101*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lanes 0-1: the low pair of %a, then the low pair of %b.
1102*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x double> %a, i32 0
1103*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x double> %a, i32 1
1104*9880d681SAndroid Build Coastguard Worker  %add = fadd double %vecext, %vecext1
1105*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x double> undef, double %add, i32 0
1106*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x double> %b, i32 0
1107*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x double> %b, i32 1
1108*9880d681SAndroid Build Coastguard Worker  %add4 = fadd double %vecext2, %vecext3
1109*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
  ; Result lanes 2-3: the high pair of %a, then the high pair of %b.
1110*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x double> %a, i32 2
1111*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x double> %a, i32 3
1112*9880d681SAndroid Build Coastguard Worker  %add8 = fadd double %vecext6, %vecext7
1113*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
1114*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x double> %b, i32 2
1115*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x double> %b, i32 3
1116*9880d681SAndroid Build Coastguard Worker  %add12 = fadd double %vecext10, %vecext11
1117*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3
1118*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %vecinit13
1119*9880d681SAndroid Build Coastguard Worker}
1120*9880d681SAndroid Build Coastguard Worker
; Positive test on <4 x double>: result lanes are, in order,
; %a[0]-%a[1], %b[0]-%b[1], %a[2]-%a[3], %b[2]-%b[3].
; The expected output below is two 128-bit hsubpd for the SSE run lines and a
; single 256-bit vhsubpd for the AVX run lines.
1121*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @avx_hsub_pd(<4 x double> %a, <4 x double> %b) {
1122*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_hsub_pd:
1123*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
1124*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm2, %xmm0
1125*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    hsubpd %xmm3, %xmm1
1126*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
1127*9880d681SAndroid Build Coastguard Worker;
1128*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_hsub_pd:
1129*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
1130*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
1131*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
  ; Result lanes 0-1: the low pair of %a, then the low pair of %b.
1132*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x double> %a, i32 0
1133*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x double> %a, i32 1
1134*9880d681SAndroid Build Coastguard Worker  %sub = fsub double %vecext, %vecext1
1135*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x double> undef, double %sub, i32 0
1136*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x double> %b, i32 0
1137*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x double> %b, i32 1
1138*9880d681SAndroid Build Coastguard Worker  %sub4 = fsub double %vecext2, %vecext3
1139*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1
  ; Result lanes 2-3: the high pair of %a, then the high pair of %b.
1140*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x double> %a, i32 2
1141*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x double> %a, i32 3
1142*9880d681SAndroid Build Coastguard Worker  %sub8 = fsub double %vecext6, %vecext7
1143*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2
1144*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x double> %b, i32 2
1145*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x double> %b, i32 3
1146*9880d681SAndroid Build Coastguard Worker  %sub12 = fsub double %vecext10, %vecext11
1147*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3
1148*9880d681SAndroid Build Coastguard Worker  ret <4 x double> %vecinit13
1149*9880d681SAndroid Build Coastguard Worker}
1150*9880d681SAndroid Build Coastguard Worker
1151*9880d681SAndroid Build Coastguard Worker; Test AVX2 horizontal add of packed integer values from 256-bit vectors.
1152*9880d681SAndroid Build Coastguard Worker
; Scalarized horizontal add over <8 x i32>. Adjacent pairs are summed and the
; sums are laid out per 128-bit half of the result, %a first and %b second:
;   result = { a0+a1, a2+a3, b0+b1, b2+b3, a4+a5, a6+a7, b4+b5, b6+b7 }
; Expected lowering according to the check lines below:
;   - sse3-only run  - fully scalarized (movd/pshufd/addl, repacked with punpckldq)
;   - ssse3 run      - one phaddd per 128-bit half
;   - avx run        - two 128-bit vphaddd joined with vextractf128/vinsertf128
;   - avx2 run       - a single 256-bit vphaddd
1153*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
1154*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_hadd_d:
1155*9880d681SAndroid Build Coastguard Worker; SSE3:       # BB#0:
1156*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %ecx
1157*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
1158*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm4, %r8d
1159*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %ecx, %r8d
1160*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
1161*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm4, %edx
1162*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
1163*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r9d
1164*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %edx, %r9d
1165*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm2, %esi
1166*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1167*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r10d
1168*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %r10d
1169*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
1170*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %esi
1171*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
1172*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %edi
1173*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %edi
1174*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm1, %eax
1175*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1176*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %r11d
1177*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r11d
1178*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1179*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
1180*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
1181*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %ecx
1182*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
1183*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm3, %eax
1184*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
1185*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %edx
1186*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %edx
1187*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
1188*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
1189*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
1190*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %esi
1191*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %esi
1192*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edi, %xmm0
1193*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r9d, %xmm1
1194*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1195*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r10d, %xmm2
1196*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r8d, %xmm0
1197*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1198*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1199*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %esi, %xmm1
1200*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ecx, %xmm2
1201*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1202*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edx, %xmm3
1203*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r11d, %xmm1
1204*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1205*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1206*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    retq
1207*9880d681SAndroid Build Coastguard Worker;
1208*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_hadd_d:
1209*9880d681SAndroid Build Coastguard Worker; SSSE3:       # BB#0:
1210*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddd %xmm2, %xmm0
1211*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddd %xmm3, %xmm1
1212*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
1213*9880d681SAndroid Build Coastguard Worker;
1214*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_hadd_d:
1215*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1216*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1217*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1218*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm2, %xmm3, %xmm2
1219*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
1220*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1221*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1222*9880d681SAndroid Build Coastguard Worker;
1223*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_hadd_d:
1224*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1225*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
1226*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
  ; Result elements 0-3 (low 128 bits) - pair sums of %a[0..3], then %b[0..3].
1227*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %a, i32 0
1228*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %a, i32 1
1229*9880d681SAndroid Build Coastguard Worker  %add = add i32 %vecext, %vecext1
1230*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
1231*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %a, i32 2
1232*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %a, i32 3
1233*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext2, %vecext3
1234*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
1235*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x i32> %b, i32 0
1236*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x i32> %b, i32 1
1237*9880d681SAndroid Build Coastguard Worker  %add8 = add i32 %vecext6, %vecext7
1238*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2
1239*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <8 x i32> %b, i32 2
1240*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <8 x i32> %b, i32 3
1241*9880d681SAndroid Build Coastguard Worker  %add12 = add i32 %vecext10, %vecext11
1242*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3
  ; Result elements 4-7 (high 128 bits) - pair sums of %a[4..7], then %b[4..7].
1243*9880d681SAndroid Build Coastguard Worker  %vecext14 = extractelement <8 x i32> %a, i32 4
1244*9880d681SAndroid Build Coastguard Worker  %vecext15 = extractelement <8 x i32> %a, i32 5
1245*9880d681SAndroid Build Coastguard Worker  %add16 = add i32 %vecext14, %vecext15
1246*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4
1247*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <8 x i32> %a, i32 6
1248*9880d681SAndroid Build Coastguard Worker  %vecext19 = extractelement <8 x i32> %a, i32 7
1249*9880d681SAndroid Build Coastguard Worker  %add20 = add i32 %vecext18, %vecext19
1250*9880d681SAndroid Build Coastguard Worker  %vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5
1251*9880d681SAndroid Build Coastguard Worker  %vecext22 = extractelement <8 x i32> %b, i32 4
1252*9880d681SAndroid Build Coastguard Worker  %vecext23 = extractelement <8 x i32> %b, i32 5
1253*9880d681SAndroid Build Coastguard Worker  %add24 = add i32 %vecext22, %vecext23
1254*9880d681SAndroid Build Coastguard Worker  %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6
1255*9880d681SAndroid Build Coastguard Worker  %vecext26 = extractelement <8 x i32> %b, i32 6
1256*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <8 x i32> %b, i32 7
1257*9880d681SAndroid Build Coastguard Worker  %add28 = add i32 %vecext26, %vecext27
1258*9880d681SAndroid Build Coastguard Worker  %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7
1259*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit29
1260*9880d681SAndroid Build Coastguard Worker}
1261*9880d681SAndroid Build Coastguard Worker
; Scalarized horizontal add over <16 x i16>. Adjacent pairs are summed and the
; sums are laid out per 128-bit half of the result, %a first and %b second:
;   result[0..3]   = pair sums of %a[0..7]
;   result[4..7]   = pair sums of %b[0..7]
;   result[8..11]  = pair sums of %a[8..15]
;   result[12..15] = pair sums of %b[8..15]
; The IR below fills the result in the order 0-3, 8-11, 4-7, 12-15 (all of %a
; first, then all of %b), so the insert indices are intentionally not sequential.
; Expected lowering according to the check lines below:
;   - sse3-only run  - fully scalarized (pextrw/addl, repacked with punpcklwd),
;                      including callee-saved register pushes and two stack spills
;   - ssse3 run      - one phaddw per 128-bit half
;   - avx run        - two 128-bit vphaddw joined with vextractf128/vinsertf128
;   - avx2 run       - a single 256-bit vphaddw
1262*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
1263*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_hadd_w:
1264*9880d681SAndroid Build Coastguard Worker; SSE3:       # BB#0:
1265*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %rbp
1266*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp12:
1267*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 16
1268*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r15
1269*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp13:
1270*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 24
1271*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r14
1272*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp14:
1273*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 32
1274*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r13
1275*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp15:
1276*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 40
1277*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %r12
1278*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp16:
1279*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 48
1280*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pushq %rbx
1281*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp17:
1282*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_def_cfa_offset 56
1283*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp18:
1284*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %rbx, -56
1285*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp19:
1286*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r12, -48
1287*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp20:
1288*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r13, -40
1289*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp21:
1290*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r14, -32
1291*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp22:
1292*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %r15, -24
1293*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:  .Ltmp23:
1294*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    .cfi_offset %rbp, -16
1295*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm0, %eax
1296*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm0, %ecx
1297*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
1298*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
1299*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm0, %eax
1300*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm0, %r15d
1301*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r15d
1302*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm0, %eax
1303*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm0, %r14d
1304*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r14d
1305*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm0, %eax
1306*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm0, %r13d
1307*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r13d
1308*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm1, %eax
1309*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm1, %ecx
1310*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
1311*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
1312*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm1, %eax
1313*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm1, %r11d
1314*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r11d
1315*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm1, %eax
1316*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm1, %r10d
1317*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r10d
1318*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm1, %eax
1319*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm1, %r12d
1320*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %r12d
1321*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm2, %eax
1322*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm2, %ebx
1323*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ebx
1324*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm2, %eax
1325*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm2, %ecx
1326*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ecx
1327*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm2, %esi
1328*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm2, %r8d
1329*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %r8d
1330*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm2, %esi
1331*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm2, %edx
1332*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %edx
1333*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %xmm3, %edi
1334*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
1335*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %edi, %r9d
1336*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $2, %xmm3, %ebp
1337*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $3, %xmm3, %edi
1338*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %ebp, %edi
1339*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $4, %xmm3, %eax
1340*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $5, %xmm3, %ebp
1341*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %eax, %ebp
1342*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $6, %xmm3, %esi
1343*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    pextrw $7, %xmm3, %eax
1344*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    addl %esi, %eax
1345*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edx, %xmm8
1346*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r13d, %xmm3
1347*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ecx, %xmm9
1348*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r15d, %xmm4
1349*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r8d, %xmm10
1350*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r14d, %xmm7
1351*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ebx, %xmm11
1352*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
1353*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    # xmm0 = mem[0],zero,zero,zero
1354*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %eax, %xmm12
1355*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r12d, %xmm6
1356*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %edi, %xmm13
1357*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r11d, %xmm5
1358*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %ebp, %xmm14
1359*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r10d, %xmm2
1360*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd %r9d, %xmm15
1361*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
1362*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    # xmm1 = mem[0],zero,zero,zero
1363*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
1364*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
1365*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
1366*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
1367*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
1368*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
1369*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
1370*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
1371*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
1372*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
1373*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
1374*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
1375*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1376*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
1377*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %rbx
1378*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r12
1379*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r13
1380*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r14
1381*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %r15
1382*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    popq %rbp
1383*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT:    retq
1384*9880d681SAndroid Build Coastguard Worker;
1385*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_hadd_w:
1386*9880d681SAndroid Build Coastguard Worker; SSSE3:       # BB#0:
1387*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddw %xmm2, %xmm0
1388*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    phaddw %xmm3, %xmm1
1389*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT:    retq
1390*9880d681SAndroid Build Coastguard Worker;
1391*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_hadd_w:
1392*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
1393*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
1394*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
1395*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddw %xmm2, %xmm3, %xmm2
1396*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
1397*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
1398*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
1399*9880d681SAndroid Build Coastguard Worker;
1400*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_hadd_w:
1401*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
1402*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
1403*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
  ; Result elements 0-3 - pair sums of %a[0..7].
1404*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <16 x i16> %a, i32 0
1405*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <16 x i16> %a, i32 1
1406*9880d681SAndroid Build Coastguard Worker  %add = add i16 %vecext, %vecext1
1407*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0
1408*9880d681SAndroid Build Coastguard Worker  %vecext4 = extractelement <16 x i16> %a, i32 2
1409*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <16 x i16> %a, i32 3
1410*9880d681SAndroid Build Coastguard Worker  %add8 = add i16 %vecext4, %vecext6
1411*9880d681SAndroid Build Coastguard Worker  %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1
1412*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <16 x i16> %a, i32 4
1413*9880d681SAndroid Build Coastguard Worker  %vecext13 = extractelement <16 x i16> %a, i32 5
1414*9880d681SAndroid Build Coastguard Worker  %add15 = add i16 %vecext11, %vecext13
1415*9880d681SAndroid Build Coastguard Worker  %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2
1416*9880d681SAndroid Build Coastguard Worker  %vecext18 = extractelement <16 x i16> %a, i32 6
1417*9880d681SAndroid Build Coastguard Worker  %vecext20 = extractelement <16 x i16> %a, i32 7
1418*9880d681SAndroid Build Coastguard Worker  %add22 = add i16 %vecext18, %vecext20
1419*9880d681SAndroid Build Coastguard Worker  %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3
  ; Result elements 8-11 - pair sums of %a[8..15]. The insert index jumps to 8
  ; here because elements 4-7 are reserved for the %b[0..7] sums further down.
1420*9880d681SAndroid Build Coastguard Worker  %vecext25 = extractelement <16 x i16> %a, i32 8
1421*9880d681SAndroid Build Coastguard Worker  %vecext27 = extractelement <16 x i16> %a, i32 9
1422*9880d681SAndroid Build Coastguard Worker  %add29 = add i16 %vecext25, %vecext27
1423*9880d681SAndroid Build Coastguard Worker  %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 8
1424*9880d681SAndroid Build Coastguard Worker  %vecext32 = extractelement <16 x i16> %a, i32 10
1425*9880d681SAndroid Build Coastguard Worker  %vecext34 = extractelement <16 x i16> %a, i32 11
1426*9880d681SAndroid Build Coastguard Worker  %add36 = add i16 %vecext32, %vecext34
1427*9880d681SAndroid Build Coastguard Worker  %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 9
1428*9880d681SAndroid Build Coastguard Worker  %vecext39 = extractelement <16 x i16> %a, i32 12
1429*9880d681SAndroid Build Coastguard Worker  %vecext41 = extractelement <16 x i16> %a, i32 13
1430*9880d681SAndroid Build Coastguard Worker  %add43 = add i16 %vecext39, %vecext41
1431*9880d681SAndroid Build Coastguard Worker  %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 10
1432*9880d681SAndroid Build Coastguard Worker  %vecext46 = extractelement <16 x i16> %a, i32 14
1433*9880d681SAndroid Build Coastguard Worker  %vecext48 = extractelement <16 x i16> %a, i32 15
1434*9880d681SAndroid Build Coastguard Worker  %add50 = add i16 %vecext46, %vecext48
1435*9880d681SAndroid Build Coastguard Worker  %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 11
  ; Result elements 4-7 - pair sums of %b[0..7].
1436*9880d681SAndroid Build Coastguard Worker  %vecext53 = extractelement <16 x i16> %b, i32 0
1437*9880d681SAndroid Build Coastguard Worker  %vecext55 = extractelement <16 x i16> %b, i32 1
1438*9880d681SAndroid Build Coastguard Worker  %add57 = add i16 %vecext53, %vecext55
1439*9880d681SAndroid Build Coastguard Worker  %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 4
1440*9880d681SAndroid Build Coastguard Worker  %vecext60 = extractelement <16 x i16> %b, i32 2
1441*9880d681SAndroid Build Coastguard Worker  %vecext62 = extractelement <16 x i16> %b, i32 3
1442*9880d681SAndroid Build Coastguard Worker  %add64 = add i16 %vecext60, %vecext62
1443*9880d681SAndroid Build Coastguard Worker  %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 5
1444*9880d681SAndroid Build Coastguard Worker  %vecext67 = extractelement <16 x i16> %b, i32 4
1445*9880d681SAndroid Build Coastguard Worker  %vecext69 = extractelement <16 x i16> %b, i32 5
1446*9880d681SAndroid Build Coastguard Worker  %add71 = add i16 %vecext67, %vecext69
1447*9880d681SAndroid Build Coastguard Worker  %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 6
1448*9880d681SAndroid Build Coastguard Worker  %vecext74 = extractelement <16 x i16> %b, i32 6
1449*9880d681SAndroid Build Coastguard Worker  %vecext76 = extractelement <16 x i16> %b, i32 7
1450*9880d681SAndroid Build Coastguard Worker  %add78 = add i16 %vecext74, %vecext76
1451*9880d681SAndroid Build Coastguard Worker  %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 7
  ; Result elements 12-15 - pair sums of %b[8..15].
1452*9880d681SAndroid Build Coastguard Worker  %vecext81 = extractelement <16 x i16> %b, i32 8
1453*9880d681SAndroid Build Coastguard Worker  %vecext83 = extractelement <16 x i16> %b, i32 9
1454*9880d681SAndroid Build Coastguard Worker  %add85 = add i16 %vecext81, %vecext83
1455*9880d681SAndroid Build Coastguard Worker  %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12
1456*9880d681SAndroid Build Coastguard Worker  %vecext88 = extractelement <16 x i16> %b, i32 10
1457*9880d681SAndroid Build Coastguard Worker  %vecext90 = extractelement <16 x i16> %b, i32 11
1458*9880d681SAndroid Build Coastguard Worker  %add92 = add i16 %vecext88, %vecext90
1459*9880d681SAndroid Build Coastguard Worker  %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13
1460*9880d681SAndroid Build Coastguard Worker  %vecext95 = extractelement <16 x i16> %b, i32 12
1461*9880d681SAndroid Build Coastguard Worker  %vecext97 = extractelement <16 x i16> %b, i32 13
1462*9880d681SAndroid Build Coastguard Worker  %add99 = add i16 %vecext95, %vecext97
1463*9880d681SAndroid Build Coastguard Worker  %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14
1464*9880d681SAndroid Build Coastguard Worker  %vecext102 = extractelement <16 x i16> %b, i32 14
1465*9880d681SAndroid Build Coastguard Worker  %vecext104 = extractelement <16 x i16> %b, i32 15
1466*9880d681SAndroid Build Coastguard Worker  %add106 = add i16 %vecext102, %vecext104
1467*9880d681SAndroid Build Coastguard Worker  %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15
1468*9880d681SAndroid Build Coastguard Worker  ret <16 x i16> %vecinit108
1469*9880d681SAndroid Build Coastguard Worker}
1470