xref: /aosp_15_r20/external/llvm/test/CodeGen/X86/haddsub-undef.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
1*9880d681SAndroid Build Coastguard Worker; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
3*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
4*9880d681SAndroid Build Coastguard Worker; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
5*9880d681SAndroid Build Coastguard Worker
6*9880d681SAndroid Build Coastguard Worker; Verify that we correctly fold horizontal binop even in the presence of UNDEFs.
7*9880d681SAndroid Build Coastguard Worker
; test1_undef: builds a <4 x float> whose lane 0 is a[0]+a[1], lane 1 is
; a[2]+a[3] and lane 3 is b[2]+b[3]. Lane 2 is never written, so it keeps the
; undef value of the initial vector.
; The SSE and AVX checks below each expect exactly one horizontal add
; (haddps / vhaddps) with operands %xmm1, %xmm0 followed by the return.
8*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) {
9*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test1_undef:
10*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
11*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
12*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
13*9880d681SAndroid Build Coastguard Worker;
14*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test1_undef:
15*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
16*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
17*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
18*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
19*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
20*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
21*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
22*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %a, i32 2
23*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %a, i32 3
24*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
25*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
26*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x float> %b, i32 2
27*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x float> %b, i32 3
28*9880d681SAndroid Build Coastguard Worker  %add12 = fadd float %vecext10, %vecext11
29*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3
30*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit13
31*9880d681SAndroid Build Coastguard Worker}
32*9880d681SAndroid Build Coastguard Worker
; test2_undef: builds a <4 x float> whose lane 0 is a[0]+a[1], lane 2 is
; b[0]+b[1] and lane 3 is b[2]+b[3]. Lane 1 is never written and stays undef.
; The SSE and AVX checks below each expect a single horizontal add
; (haddps / vhaddps) with operands %xmm1, %xmm0.
33*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) {
34*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test2_undef:
35*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
36*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
37*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
38*9880d681SAndroid Build Coastguard Worker;
39*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test2_undef:
40*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
41*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
42*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
43*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
44*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
45*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
46*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
47*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x float> %b, i32 0
48*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x float> %b, i32 1
49*9880d681SAndroid Build Coastguard Worker  %add8 = fadd float %vecext6, %vecext7
50*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2
51*9880d681SAndroid Build Coastguard Worker  %vecext10 = extractelement <4 x float> %b, i32 2
52*9880d681SAndroid Build Coastguard Worker  %vecext11 = extractelement <4 x float> %b, i32 3
53*9880d681SAndroid Build Coastguard Worker  %add12 = fadd float %vecext10, %vecext11
54*9880d681SAndroid Build Coastguard Worker  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
55*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit13
56*9880d681SAndroid Build Coastguard Worker}
57*9880d681SAndroid Build Coastguard Worker
; test3_undef: builds a <4 x float> whose lane 0 is a[0]+a[1], lane 1 is
; a[2]+a[3] and lane 2 is b[0]+b[1]. Lane 3 is never written and stays undef.
; The SSE and AVX checks below each expect a single horizontal add
; (haddps / vhaddps) with operands %xmm1, %xmm0.
58*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) {
59*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test3_undef:
60*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
61*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
62*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
63*9880d681SAndroid Build Coastguard Worker;
64*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test3_undef:
65*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
66*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
67*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
68*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
69*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
70*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
71*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
72*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %a, i32 2
73*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %a, i32 3
74*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
75*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
76*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <4 x float> %b, i32 0
77*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <4 x float> %b, i32 1
78*9880d681SAndroid Build Coastguard Worker  %add8 = fadd float %vecext6, %vecext7
79*9880d681SAndroid Build Coastguard Worker  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
80*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit9
81*9880d681SAndroid Build Coastguard Worker}
82*9880d681SAndroid Build Coastguard Worker
; test4_undef: only lane 0 of the <4 x float> result is defined (a[0]+a[1]);
; lanes 1-3 stay undef. %b is unused.
; The checks below expect NO horizontal add here: the expected code is a
; movshdup shuffle followed by a scalar addss (vmovshdup / vaddss for AVX).
83*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) {
84*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test4_undef:
85*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
86*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
87*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addss %xmm1, %xmm0
88*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
89*9880d681SAndroid Build Coastguard Worker;
90*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test4_undef:
91*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
92*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
93*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
94*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
95*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
96*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
97*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
98*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
99*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit
100*9880d681SAndroid Build Coastguard Worker}
101*9880d681SAndroid Build Coastguard Worker
; test5_undef: <2 x double> variant of the single-lane case. Only lane 0 is
; defined (a[0]+a[1]); lane 1 stays undef. %b is unused.
; The checks below expect a shuffle plus a scalar addsd / vaddsd rather than
; a horizontal add.
102*9880d681SAndroid Build Coastguard Workerdefine <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
103*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test5_undef:
104*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
105*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm0, %xmm1
106*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
107*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addsd %xmm0, %xmm1
108*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movapd %xmm1, %xmm0
109*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
110*9880d681SAndroid Build Coastguard Worker;
111*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test5_undef:
112*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
113*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
114*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
115*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
116*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <2 x double> %a, i32 0
117*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <2 x double> %a, i32 1
118*9880d681SAndroid Build Coastguard Worker  %add = fadd double %vecext, %vecext1
119*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <2 x double> undef, double %add, i32 0
120*9880d681SAndroid Build Coastguard Worker  ret <2 x double> %vecinit
121*9880d681SAndroid Build Coastguard Worker}
122*9880d681SAndroid Build Coastguard Worker
; test6_undef: lanes 0 and 1 of the <4 x float> result are a[0]+a[1] and
; a[2]+a[3]; lanes 2 and 3 stay undef. %b is unused.
; The checks below expect a single horizontal add that uses %xmm0 for every
; operand (haddps %xmm0, %xmm0 / vhaddps %xmm0, %xmm0, %xmm0).
123*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) {
124*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test6_undef:
125*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
126*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm0, %xmm0
127*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
128*9880d681SAndroid Build Coastguard Worker;
129*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test6_undef:
130*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
131*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
132*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
133*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
134*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
135*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
136*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
137*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %a, i32 2
138*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %a, i32 3
139*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
140*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
141*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
142*9880d681SAndroid Build Coastguard Worker}
143*9880d681SAndroid Build Coastguard Worker
; test7_undef: only the upper two lanes are defined, and only from %b:
; lane 2 is b[0]+b[1] and lane 3 is b[2]+b[3]. Lanes 0 and 1 stay undef and
; %a is not read by the IR.
; The checks below expect a single horizontal add (haddps / vhaddps) with
; operands %xmm1, %xmm0.
144*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) {
145*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test7_undef:
146*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
147*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
148*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
149*9880d681SAndroid Build Coastguard Worker;
150*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test7_undef:
151*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
152*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
153*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
154*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %b, i32 0
155*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %b, i32 1
156*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
157*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 2
158*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %b, i32 2
159*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %b, i32 3
160*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
161*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
162*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
163*9880d681SAndroid Build Coastguard Worker}
164*9880d681SAndroid Build Coastguard Worker
; test8_undef: lane 0 is a[0]+a[1], and a[2]+a[3] is inserted into lane 2
; (not lane 1, unlike test6_undef). Lanes 1 and 3 stay undef; %b is unused.
; The checks below expect no horizontal add for this layout: both prefixes
; expect two scalar adds (addss / vaddss) combined with shuffles.
165*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
166*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test8_undef:
167*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
168*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
169*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addss %xmm0, %xmm1
170*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm0, %xmm2
171*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
172*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
173*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addss %xmm2, %xmm0
174*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
175*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
176*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movaps %xmm1, %xmm0
177*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
178*9880d681SAndroid Build Coastguard Worker;
179*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test8_undef:
180*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
181*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
182*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
183*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
184*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
185*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vaddss %xmm0, %xmm2, %xmm0
186*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
187*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
188*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
189*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
190*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
191*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
192*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %a, i32 2
193*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %a, i32 3
194*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
195*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2
196*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
197*9880d681SAndroid Build Coastguard Worker}
198*9880d681SAndroid Build Coastguard Worker
; test9_undef: only the outermost lanes are defined: lane 0 is a[0]+a[1] and
; lane 3 is b[2]+b[3]. Lanes 1 and 2 stay undef.
; The checks below expect a single horizontal add (haddps / vhaddps) with
; operands %xmm1, %xmm0.
199*9880d681SAndroid Build Coastguard Workerdefine <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) {
200*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test9_undef:
201*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
202*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
203*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
204*9880d681SAndroid Build Coastguard Worker;
205*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test9_undef:
206*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
207*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
208*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
209*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <4 x float> %a, i32 0
210*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <4 x float> %a, i32 1
211*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
212*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <4 x float> undef, float %add, i32 0
213*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <4 x float> %b, i32 2
214*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <4 x float> %b, i32 3
215*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
216*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
217*9880d681SAndroid Build Coastguard Worker  ret <4 x float> %vecinit5
218*9880d681SAndroid Build Coastguard Worker}
219*9880d681SAndroid Build Coastguard Worker
; test10_undef: <8 x float> version of test9_undef. Lane 0 is a[0]+a[1] and
; lane 3 is b[2]+b[3]; every other lane (1, 2, 4-7) stays undef.
; The SSE checks expect one 128-bit haddps with operands %xmm2, %xmm0; the
; AVX checks expect one 256-bit vhaddps with operands %ymm1, %ymm0.
220*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) {
221*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test10_undef:
222*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
223*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm2, %xmm0
224*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
225*9880d681SAndroid Build Coastguard Worker;
226*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test10_undef:
227*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
228*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
229*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
230*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
231*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
232*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
233*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x float> undef, float %add, i32 0
234*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %b, i32 2
235*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %b, i32 3
236*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
237*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3
238*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit5
239*9880d681SAndroid Build Coastguard Worker}
240*9880d681SAndroid Build Coastguard Worker
; test11_undef: <8 x float> result with lane 0 = a[0]+a[1] and
; lane 6 = b[4]+b[5]; all other lanes stay undef.
; The SSE checks expect scalar code (movshdup + addss per sum, then movddup);
; the AVX checks expect a single 256-bit vhaddps.
; NOTE(review): the AVX expectation names %ymm0 for both source operands even
; though lane 6 is computed from %b (which test10_undef's expectation suggests
; lives in %ymm1) - confirm this recorded output is actually correct.
241*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) {
242*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test11_undef:
243*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
244*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
245*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addss %xmm1, %xmm0
246*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
247*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addss %xmm3, %xmm1
248*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
249*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
250*9880d681SAndroid Build Coastguard Worker;
251*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test11_undef:
252*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
253*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
254*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
255*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
256*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
257*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
258*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x float> undef, float %add, i32 0
259*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %b, i32 4
260*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %b, i32 5
261*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
262*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6
263*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit5
264*9880d681SAndroid Build Coastguard Worker}
265*9880d681SAndroid Build Coastguard Worker
; test12_undef: <8 x float> version of test6_undef. Lane 0 is a[0]+a[1] and
; lane 1 is a[2]+a[3]; lanes 2-7 stay undef and %b is unused.
; The SSE checks expect one haddps of %xmm0 with itself; the AVX checks expect
; one 256-bit vhaddps of %ymm0 with itself.
266*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) {
267*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test12_undef:
268*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
269*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm0, %xmm0
270*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
271*9880d681SAndroid Build Coastguard Worker;
272*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test12_undef:
273*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
274*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
275*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
276*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
277*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
278*9880d681SAndroid Build Coastguard Worker  %add = fadd float %vecext, %vecext1
279*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x float> undef, float %add, i32 0
280*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %a, i32 2
281*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %a, i32 3
282*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext2, %vecext3
283*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
284*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit5
285*9880d681SAndroid Build Coastguard Worker}
286*9880d681SAndroid Build Coastguard Worker
; test13_undef: all four pairwise sums of the eight elements of %a
; (a[0]+a[1], a[2]+a[3], a[4]+a[5], a[6]+a[7]) are packed into lanes 0-3 of
; the <8 x float> result; lanes 4-7 stay undef and %b is unused.
; The SSE checks expect one haddps with operands %xmm1, %xmm0. The AVX checks
; expect the upper half of %ymm0 to be extracted with vextractf128 and then a
; single 128-bit vhaddps.
287*9880d681SAndroid Build Coastguard Workerdefine <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) {
288*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test13_undef:
289*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
290*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    haddps %xmm1, %xmm0
291*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
292*9880d681SAndroid Build Coastguard Worker;
293*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: test13_undef:
294*9880d681SAndroid Build Coastguard Worker; AVX:       # BB#0:
295*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
296*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
297*9880d681SAndroid Build Coastguard Worker; AVX-NEXT:    retq
298*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x float> %a, i32 0
299*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x float> %a, i32 1
300*9880d681SAndroid Build Coastguard Worker  %add1 = fadd float %vecext, %vecext1
301*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0
302*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x float> %a, i32 2
303*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x float> %a, i32 3
304*9880d681SAndroid Build Coastguard Worker  %add2 = fadd float %vecext2, %vecext3
305*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1
306*9880d681SAndroid Build Coastguard Worker  %vecext4 = extractelement <8 x float> %a, i32 4
307*9880d681SAndroid Build Coastguard Worker  %vecext5 = extractelement <8 x float> %a, i32 5
308*9880d681SAndroid Build Coastguard Worker  %add3 = fadd float %vecext4, %vecext5
309*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2
310*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x float> %a, i32 6
311*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x float> %a, i32 7
312*9880d681SAndroid Build Coastguard Worker  %add4 = fadd float %vecext6, %vecext7
313*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3
314*9880d681SAndroid Build Coastguard Worker  ret <8 x float> %vecinit4
315*9880d681SAndroid Build Coastguard Worker}
316*9880d681SAndroid Build Coastguard Worker
; test14_undef: integer (<8 x i32>) counterpart of test10_undef. Lane 0 is
; a[0]+a[1] and lane 3 is b[2]+b[3], using integer add; the other lanes stay
; undef.
; Expected code differs per check prefix: a 128-bit phaddd for SSE, a 128-bit
; vphaddd for AVX1, and a 256-bit vphaddd for AVX2.
317*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {
318*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test14_undef:
319*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
320*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    phaddd %xmm2, %xmm0
321*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
322*9880d681SAndroid Build Coastguard Worker;
323*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test14_undef:
324*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
325*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
326*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
327*9880d681SAndroid Build Coastguard Worker;
328*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test14_undef:
329*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
330*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
331*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
332*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %a, i32 0
333*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %a, i32 1
334*9880d681SAndroid Build Coastguard Worker  %add = add i32 %vecext, %vecext1
335*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
336*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %b, i32 2
337*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %b, i32 3
338*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext2, %vecext3
339*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3
340*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit5
341*9880d681SAndroid Build Coastguard Worker}
342*9880d681SAndroid Build Coastguard Worker
343*9880d681SAndroid Build Coastguard Worker; On AVX2, the following sequence can be folded into a single horizontal add.
344*9880d681SAndroid Build Coastguard Worker; If the Subtarget doesn't support AVX2, then we avoid emitting two packed
345*9880d681SAndroid Build Coastguard Worker; integer horizontal adds and instead emit two scalar adds followed by vector inserts.
; test15_undef: integer (<8 x i32>) counterpart of test11_undef. Lane 0 is
; a[0]+a[1] and lane 6 is b[4]+b[5]; all other lanes stay undef.
; The SSE and AVX1 checks expect scalar code: elements are moved to general
; purpose registers, summed with addl, and moved back into vector registers.
; The AVX2 checks expect a single 256-bit vphaddd.
; NOTE(review): the AVX2 expectation names %ymm0 for both source operands even
; though lane 6 is computed from %b (which test14_undef's expectation suggests
; lives in %ymm1) - confirm this recorded output is actually correct.
346*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) {
347*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test15_undef:
348*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
349*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %eax
350*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
351*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %ecx
352*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addl %eax, %ecx
353*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm3, %eax
354*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
355*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %xmm0, %edx
356*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    addl %eax, %edx
357*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %ecx, %xmm0
358*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    movd %edx, %xmm1
359*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
360*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
361*9880d681SAndroid Build Coastguard Worker;
362*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test15_undef:
363*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
364*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %xmm0, %eax
365*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrd $1, %xmm0, %ecx
366*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addl %eax, %ecx
367*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
368*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %xmm0, %eax
369*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpextrd $1, %xmm0, %edx
370*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    addl %eax, %edx
371*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %ecx, %xmm0
372*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vmovd %edx, %xmm1
373*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
374*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
375*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
376*9880d681SAndroid Build Coastguard Worker;
377*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test15_undef:
378*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
379*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
380*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
381*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %a, i32 0
382*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %a, i32 1
383*9880d681SAndroid Build Coastguard Worker  %add = add i32 %vecext, %vecext1
384*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
385*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %b, i32 4
386*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %b, i32 5
387*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext2, %vecext3
388*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6
389*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit5
390*9880d681SAndroid Build Coastguard Worker}
391*9880d681SAndroid Build Coastguard Worker
; test16_undef: integer (<8 x i32>) counterpart of test12_undef. Lane 0 is
; a[0]+a[1] and lane 1 is a[2]+a[3]; lanes 2-7 stay undef and %b is unused.
; Expected code is one horizontal integer add of the first argument register
; with itself: 128-bit phaddd / vphaddd for SSE and AVX1, 256-bit vphaddd for
; AVX2.
392*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {
393*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test16_undef:
394*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
395*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    phaddd %xmm0, %xmm0
396*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
397*9880d681SAndroid Build Coastguard Worker;
398*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test16_undef:
399*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
400*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
401*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
402*9880d681SAndroid Build Coastguard Worker;
403*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test16_undef:
404*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
405*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
406*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
407*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %a, i32 0
408*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %a, i32 1
409*9880d681SAndroid Build Coastguard Worker  %add = add i32 %vecext, %vecext1
410*9880d681SAndroid Build Coastguard Worker  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
411*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %a, i32 2
412*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %a, i32 3
413*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext2, %vecext3
414*9880d681SAndroid Build Coastguard Worker  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
415*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit5
416*9880d681SAndroid Build Coastguard Worker}
417*9880d681SAndroid Build Coastguard Worker
; test17_undef builds an <8 x i32> result whose lanes 0 through 3 hold the
; four adjacent pair sums of %a (a[0]+a[1], a[2]+a[3], a[4]+a[5], a[6]+a[7]);
; lanes 4 through 7 stay undef and the %b argument is never read.
; The assertions below expect one 128 bit horizontal add per run. The two AVX
; runs first pull a 128 bit half out of ymm0 (extract immediate 1) into xmm1
; and then add it horizontally with xmm0.
418*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) {
419*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: test17_undef:
420*9880d681SAndroid Build Coastguard Worker; SSE:       # BB#0:
421*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    phaddd %xmm1, %xmm0
422*9880d681SAndroid Build Coastguard Worker; SSE-NEXT:    retq
423*9880d681SAndroid Build Coastguard Worker;
424*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: test17_undef:
425*9880d681SAndroid Build Coastguard Worker; AVX1:       # BB#0:
426*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
427*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
428*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT:    retq
429*9880d681SAndroid Build Coastguard Worker;
430*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: test17_undef:
431*9880d681SAndroid Build Coastguard Worker; AVX2:       # BB#0:
432*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
433*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
434*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT:    retq
; Result lane 0 = a[0] + a[1], inserted into an otherwise undef vector.
435*9880d681SAndroid Build Coastguard Worker  %vecext = extractelement <8 x i32> %a, i32 0
436*9880d681SAndroid Build Coastguard Worker  %vecext1 = extractelement <8 x i32> %a, i32 1
437*9880d681SAndroid Build Coastguard Worker  %add1 = add i32 %vecext, %vecext1
438*9880d681SAndroid Build Coastguard Worker  %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0
; Result lane 1 = a[2] + a[3].
439*9880d681SAndroid Build Coastguard Worker  %vecext2 = extractelement <8 x i32> %a, i32 2
440*9880d681SAndroid Build Coastguard Worker  %vecext3 = extractelement <8 x i32> %a, i32 3
441*9880d681SAndroid Build Coastguard Worker  %add2 = add i32 %vecext2, %vecext3
442*9880d681SAndroid Build Coastguard Worker  %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1
; Result lane 2 = a[4] + a[5]; the inputs come from the second half of %a.
443*9880d681SAndroid Build Coastguard Worker  %vecext4 = extractelement <8 x i32> %a, i32 4
444*9880d681SAndroid Build Coastguard Worker  %vecext5 = extractelement <8 x i32> %a, i32 5
445*9880d681SAndroid Build Coastguard Worker  %add3 = add i32 %vecext4, %vecext5
446*9880d681SAndroid Build Coastguard Worker  %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2
; Result lane 3 = a[6] + a[7]; lanes 4 through 7 are never written.
447*9880d681SAndroid Build Coastguard Worker  %vecext6 = extractelement <8 x i32> %a, i32 6
448*9880d681SAndroid Build Coastguard Worker  %vecext7 = extractelement <8 x i32> %a, i32 7
449*9880d681SAndroid Build Coastguard Worker  %add4 = add i32 %vecext6, %vecext7
450*9880d681SAndroid Build Coastguard Worker  %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3
451*9880d681SAndroid Build Coastguard Worker  ret <8 x i32> %vecinit4
452*9880d681SAndroid Build Coastguard Worker}
453