; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Verify that we correctly fold horizontal binops even in the presence of UNDEFs.
;
; Each test builds a result vector lane by lane from sums of adjacent source
; elements; lanes that are never inserted stay undef.

; Lanes 0,1 = a[0]+a[1], a[2]+a[3]; lane 3 = b[2]+b[3]; lane 2 is undef.
define <4 x float> @test1_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test1_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %a, i32 2
  %vecext3 = extractelement <4 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  %vecext10 = extractelement <4 x float> %b, i32 2
  %vecext11 = extractelement <4 x float> %b, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit5, float %add12, i32 3
  ret <4 x float> %vecinit13
}

; Lane 0 = a[0]+a[1]; lanes 2,3 = b[0]+b[1], b[2]+b[3]; lane 1 is undef.
define <4 x float> @test2_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test2_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test2_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext6 = extractelement <4 x float> %b, i32 0
  %vecext7 = extractelement <4 x float> %b, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit, float %add8, i32 2
  %vecext10 = extractelement <4 x float> %b, i32 2
  %vecext11 = extractelement <4 x float> %b, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
  ret <4 x float> %vecinit13
}

; Lanes 0,1 = a[0]+a[1], a[2]+a[3]; lane 2 = b[0]+b[1]; lane 3 is undef.
define <4 x float> @test3_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test3_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test3_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %a, i32 2
  %vecext3 = extractelement <4 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  %vecext6 = extractelement <4 x float> %b, i32 0
  %vecext7 = extractelement <4 x float> %b, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
  ret <4 x float> %vecinit9
}

; Only lane 0 = a[0]+a[1] is defined; the checks expect a scalar add, not a
; horizontal add.
define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test4_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  ret <4 x float> %vecinit
}

; <2 x double> variant: only lane 0 = a[0]+a[1] is defined; the checks expect a
; scalar add.
define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test5_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    addsd %xmm0, %xmm1
; SSE-NEXT:    movapd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test5_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %a, i32 0
  %vecext1 = extractelement <2 x double> %a, i32 1
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %add, i32 0
  ret <2 x double> %vecinit
}

; Lanes 0,1 = a[0]+a[1], a[2]+a[3]; lanes 2,3 are undef.
define <4 x float> @test6_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test6_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test6_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %a, i32 2
  %vecext3 = extractelement <4 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  ret <4 x float> %vecinit5
}

; Lanes 2,3 = b[0]+b[1], b[2]+b[3]; lanes 0,1 are undef.
define <4 x float> @test7_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test7_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test7_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %b, i32 0
  %vecext1 = extractelement <4 x float> %b, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 2
  %vecext2 = extractelement <4 x float> %b, i32 2
  %vecext3 = extractelement <4 x float> %b, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
  ret <4 x float> %vecinit5
}

; Lane 0 = a[0]+a[1]; lane 2 = a[2]+a[3]. The checks expect scalar adds plus
; shuffles rather than a horizontal add.
define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test8_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT:    addss %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test8_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vaddss %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %a, i32 2
  %vecext3 = extractelement <4 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 2
  ret <4 x float> %vecinit5
}

; Lane 0 = a[0]+a[1]; lane 3 = b[2]+b[3]; lanes 1,2 are undef.
define <4 x float> @test9_undef(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: test9_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test9_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %a, i32 0
  %vecext1 = extractelement <4 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %b, i32 2
  %vecext3 = extractelement <4 x float> %b, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 3
  ret <4 x float> %vecinit5
}

; <8 x float> variant: lane 0 = a[0]+a[1]; lane 3 = b[2]+b[3]; all other lanes
; are undef.
define <8 x float> @test10_undef(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: test10_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test10_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <8 x float> undef, float %add, i32 0
  %vecext2 = extractelement <8 x float> %b, i32 2
  %vecext3 = extractelement <8 x float> %b, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 3
  ret <8 x float> %vecinit5
}

; <8 x float> variant: lane 0 = a[0]+a[1]; lane 6 = b[4]+b[5]; all other lanes
; are undef.
; NOTE(review): the AVX check reads only %ymm0 although lane 6 is built from %b
; -- this looks suspicious; confirm against current llc output before relying
; on it.
define <8 x float> @test11_undef(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: test11_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE-NEXT:    addss %xmm3, %xmm1
; SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: test11_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <8 x float> undef, float %add, i32 0
  %vecext2 = extractelement <8 x float> %b, i32 4
  %vecext3 = extractelement <8 x float> %b, i32 5
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 6
  ret <8 x float> %vecinit5
}

; <8 x float> variant: lanes 0,1 = a[0]+a[1], a[2]+a[3]; all other lanes are
; undef.
define <8 x float> @test12_undef(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: test12_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test12_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %ymm0, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <8 x float> undef, float %add, i32 0
  %vecext2 = extractelement <8 x float> %a, i32 2
  %vecext3 = extractelement <8 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
  ret <8 x float> %vecinit5
}

; <8 x float> variant: lanes 0..3 hold the four adjacent-pair sums of %a
; (a[0]+a[1], a[2]+a[3], a[4]+a[5], a[6]+a[7]); lanes 4..7 are undef.
define <8 x float> @test13_undef(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: test13_undef:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test13_undef:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %add1 = fadd float %vecext, %vecext1
  %vecinit1 = insertelement <8 x float> undef, float %add1, i32 0
  %vecext2 = extractelement <8 x float> %a, i32 2
  %vecext3 = extractelement <8 x float> %a, i32 3
  %add2 = fadd float %vecext2, %vecext3
  %vecinit2 = insertelement <8 x float> %vecinit1, float %add2, i32 1
  %vecext4 = extractelement <8 x float> %a, i32 4
  %vecext5 = extractelement <8 x float> %a, i32 5
  %add3 = fadd float %vecext4, %vecext5
  %vecinit3 = insertelement <8 x float> %vecinit2, float %add3, i32 2
  %vecext6 = extractelement <8 x float> %a, i32 6
  %vecext7 = extractelement <8 x float> %a, i32 7
  %add4 = fadd float %vecext6, %vecext7
  %vecinit4 = insertelement <8 x float> %vecinit3, float %add4, i32 3
  ret <8 x float> %vecinit4
}

; <8 x i32> variant: lane 0 = a[0]+a[1]; lane 3 = b[2]+b[3]; all other lanes
; are undef.
define <8 x i32> @test14_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test14_undef:
; SSE:       # BB#0:
; SSE-NEXT:    phaddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test14_undef:
; AVX1:       # BB#0:
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test14_undef:
; AVX2:       # BB#0:
; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %b, i32 2
  %vecext3 = extractelement <8 x i32> %b, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 3
  ret <8 x i32> %vecinit5
}

; On AVX2, the following sequence can be folded into a single horizontal add.
; If the Subtarget doesn't support AVX2, then we avoid emitting two packed
; integer horizontal adds and instead emit two scalar adds followed by vector
; inserts.
; <8 x i32>: lane 0 = a[0]+a[1]; lane 6 = b[4]+b[5]; all other lanes are undef.
; NOTE(review): the AVX2 check reads only %ymm0 although lane 6 is built from
; %b -- this looks suspicious; confirm against current llc output before
; relying on it.
define <8 x i32> @test15_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test15_undef:
; SSE:       # BB#0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movd %xmm0, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movd %xmm3, %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE-NEXT:    movd %xmm0, %edx
; SSE-NEXT:    addl %eax, %edx
; SSE-NEXT:    movd %ecx, %xmm0
; SSE-NEXT:    movd %edx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: test15_undef:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vpextrd $1, %xmm0, %ecx
; AVX1-NEXT:    addl %eax, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vpextrd $1, %xmm0, %edx
; AVX1-NEXT:    addl %eax, %edx
; AVX1-NEXT:    vmovd %ecx, %xmm0
; AVX1-NEXT:    vmovd %edx, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test15_undef:
; AVX2:       # BB#0:
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %b, i32 4
  %vecext3 = extractelement <8 x i32> %b, i32 5
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 6
  ret <8 x i32> %vecinit5
}

; <8 x i32>: lanes 0,1 = a[0]+a[1], a[2]+a[3]; all other lanes are undef.
define <8 x i32> @test16_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test16_undef:
; SSE:       # BB#0:
; SSE-NEXT:    phaddd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test16_undef:
; AVX1:       # BB#0:
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test16_undef:
; AVX2:       # BB#0:
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %a, i32 2
  %vecext3 = extractelement <8 x i32> %a, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
  ret <8 x i32> %vecinit5
}

; <8 x i32>: lanes 0..3 hold the four adjacent-pair sums of %a
; (a[0]+a[1], a[2]+a[3], a[4]+a[5], a[6]+a[7]); lanes 4..7 are undef.
define <8 x i32> @test17_undef(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: test17_undef:
; SSE:       # BB#0:
; SSE-NEXT:    phaddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: test17_undef:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test17_undef:
; AVX2:       # BB#0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add1 = add i32 %vecext, %vecext1
  %vecinit1 = insertelement <8 x i32> undef, i32 %add1, i32 0
  %vecext2 = extractelement <8 x i32> %a, i32 2
  %vecext3 = extractelement <8 x i32> %a, i32 3
  %add2 = add i32 %vecext2, %vecext3
  %vecinit2 = insertelement <8 x i32> %vecinit1, i32 %add2, i32 1
  %vecext4 = extractelement <8 x i32> %a, i32 4
  %vecext5 = extractelement <8 x i32> %a, i32 5
  %add3 = add i32 %vecext4, %vecext5
  %vecinit3 = insertelement <8 x i32> %vecinit2, i32 %add3, i32 2
  %vecext6 = extractelement <8 x i32> %a, i32 6
  %vecext7 = extractelement <8 x i32> %a, i32 7
  %add4 = add i32 %vecext6, %vecext7
  %vecinit4 = insertelement <8 x i32> %vecinit3, i32 %add4, i32 3
  ret <8 x i32> %vecinit4
}