; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse3,+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

; Each function below assembles a horizontal add/sub result one lane at a
; time (extractelement -> scalar op -> insertelement). The autogenerated
; checks record the code llc emits for every RUN configuration above.

; Float lanes <A[0]+A[1], A[2]+A[3], B[0]+B[1], B[2]+B[3]>, inserted in
; lane order; the checks expect a single haddps / vhaddps.
define <4 x float> @hadd_ps_test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test1:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_ps_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 0
  %vecext1 = extractelement <4 x float> %A, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 0
  %vecext2 = extractelement <4 x float> %A, i32 2
  %vecext3 = extractelement <4 x float> %A, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
  %vecext6 = extractelement <4 x float> %B, i32 0
  %vecext7 = extractelement <4 x float> %B, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
  %vecext10 = extractelement <4 x float> %B, i32 2
  %vecext11 = extractelement <4 x float> %B, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
  ret <4 x float> %vecinit13
}

; Same result vector as hadd_ps_test1, but the lanes are inserted in the
; order 1, 0, 3, 2; the checks still expect a single haddps / vhaddps.
define <4 x float> @hadd_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hadd_ps_test2:
; SSE:       # BB#0:
; SSE-NEXT:    haddps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_ps_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 2
  %vecext1 = extractelement <4 x float> %A, i32 3
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %add, i32 1
  %vecext2 = extractelement <4 x float> %A, i32 0
  %vecext3 = extractelement <4 x float> %A, i32 1
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 0
  %vecext6 = extractelement <4 x float> %B, i32 2
  %vecext7 = extractelement <4 x float> %B, i32 3
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 3
  %vecext10 = extractelement <4 x float> %B, i32 0
  %vecext11 = extractelement <4 x float> %B, i32 1
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 2
  ret <4 x float> %vecinit13
}

; Float lanes <A[0]-A[1], A[2]-A[3], B[0]-B[1], B[2]-B[3]>, inserted in
; lane order; the checks expect a single hsubps / vhsubps.
define <4 x float> @hsub_ps_test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hsub_ps_test1:
; SSE:       # BB#0:
; SSE-NEXT:    hsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_ps_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 0
  %vecext1 = extractelement <4 x float> %A, i32 1
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %sub, i32 0
  %vecext2 = extractelement <4 x float> %A, i32 2
  %vecext3 = extractelement <4 x float> %A, i32 3
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 1
  %vecext6 = extractelement <4 x float> %B, i32 0
  %vecext7 = extractelement <4 x float> %B, i32 1
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 2
  %vecext10 = extractelement <4 x float> %B, i32 2
  %vecext11 = extractelement <4 x float> %B, i32 3
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 3
  ret <4 x float> %vecinit13
}

; Same result vector as hsub_ps_test1, but the lanes are inserted in the
; order 1, 0, 3, 2; the checks still expect a single hsubps / vhsubps.
define <4 x float> @hsub_ps_test2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: hsub_ps_test2:
; SSE:       # BB#0:
; SSE-NEXT:    hsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_ps_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x float> %A, i32 2
  %vecext1 = extractelement <4 x float> %A, i32 3
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
  %vecext2 = extractelement <4 x float> %A, i32 0
  %vecext3 = extractelement <4 x float> %A, i32 1
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
  %vecext6 = extractelement <4 x float> %B, i32 2
  %vecext7 = extractelement <4 x float> %B, i32 3
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
  %vecext10 = extractelement <4 x float> %B, i32 0
  %vecext11 = extractelement <4 x float> %B, i32 1
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
  ret <4 x float> %vecinit13
}

; i32 lanes <A[0]+A[1], A[2]+A[3], B[0]+B[1], B[2]+B[3]>, inserted in lane
; order. The SSSE3 and AVX checks expect a single phaddd / vphaddd; the run
; with only SSE3 enabled is checked against a scalarized sequence.
define <4 x i32> @phadd_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phadd_d_test1:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    addl %eax, %edx
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    addl %eax, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edi
; SSE3-NEXT:    addl %eax, %edi
; SSE3-NEXT:    movd %edi, %xmm0
; SSE3-NEXT:    movd %edx, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    movd %esi, %xmm2
; SSE3-NEXT:    movd %ecx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phadd_d_test1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phadd_d_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 0
  %vecext1 = extractelement <4 x i32> %A, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <4 x i32> %A, i32 2
  %vecext3 = extractelement <4 x i32> %A, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 1
  %vecext6 = extractelement <4 x i32> %B, i32 0
  %vecext7 = extractelement <4 x i32> %B, i32 1
  %add8 = add i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 2
  %vecext10 = extractelement <4 x i32> %B, i32 2
  %vecext11 = extractelement <4 x i32> %B, i32 3
  %add12 = add i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 3
  ret <4 x i32> %vecinit13
}

; Variant of phadd_d_test1: lanes are inserted in the order 1, 0, 3, 2 and
; the two adds on %B take their operands swapped (B[3]+B[2], B[1]+B[0]).
; The SSSE3 and AVX checks still expect a single phaddd / vphaddd.
define <4 x i32> @phadd_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phadd_d_test2:
; SSE3:       # BB#0:
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    addl %eax, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    addl %eax, %esi
; SSE3-NEXT:    movd %esi, %xmm0
; SSE3-NEXT:    movd %ecx, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    movd %xmm1, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movd %ecx, %xmm1
; SSE3-NEXT:    movd %edx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phadd_d_test2:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phadd_d_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 2
  %vecext1 = extractelement <4 x i32> %A, i32 3
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %add, i32 1
  %vecext2 = extractelement <4 x i32> %A, i32 0
  %vecext3 = extractelement <4 x i32> %A, i32 1
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %add4, i32 0
  %vecext6 = extractelement <4 x i32> %B, i32 3
  %vecext7 = extractelement <4 x i32> %B, i32 2
  %add8 = add i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %add8, i32 3
  %vecext10 = extractelement <4 x i32> %B, i32 1
  %vecext11 = extractelement <4 x i32> %B, i32 0
  %add12 = add i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %add12, i32 2
  ret <4 x i32> %vecinit13
}

; i32 lanes <A[0]-A[1], A[2]-A[3], B[0]-B[1], B[2]-B[3]>, inserted in lane
; order. The SSSE3 and AVX checks expect a single phsubd / vphsubd; the run
; with only SSE3 enabled is checked against a scalarized sequence.
define <4 x i32> @phsub_d_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phsub_d_test1:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    subl %ecx, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %ecx
; SSE3-NEXT:    movd %xmm1, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    subl %esi, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edi
; SSE3-NEXT:    subl %edi, %esi
; SSE3-NEXT:    movd %esi, %xmm0
; SSE3-NEXT:    movd %ecx, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    movd %edx, %xmm2
; SSE3-NEXT:    movd %eax, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phsub_d_test1:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phsubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsub_d_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 0
  %vecext1 = extractelement <4 x i32> %A, i32 1
  %sub = sub i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
  %vecext2 = extractelement <4 x i32> %A, i32 2
  %vecext3 = extractelement <4 x i32> %A, i32 3
  %sub4 = sub i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
  %vecext6 = extractelement <4 x i32> %B, i32 0
  %vecext7 = extractelement <4 x i32> %B, i32 1
  %sub8 = sub i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
  %vecext10 = extractelement <4 x i32> %B, i32 2
  %vecext11 = extractelement <4 x i32> %B, i32 3
  %sub12 = sub i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
  ret <4 x i32> %vecinit13
}

; Same result vector as phsub_d_test1, but the lanes are inserted in the
; order 1, 0, 3, 2; the SSSE3 and AVX checks still expect phsubd / vphsubd.
define <4 x i32> @phsub_d_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE3-LABEL: phsub_d_test2:
; SSE3:       # BB#0:
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm2, %ecx
; SSE3-NEXT:    subl %ecx, %eax
; SSE3-NEXT:    movd %xmm0, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    subl %esi, %edx
; SSE3-NEXT:    movd %edx, %xmm0
; SSE3-NEXT:    movd %eax, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    subl %edx, %eax
; SSE3-NEXT:    movd %eax, %xmm1
; SSE3-NEXT:    movd %ecx, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: phsub_d_test2:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phsubd %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX-LABEL: phsub_d_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x i32> %A, i32 2
  %vecext1 = extractelement <4 x i32> %A, i32 3
  %sub = sub i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 1
  %vecext2 = extractelement <4 x i32> %A, i32 0
  %vecext3 = extractelement <4 x i32> %A, i32 1
  %sub4 = sub i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 0
  %vecext6 = extractelement <4 x i32> %B, i32 2
  %vecext7 = extractelement <4 x i32> %B, i32 3
  %sub8 = sub i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 3
  %vecext10 = extractelement <4 x i32> %B, i32 0
  %vecext11 = extractelement <4 x i32> %B, i32 1
  %sub12 = sub i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 2
  ret <4 x i32> %vecinit13
}

; Double lanes <A[0]+A[1], B[0]+B[1]>; the checks expect a single
; haddpd / vhaddpd.
define <2 x double> @hadd_pd_test1(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hadd_pd_test1:
; SSE:       # BB#0:
; SSE-NEXT:    haddpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_pd_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %A, i32 0
  %vecext1 = extractelement <2 x double> %A, i32 1
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %add, i32 0
  %vecext2 = extractelement <2 x double> %B, i32 0
  %vecext3 = extractelement <2 x double> %B, i32 1
  %add2 = fadd double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
  ret <2 x double> %vecinit2
}

; Variant of hadd_pd_test1 with the operands of each fadd swapped
; (A[1]+A[0], B[1]+B[0]); the checks still expect haddpd / vhaddpd.
define <2 x double> @hadd_pd_test2(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hadd_pd_test2:
; SSE:       # BB#0:
; SSE-NEXT:    haddpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hadd_pd_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %A, i32 1
  %vecext1 = extractelement <2 x double> %A, i32 0
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %add, i32 0
  %vecext2 = extractelement <2 x double> %B, i32 1
  %vecext3 = extractelement <2 x double> %B, i32 0
  %add2 = fadd double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %add2, i32 1
  ret <2 x double> %vecinit2
}

; Double lanes <A[0]-A[1], B[0]-B[1]>; the checks expect a single
; hsubpd / vhsubpd.
define <2 x double> @hsub_pd_test1(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hsub_pd_test1:
; SSE:       # BB#0:
; SSE-NEXT:    hsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_pd_test1:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %A, i32 0
  %vecext1 = extractelement <2 x double> %A, i32 1
  %sub = fsub double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %sub, i32 0
  %vecext2 = extractelement <2 x double> %B, i32 0
  %vecext3 = extractelement <2 x double> %B, i32 1
  %sub2 = fsub double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 1
  ret <2 x double> %vecinit2
}

; Same result vector as hsub_pd_test1, but lane 1 (B[0]-B[1]) is inserted
; before lane 0 (A[0]-A[1]); the checks still expect hsubpd / vhsubpd.
define <2 x double> @hsub_pd_test2(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: hsub_pd_test2:
; SSE:       # BB#0:
; SSE-NEXT:    hsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: hsub_pd_test2:
; AVX:       # BB#0:
; AVX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %vecext = extractelement <2 x double> %B, i32 0
  %vecext1 = extractelement <2 x double> %B, i32 1
  %sub = fsub double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
  %vecext2 = extractelement <2 x double> %A, i32 0
  %vecext3 = extractelement <2 x double> %A, i32 1
  %sub2 = fsub double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
  ret <2 x double> %vecinit2
}

define <4 x double> @avx_vhadd_pd_test(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: avx_vhadd_pd_test:
; SSE:       # BB#0:
; SSE-NEXT:    haddpd %xmm1, %xmm0
; SSE-NEXT:    haddpd %xmm3, %xmm2
; SSE-NEXT:    movapd %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: avx_vhadd_pd_test:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT:    vhaddpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vhaddpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %vecext = extractelement <4 x double> %A, i32 0
  %vecext1 = extractelement <4 x double> %A, i32 1
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <4 x double> undef, double %add, i32 0
  %vecext2 = extractelement <4 x double> %A, i32 2
  %vecext3 = extractelement <4 x double> %A, i32 3
  %add4 = fadd double %vecext2, %vecext3
  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
  %vecext6 = extractelement <4 x double> %B, i32 0
  %vecext7 = extractelement <4 x double> %B, i32 1
  %add8 = fadd double %vecext6, %vecext7
  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
  %vecext10 =
extractelement <4 x double> %B, i32 2 468*9880d681SAndroid Build Coastguard Worker %vecext11 = extractelement <4 x double> %B, i32 3 469*9880d681SAndroid Build Coastguard Worker %add12 = fadd double %vecext10, %vecext11 470*9880d681SAndroid Build Coastguard Worker %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 471*9880d681SAndroid Build Coastguard Worker ret <4 x double> %vecinit13 472*9880d681SAndroid Build Coastguard Worker} 473*9880d681SAndroid Build Coastguard Worker 474*9880d681SAndroid Build Coastguard Workerdefine <4 x double> @avx_vhsub_pd_test(<4 x double> %A, <4 x double> %B) { 475*9880d681SAndroid Build Coastguard Worker; SSE-LABEL: avx_vhsub_pd_test: 476*9880d681SAndroid Build Coastguard Worker; SSE: # BB#0: 477*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: hsubpd %xmm1, %xmm0 478*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: hsubpd %xmm3, %xmm2 479*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: movapd %xmm2, %xmm1 480*9880d681SAndroid Build Coastguard Worker; SSE-NEXT: retq 481*9880d681SAndroid Build Coastguard Worker; 482*9880d681SAndroid Build Coastguard Worker; AVX-LABEL: avx_vhsub_pd_test: 483*9880d681SAndroid Build Coastguard Worker; AVX: # BB#0: 484*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 485*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vhsubpd %xmm2, %xmm1, %xmm1 486*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 487*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vhsubpd %xmm2, %xmm0, %xmm0 488*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 489*9880d681SAndroid Build Coastguard Worker; AVX-NEXT: retq 490*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <4 x double> %A, i32 0 491*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <4 x double> %A, i32 1 492*9880d681SAndroid Build Coastguard Worker %sub = fsub double %vecext, %vecext1 
493*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <4 x double> undef, double %sub, i32 0 494*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <4 x double> %A, i32 2 495*9880d681SAndroid Build Coastguard Worker %vecext3 = extractelement <4 x double> %A, i32 3 496*9880d681SAndroid Build Coastguard Worker %sub4 = fsub double %vecext2, %vecext3 497*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1 498*9880d681SAndroid Build Coastguard Worker %vecext6 = extractelement <4 x double> %B, i32 0 499*9880d681SAndroid Build Coastguard Worker %vecext7 = extractelement <4 x double> %B, i32 1 500*9880d681SAndroid Build Coastguard Worker %sub8 = fsub double %vecext6, %vecext7 501*9880d681SAndroid Build Coastguard Worker %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2 502*9880d681SAndroid Build Coastguard Worker %vecext10 = extractelement <4 x double> %B, i32 2 503*9880d681SAndroid Build Coastguard Worker %vecext11 = extractelement <4 x double> %B, i32 3 504*9880d681SAndroid Build Coastguard Worker %sub12 = fsub double %vecext10, %vecext11 505*9880d681SAndroid Build Coastguard Worker %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3 506*9880d681SAndroid Build Coastguard Worker ret <4 x double> %vecinit13 507*9880d681SAndroid Build Coastguard Worker} 508*9880d681SAndroid Build Coastguard Worker 509*9880d681SAndroid Build Coastguard Workerdefine <8 x i32> @avx2_vphadd_d_test(<8 x i32> %A, <8 x i32> %B) { 510*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_vphadd_d_test: 511*9880d681SAndroid Build Coastguard Worker; SSE3: # BB#0: 512*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %ecx 513*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3] 514*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm4, %r8d 515*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %ecx, 
%r8d 516*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] 517*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm4, %edx 518*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 519*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %r9d 520*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %edx, %r9d 521*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm1, %esi 522*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] 523*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %r10d 524*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %esi, %r10d 525*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 526*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %esi 527*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3] 528*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %edi 529*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %esi, %edi 530*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm2, %eax 531*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] 532*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %r11d 533*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r11d 534*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1] 535*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %eax 536*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3] 537*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %ecx 538*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %ecx 539*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm3, %eax 540*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: 
pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3] 541*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %edx 542*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %edx 543*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1] 544*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %eax 545*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3] 546*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %esi 547*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %esi 548*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %edi, %xmm0 549*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r9d, %xmm1 550*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 551*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r10d, %xmm2 552*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r8d, %xmm0 553*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 554*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 555*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %esi, %xmm1 556*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %ecx, %xmm2 557*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 558*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %edx, %xmm3 559*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r11d, %xmm1 560*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 561*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 562*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: retq 563*9880d681SAndroid Build Coastguard Worker; 
564*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_vphadd_d_test: 565*9880d681SAndroid Build Coastguard Worker; SSSE3: # BB#0: 566*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: phaddd %xmm1, %xmm0 567*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: phaddd %xmm3, %xmm2 568*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: movdqa %xmm2, %xmm1 569*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: retq 570*9880d681SAndroid Build Coastguard Worker; 571*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_vphadd_d_test: 572*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 573*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 574*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vphaddd %xmm2, %xmm1, %xmm1 575*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 576*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vphaddd %xmm2, %xmm0, %xmm0 577*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 578*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 579*9880d681SAndroid Build Coastguard Worker; 580*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_vphadd_d_test: 581*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 582*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 583*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vphaddd %xmm2, %xmm1, %xmm1 584*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 585*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vphaddd %xmm2, %xmm0, %xmm0 586*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 587*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 588*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <8 x i32> %A, i32 0 589*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <8 x i32> %A, i32 1 
590*9880d681SAndroid Build Coastguard Worker %add = add i32 %vecext, %vecext1 591*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0 592*9880d681SAndroid Build Coastguard Worker %vecext2 = extractelement <8 x i32> %A, i32 2 593*9880d681SAndroid Build Coastguard Worker %vecext3 = extractelement <8 x i32> %A, i32 3 594*9880d681SAndroid Build Coastguard Worker %add4 = add i32 %vecext2, %vecext3 595*9880d681SAndroid Build Coastguard Worker %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1 596*9880d681SAndroid Build Coastguard Worker %vecext6 = extractelement <8 x i32> %A, i32 4 597*9880d681SAndroid Build Coastguard Worker %vecext7 = extractelement <8 x i32> %A, i32 5 598*9880d681SAndroid Build Coastguard Worker %add8 = add i32 %vecext6, %vecext7 599*9880d681SAndroid Build Coastguard Worker %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2 600*9880d681SAndroid Build Coastguard Worker %vecext10 = extractelement <8 x i32> %A, i32 6 601*9880d681SAndroid Build Coastguard Worker %vecext11 = extractelement <8 x i32> %A, i32 7 602*9880d681SAndroid Build Coastguard Worker %add12 = add i32 %vecext10, %vecext11 603*9880d681SAndroid Build Coastguard Worker %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3 604*9880d681SAndroid Build Coastguard Worker %vecext14 = extractelement <8 x i32> %B, i32 0 605*9880d681SAndroid Build Coastguard Worker %vecext15 = extractelement <8 x i32> %B, i32 1 606*9880d681SAndroid Build Coastguard Worker %add16 = add i32 %vecext14, %vecext15 607*9880d681SAndroid Build Coastguard Worker %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4 608*9880d681SAndroid Build Coastguard Worker %vecext18 = extractelement <8 x i32> %B, i32 2 609*9880d681SAndroid Build Coastguard Worker %vecext19 = extractelement <8 x i32> %B, i32 3 610*9880d681SAndroid Build Coastguard Worker %add20 = add i32 %vecext18, %vecext19 611*9880d681SAndroid Build Coastguard Worker 
%vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5 612*9880d681SAndroid Build Coastguard Worker %vecext22 = extractelement <8 x i32> %B, i32 4 613*9880d681SAndroid Build Coastguard Worker %vecext23 = extractelement <8 x i32> %B, i32 5 614*9880d681SAndroid Build Coastguard Worker %add24 = add i32 %vecext22, %vecext23 615*9880d681SAndroid Build Coastguard Worker %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6 616*9880d681SAndroid Build Coastguard Worker %vecext26 = extractelement <8 x i32> %B, i32 6 617*9880d681SAndroid Build Coastguard Worker %vecext27 = extractelement <8 x i32> %B, i32 7 618*9880d681SAndroid Build Coastguard Worker %add28 = add i32 %vecext26, %vecext27 619*9880d681SAndroid Build Coastguard Worker %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7 620*9880d681SAndroid Build Coastguard Worker ret <8 x i32> %vecinit29 621*9880d681SAndroid Build Coastguard Worker} 622*9880d681SAndroid Build Coastguard Worker 623*9880d681SAndroid Build Coastguard Workerdefine <16 x i16> @avx2_vphadd_w_test(<16 x i16> %a, <16 x i16> %b) { 624*9880d681SAndroid Build Coastguard Worker; SSE3-LABEL: avx2_vphadd_w_test: 625*9880d681SAndroid Build Coastguard Worker; SSE3: # BB#0: 626*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %rbp 627*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp0: 628*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 16 629*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %r15 630*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp1: 631*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 24 632*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %r14 633*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp2: 634*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 32 635*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %r13 636*9880d681SAndroid Build Coastguard Worker; 
SSE3-NEXT: .Ltmp3: 637*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 40 638*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %r12 639*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp4: 640*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 48 641*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pushq %rbx 642*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp5: 643*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_def_cfa_offset 56 644*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp6: 645*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %rbx, -56 646*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp7: 647*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %r12, -48 648*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp8: 649*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %r13, -40 650*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp9: 651*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %r14, -32 652*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp10: 653*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %r15, -24 654*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .Ltmp11: 655*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: .cfi_offset %rbp, -16 656*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm0, %eax 657*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $1, %xmm0, %ecx 658*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %ecx 659*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill 660*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $2, %xmm0, %eax 661*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $3, %xmm0, %r11d 662*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r11d 663*9880d681SAndroid Build Coastguard Worker; 
SSE3-NEXT: pextrw $4, %xmm0, %eax 664*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $5, %xmm0, %r10d 665*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r10d 666*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $6, %xmm0, %eax 667*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $7, %xmm0, %r13d 668*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r13d 669*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm1, %eax 670*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $1, %xmm1, %r14d 671*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r14d 672*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $2, %xmm1, %eax 673*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $3, %xmm1, %ebp 674*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %ebp 675*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $4, %xmm1, %eax 676*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $5, %xmm1, %ebx 677*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %ebx 678*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $6, %xmm1, %eax 679*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $7, %xmm1, %edx 680*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %edx 681*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm2, %eax 682*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $1, %xmm2, %ecx 683*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %ecx 684*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill 685*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $2, %xmm2, %eax 686*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $3, %xmm2, %r12d 687*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r12d 688*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $4, %xmm2, %eax 
689*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $5, %xmm2, %r15d 690*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r15d 691*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $6, %xmm2, %eax 692*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $7, %xmm2, %r8d 693*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r8d 694*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %xmm3, %eax 695*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $1, %xmm3, %r9d 696*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %r9d 697*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $2, %xmm3, %eax 698*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $3, %xmm3, %esi 699*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %esi 700*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $4, %xmm3, %eax 701*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $5, %xmm3, %edi 702*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %eax, %edi 703*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $6, %xmm3, %ecx 704*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: pextrw $7, %xmm3, %eax 705*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: addl %ecx, %eax 706*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %edx, %xmm8 707*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r13d, %xmm3 708*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %ebp, %xmm9 709*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r11d, %xmm4 710*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %ebx, %xmm10 711*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r10d, %xmm7 712*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r14d, %xmm11 713*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload 714*9880d681SAndroid Build Coastguard Worker; 
SSE3-NEXT: # xmm0 = mem[0],zero,zero,zero 715*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %eax, %xmm12 716*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r8d, %xmm6 717*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %esi, %xmm13 718*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r12d, %xmm5 719*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %edi, %xmm14 720*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r15d, %xmm2 721*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd %r9d, %xmm15 722*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload 723*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: # xmm1 = mem[0],zero,zero,zero 724*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3] 725*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3] 726*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 727*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3] 728*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3] 729*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] 730*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 731*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3] 732*9880d681SAndroid Build Coastguard Worker; 
SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3] 733*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] 734*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3] 735*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3] 736*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 737*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] 738*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %rbx 739*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %r12 740*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %r13 741*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %r14 742*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %r15 743*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: popq %rbp 744*9880d681SAndroid Build Coastguard Worker; SSE3-NEXT: retq 745*9880d681SAndroid Build Coastguard Worker; 746*9880d681SAndroid Build Coastguard Worker; SSSE3-LABEL: avx2_vphadd_w_test: 747*9880d681SAndroid Build Coastguard Worker; SSSE3: # BB#0: 748*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: phaddw %xmm1, %xmm0 749*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: phaddw %xmm3, %xmm2 750*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: movdqa %xmm2, %xmm1 751*9880d681SAndroid Build Coastguard Worker; SSSE3-NEXT: retq 752*9880d681SAndroid Build Coastguard Worker; 753*9880d681SAndroid Build Coastguard Worker; AVX1-LABEL: avx2_vphadd_w_test: 754*9880d681SAndroid Build Coastguard Worker; AVX1: # BB#0: 755*9880d681SAndroid 
Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 756*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vphaddw %xmm2, %xmm1, %xmm1 757*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 758*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vphaddw %xmm2, %xmm0, %xmm0 759*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 760*9880d681SAndroid Build Coastguard Worker; AVX1-NEXT: retq 761*9880d681SAndroid Build Coastguard Worker; 762*9880d681SAndroid Build Coastguard Worker; AVX2-LABEL: avx2_vphadd_w_test: 763*9880d681SAndroid Build Coastguard Worker; AVX2: # BB#0: 764*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 765*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vphaddw %xmm2, %xmm1, %xmm1 766*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 767*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vphaddw %xmm2, %xmm0, %xmm0 768*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 769*9880d681SAndroid Build Coastguard Worker; AVX2-NEXT: retq 770*9880d681SAndroid Build Coastguard Worker %vecext = extractelement <16 x i16> %a, i32 0 771*9880d681SAndroid Build Coastguard Worker %vecext1 = extractelement <16 x i16> %a, i32 1 772*9880d681SAndroid Build Coastguard Worker %add = add i16 %vecext, %vecext1 773*9880d681SAndroid Build Coastguard Worker %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0 774*9880d681SAndroid Build Coastguard Worker %vecext4 = extractelement <16 x i16> %a, i32 2 775*9880d681SAndroid Build Coastguard Worker %vecext6 = extractelement <16 x i16> %a, i32 3 776*9880d681SAndroid Build Coastguard Worker %add8 = add i16 %vecext4, %vecext6 777*9880d681SAndroid Build Coastguard Worker %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1 778*9880d681SAndroid Build Coastguard Worker %vecext11 = extractelement <16 x i16> %a, i32 4 
779*9880d681SAndroid Build Coastguard Worker %vecext13 = extractelement <16 x i16> %a, i32 5 780*9880d681SAndroid Build Coastguard Worker %add15 = add i16 %vecext11, %vecext13 781*9880d681SAndroid Build Coastguard Worker %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2 782*9880d681SAndroid Build Coastguard Worker %vecext18 = extractelement <16 x i16> %a, i32 6 783*9880d681SAndroid Build Coastguard Worker %vecext20 = extractelement <16 x i16> %a, i32 7 784*9880d681SAndroid Build Coastguard Worker %add22 = add i16 %vecext18, %vecext20 785*9880d681SAndroid Build Coastguard Worker %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3 786*9880d681SAndroid Build Coastguard Worker %vecext25 = extractelement <16 x i16> %a, i32 8 787*9880d681SAndroid Build Coastguard Worker %vecext27 = extractelement <16 x i16> %a, i32 9 788*9880d681SAndroid Build Coastguard Worker %add29 = add i16 %vecext25, %vecext27 789*9880d681SAndroid Build Coastguard Worker %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 4 790*9880d681SAndroid Build Coastguard Worker %vecext32 = extractelement <16 x i16> %a, i32 10 791*9880d681SAndroid Build Coastguard Worker %vecext34 = extractelement <16 x i16> %a, i32 11 792*9880d681SAndroid Build Coastguard Worker %add36 = add i16 %vecext32, %vecext34 793*9880d681SAndroid Build Coastguard Worker %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 5 794*9880d681SAndroid Build Coastguard Worker %vecext39 = extractelement <16 x i16> %a, i32 12 795*9880d681SAndroid Build Coastguard Worker %vecext41 = extractelement <16 x i16> %a, i32 13 796*9880d681SAndroid Build Coastguard Worker %add43 = add i16 %vecext39, %vecext41 797*9880d681SAndroid Build Coastguard Worker %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 6 798*9880d681SAndroid Build Coastguard Worker %vecext46 = extractelement <16 x i16> %a, i32 14 799*9880d681SAndroid Build Coastguard Worker %vecext48 = extractelement <16 x i16> 
%a, i32 15 800*9880d681SAndroid Build Coastguard Worker %add50 = add i16 %vecext46, %vecext48 801*9880d681SAndroid Build Coastguard Worker %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 7 802*9880d681SAndroid Build Coastguard Worker %vecext53 = extractelement <16 x i16> %b, i32 0 803*9880d681SAndroid Build Coastguard Worker %vecext55 = extractelement <16 x i16> %b, i32 1 804*9880d681SAndroid Build Coastguard Worker %add57 = add i16 %vecext53, %vecext55 805*9880d681SAndroid Build Coastguard Worker %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 8 806*9880d681SAndroid Build Coastguard Worker %vecext60 = extractelement <16 x i16> %b, i32 2 807*9880d681SAndroid Build Coastguard Worker %vecext62 = extractelement <16 x i16> %b, i32 3 808*9880d681SAndroid Build Coastguard Worker %add64 = add i16 %vecext60, %vecext62 809*9880d681SAndroid Build Coastguard Worker %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 9 810*9880d681SAndroid Build Coastguard Worker %vecext67 = extractelement <16 x i16> %b, i32 4 811*9880d681SAndroid Build Coastguard Worker %vecext69 = extractelement <16 x i16> %b, i32 5 812*9880d681SAndroid Build Coastguard Worker %add71 = add i16 %vecext67, %vecext69 813*9880d681SAndroid Build Coastguard Worker %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 10 814*9880d681SAndroid Build Coastguard Worker %vecext74 = extractelement <16 x i16> %b, i32 6 815*9880d681SAndroid Build Coastguard Worker %vecext76 = extractelement <16 x i16> %b, i32 7 816*9880d681SAndroid Build Coastguard Worker %add78 = add i16 %vecext74, %vecext76 817*9880d681SAndroid Build Coastguard Worker %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 11 818*9880d681SAndroid Build Coastguard Worker %vecext81 = extractelement <16 x i16> %b, i32 8 819*9880d681SAndroid Build Coastguard Worker %vecext83 = extractelement <16 x i16> %b, i32 9 820*9880d681SAndroid Build Coastguard Worker %add85 = add i16 %vecext81, 
%vecext83 821*9880d681SAndroid Build Coastguard Worker %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12 822*9880d681SAndroid Build Coastguard Worker %vecext88 = extractelement <16 x i16> %b, i32 10 823*9880d681SAndroid Build Coastguard Worker %vecext90 = extractelement <16 x i16> %b, i32 11 824*9880d681SAndroid Build Coastguard Worker %add92 = add i16 %vecext88, %vecext90 825*9880d681SAndroid Build Coastguard Worker %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13 826*9880d681SAndroid Build Coastguard Worker %vecext95 = extractelement <16 x i16> %b, i32 12 827*9880d681SAndroid Build Coastguard Worker %vecext97 = extractelement <16 x i16> %b, i32 13 828*9880d681SAndroid Build Coastguard Worker %add99 = add i16 %vecext95, %vecext97 829*9880d681SAndroid Build Coastguard Worker %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14 830*9880d681SAndroid Build Coastguard Worker %vecext102 = extractelement <16 x i16> %b, i32 14 831*9880d681SAndroid Build Coastguard Worker %vecext104 = extractelement <16 x i16> %b, i32 15 832*9880d681SAndroid Build Coastguard Worker %add106 = add i16 %vecext102, %vecext104 833*9880d681SAndroid Build Coastguard Worker %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15 834*9880d681SAndroid Build Coastguard Worker ret <16 x i16> %vecinit108 835*9880d681SAndroid Build Coastguard Worker} 836*9880d681SAndroid Build Coastguard Worker 837*9880d681SAndroid Build Coastguard Worker; Verify that we don't select horizontal subs in the following functions. 

; not_a_hsub_1: negative test for the <4 x i32> horizontal subtract.
; Result lanes 0 and 1 are A[0]-A[1] and A[2]-A[3] (even element minus odd
; element), but lanes 2 and 3 are B[1]-B[0] and B[3]-B[2], i.e. the operands
; of both %B pairs are swapped. Integer subtraction is not commutative, so the
; expectations below show the scalarized extract / subl / re-insert sequence
; rather than a single packed horizontal subtract.
define <4 x i32> @not_a_hsub_1(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: not_a_hsub_1:
; SSE: # BB#0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE-NEXT: movd %xmm2, %ecx
; SSE-NEXT: subl %ecx, %eax
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT: movd %xmm2, %ecx
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: movd %xmm0, %edx
; SSE-NEXT: subl %edx, %ecx
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT: movd %xmm0, %edx
; SSE-NEXT: movd %xmm1, %esi
; SSE-NEXT: subl %esi, %edx
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE-NEXT: movd %xmm0, %esi
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT: movd %xmm0, %edi
; SSE-NEXT: subl %edi, %esi
; SSE-NEXT: movd %esi, %xmm0
; SSE-NEXT: movd %ecx, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: movd %edx, %xmm2
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_1:
; AVX: # BB#0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vpextrd $1, %xmm0, %ecx
; AVX-NEXT: subl %ecx, %eax
; AVX-NEXT: vpextrd $2, %xmm0, %ecx
; AVX-NEXT: vpextrd $3, %xmm0, %edx
; AVX-NEXT: subl %edx, %ecx
; AVX-NEXT: vpextrd $1, %xmm1, %edx
; AVX-NEXT: vmovd %xmm1, %esi
; AVX-NEXT: subl %esi, %edx
; AVX-NEXT: vpextrd $3, %xmm1, %esi
; AVX-NEXT: vpextrd $2, %xmm1, %edi
; AVX-NEXT: subl %edi, %esi
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0
; AVX-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0
; AVX-NEXT: retq
  %vecext = extractelement <4 x i32> %A, i32 0
  %vecext1 = extractelement <4 x i32> %A, i32 1
  %sub = sub i32 %vecext, %vecext1
  %vecinit = insertelement <4 x i32> undef, i32 %sub, i32 0
  %vecext2 = extractelement <4 x i32> %A, i32 2
  %vecext3 = extractelement <4 x i32> %A, i32 3
  %sub4 = sub i32 %vecext2, %vecext3
  %vecinit5 = insertelement <4 x i32> %vecinit, i32 %sub4, i32 1
  %vecext6 = extractelement <4 x i32> %B, i32 1
  %vecext7 = extractelement <4 x i32> %B, i32 0
  %sub8 = sub i32 %vecext6, %vecext7
  %vecinit9 = insertelement <4 x i32> %vecinit5, i32 %sub8, i32 2
  %vecext10 = extractelement <4 x i32> %B, i32 3
  %vecext11 = extractelement <4 x i32> %B, i32 2
  %sub12 = sub i32 %vecext10, %vecext11
  %vecinit13 = insertelement <4 x i32> %vecinit9, i32 %sub12, i32 3
  ret <4 x i32> %vecinit13
}

; not_a_hsub_2: negative test for the <4 x float> horizontal subtract.
; Lanes 0, 1 and 2 hold A[0]-A[1], A[2]-A[3] and B[0]-B[1] (the IR inserts
; them out of order, but each lands in its even-minus-odd slot). Lane 3 holds
; B[3]-B[2], with the operands swapped, so the build vector as a whole is not
; a horizontal subtract and the expectations show four scalar subss / vsubss
; operations that are shuffled back together.
define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: not_a_hsub_2:
; SSE: # BB#0:
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT: subss %xmm3, %xmm0
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm1, %xmm4
; SSE-NEXT: shufpd {{.*#+}} xmm4 = xmm4[1,0]
; SSE-NEXT: subss %xmm4, %xmm3
; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: subss %xmm3, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_2:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
; AVX-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vsubss %xmm3, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3]
; AVX-NEXT: vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vsubss %xmm2, %xmm1, %xmm1
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; AVX-NEXT: retq
  %vecext = extractelement <4 x float> %A, i32 2
  %vecext1 = extractelement <4 x float> %A, i32 3
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <4 x float> undef, float %sub, i32 1
  %vecext2 = extractelement <4 x float> %A, i32 0
  %vecext3 = extractelement <4 x float> %A, i32 1
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <4 x float> %vecinit, float %sub4, i32 0
  %vecext6 = extractelement <4 x float> %B, i32 3
  %vecext7 = extractelement <4 x float> %B, i32 2
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <4 x float> %vecinit5, float %sub8, i32 3
  %vecext10 = extractelement <4 x float> %B, i32 0
  %vecext11 = extractelement <4 x float> %B, i32 1
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <4 x float> %vecinit9, float %sub12, i32 2
  ret <4 x float> %vecinit13
}

; not_a_hsub_3: negative test for the <2 x double> horizontal subtract.
; Lane 1 is B[0]-B[1], but lane 0 is A[1]-A[0] (operands swapped), so the
; expectations show two scalar subsd / vsubsd operations joined by an unpack
; instead of one packed horizontal subtract.
define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: not_a_hsub_3:
; SSE: # BB#0:
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT: subsd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT: subsd %xmm0, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: not_a_hsub_3:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
; AVX-NEXT: vsubsd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vsubsd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %vecext = extractelement <2 x double> %B, i32 0
  %vecext1 = extractelement <2 x double> %B, i32 1
  %sub = fsub double %vecext, %vecext1
  %vecinit = insertelement <2 x double> undef, double %sub, i32 1
  %vecext2 = extractelement <2 x double> %A, i32 1
  %vecext3 = extractelement <2 x double> %A, i32 0
  %sub2 = fsub double %vecext2, %vecext3
  %vecinit2 = insertelement <2 x double> %vecinit, double %sub2, i32 0
  ret <2 x double> %vecinit2
}

; Test AVX horizontal add/sub of packed single/double precision
; floating point values from 256-bit vectors.

; avx_vhadd_ps: <8 x float> horizontal add built element by element.
; Result lanes 0-1 are the adjacent-pair sums of %a[0..3], lanes 2-3 those of
; %b[0..3], lanes 4-5 those of %a[4..7] and lanes 6-7 those of %b[4..7].
; The AVX runs expect one 256-bit vhaddps; the SSE runs expect the vector to
; be handled as two 128-bit halves, one haddps each.
define <8 x float> @avx_vhadd_ps(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: avx_vhadd_ps:
; SSE: # BB#0:
; SSE-NEXT: haddps %xmm2, %xmm0
; SSE-NEXT: haddps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: avx_vhadd_ps:
; AVX: # BB#0:
; AVX-NEXT: vhaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %add = fadd float %vecext, %vecext1
  %vecinit = insertelement <8 x float> undef, float %add, i32 0
  %vecext2 = extractelement <8 x float> %a, i32 2
  %vecext3 = extractelement <8 x float> %a, i32 3
  %add4 = fadd float %vecext2, %vecext3
  %vecinit5 = insertelement <8 x float> %vecinit, float %add4, i32 1
  %vecext6 = extractelement <8 x float> %b, i32 0
  %vecext7 = extractelement <8 x float> %b, i32 1
  %add8 = fadd float %vecext6, %vecext7
  %vecinit9 = insertelement <8 x float> %vecinit5, float %add8, i32 2
  %vecext10 = extractelement <8 x float> %b, i32 2
  %vecext11 = extractelement <8 x float> %b, i32 3
  %add12 = fadd float %vecext10, %vecext11
  %vecinit13 = insertelement <8 x float> %vecinit9, float %add12, i32 3
  %vecext14 = extractelement <8 x float> %a, i32 4
  %vecext15 = extractelement <8 x float> %a, i32 5
  %add16 = fadd float %vecext14, %vecext15
  %vecinit17 = insertelement <8 x float> %vecinit13, float %add16, i32 4
  %vecext18 = extractelement <8 x float> %a, i32 6
  %vecext19 = extractelement <8 x float> %a, i32 7
  %add20 = fadd float %vecext18, %vecext19
  %vecinit21 = insertelement <8 x float> %vecinit17, float %add20, i32 5
  %vecext22 = extractelement <8 x float> %b, i32 4
  %vecext23 = extractelement <8 x float> %b, i32 5
  %add24 = fadd float %vecext22, %vecext23
  %vecinit25 = insertelement <8 x float> %vecinit21, float %add24, i32 6
  %vecext26 = extractelement <8 x float> %b, i32 6
  %vecext27 = extractelement <8 x float> %b, i32 7
  %add28 = fadd float %vecext26, %vecext27
  %vecinit29 = insertelement <8 x float> %vecinit25, float %add28, i32 7
  ret <8 x float> %vecinit29
}

; avx_vhsub_ps: same lane layout as avx_vhadd_ps, but every pair is combined
; with fsub as (even element - odd element). The AVX runs expect one 256-bit
; vhsubps; the SSE runs expect two 128-bit hsubps.
define <8 x float> @avx_vhsub_ps(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: avx_vhsub_ps:
; SSE: # BB#0:
; SSE-NEXT: hsubps %xmm2, %xmm0
; SSE-NEXT: hsubps %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: avx_vhsub_ps:
; AVX: # BB#0:
; AVX-NEXT: vhsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %a, i32 1
  %sub = fsub float %vecext, %vecext1
  %vecinit = insertelement <8 x float> undef, float %sub, i32 0
  %vecext2 = extractelement <8 x float> %a, i32 2
  %vecext3 = extractelement <8 x float> %a, i32 3
  %sub4 = fsub float %vecext2, %vecext3
  %vecinit5 = insertelement <8 x float> %vecinit, float %sub4, i32 1
  %vecext6 = extractelement <8 x float> %b, i32 0
  %vecext7 = extractelement <8 x float> %b, i32 1
  %sub8 = fsub float %vecext6, %vecext7
  %vecinit9 = insertelement <8 x float> %vecinit5, float %sub8, i32 2
  %vecext10 = extractelement <8 x float> %b, i32 2
  %vecext11 = extractelement <8 x float> %b, i32 3
  %sub12 = fsub float %vecext10, %vecext11
  %vecinit13 = insertelement <8 x float> %vecinit9, float %sub12, i32 3
  %vecext14 = extractelement <8 x float> %a, i32 4
  %vecext15 = extractelement <8 x float> %a, i32 5
  %sub16 = fsub float %vecext14, %vecext15
  %vecinit17 = insertelement <8 x float> %vecinit13, float %sub16, i32 4
  %vecext18 = extractelement <8 x float> %a, i32 6
  %vecext19 = extractelement <8 x float> %a, i32 7
  %sub20 = fsub float %vecext18, %vecext19
  %vecinit21 = insertelement <8 x float> %vecinit17, float %sub20, i32 5
  %vecext22 = extractelement <8 x float> %b, i32 4
  %vecext23 = extractelement <8 x float> %b, i32 5
  %sub24 = fsub float %vecext22, %vecext23
  %vecinit25 = insertelement <8 x float> %vecinit21, float %sub24, i32 6
  %vecext26 = extractelement <8 x float> %b, i32 6
  %vecext27 = extractelement <8 x float> %b, i32 7
  %sub28 = fsub float %vecext26, %vecext27
  %vecinit29 = insertelement <8 x float> %vecinit25, float %sub28, i32 7
  ret <8 x float> %vecinit29
}

; avx_hadd_pd: <4 x double> horizontal add. The result lanes are
; [a0+a1, b0+b1, a2+a3, b2+b3], i.e. the %a and %b pairs alternate within each
; 128-bit half. The AVX runs expect one 256-bit vhaddpd; the SSE runs expect
; two 128-bit haddpd.
define <4 x double> @avx_hadd_pd(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: avx_hadd_pd:
; SSE: # BB#0:
; SSE-NEXT: haddpd %xmm2, %xmm0
; SSE-NEXT: haddpd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: avx_hadd_pd:
; AVX: # BB#0:
; AVX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %vecext = extractelement <4 x double> %a, i32 0
  %vecext1 = extractelement <4 x double> %a, i32 1
  %add = fadd double %vecext, %vecext1
  %vecinit = insertelement <4 x double> undef, double %add, i32 0
  %vecext2 = extractelement <4 x double> %b, i32 0
  %vecext3 = extractelement <4 x double> %b, i32 1
  %add4 = fadd double %vecext2, %vecext3
  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1
  %vecext6 = extractelement <4 x double> %a, i32 2
  %vecext7 = extractelement <4 x double> %a, i32 3
  %add8 = fadd double %vecext6, %vecext7
  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2
  %vecext10 = extractelement <4 x double> %b, i32 2
  %vecext11 = extractelement <4 x double> %b, i32 3
  %add12 = fadd double %vecext10, %vecext11
  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3
  ret <4 x double> %vecinit13
}

; avx_hsub_pd: same lane layout as avx_hadd_pd with fsub, giving
; [a0-a1, b0-b1, a2-a3, b2-b3]. The AVX runs expect one 256-bit vhsubpd; the
; SSE runs expect two 128-bit hsubpd.
define <4 x double> @avx_hsub_pd(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: avx_hsub_pd:
; SSE: # BB#0:
; SSE-NEXT: hsubpd %xmm2, %xmm0
; SSE-NEXT: hsubpd %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: avx_hsub_pd:
; AVX: # BB#0:
; AVX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %vecext = extractelement <4 x double> %a, i32 0
  %vecext1 = extractelement <4 x double> %a, i32 1
  %sub = fsub double %vecext, %vecext1
  %vecinit = insertelement <4 x double> undef, double %sub, i32 0
  %vecext2 = extractelement <4 x double> %b, i32 0
  %vecext3 = extractelement <4 x double> %b, i32 1
  %sub4 = fsub double %vecext2, %vecext3
  %vecinit5 = insertelement <4 x double> %vecinit, double %sub4, i32 1
  %vecext6 = extractelement <4 x double> %a, i32 2
  %vecext7 = extractelement <4 x double> %a, i32 3
  %sub8 = fsub double %vecext6, %vecext7
  %vecinit9 = insertelement <4 x double> %vecinit5, double %sub8, i32 2
  %vecext10 = extractelement <4 x double> %b, i32 2
  %vecext11 = extractelement <4 x double> %b, i32 3
  %sub12 = fsub double %vecext10, %vecext11
  %vecinit13 = insertelement <4 x double> %vecinit9, double %sub12, i32 3
  ret <4 x double> %vecinit13
}

; Test AVX2 horizontal add of packed integer values from 256-bit vectors.
; avx2_hadd_d - scalarized 256-bit horizontal add of i32 elements.
;
; The result is built one element at a time from sums of adjacent pairs:
;   result[0..1] = pair sums of %a[0..3]    result[2..3] = pair sums of %b[0..3]
;   result[4..5] = pair sums of %a[4..7]    result[6..7] = pair sums of %b[4..7]
; The checks below expect this to fold into one 256-bit vphaddd when AVX2 is
; enabled, two 128-bit vphaddd plus a 128-bit extract/insert on AVX1, two
; phaddd with SSSE3, and a fully scalar movd/pshufd/addl sequence when only
; SSE3 is available.
define <8 x i32> @avx2_hadd_d(<8 x i32> %a, <8 x i32> %b) {
; SSE3-LABEL: avx2_hadd_d:
; SSE3:       # BB#0:
; SSE3-NEXT:    movd %xmm0, %ecx
; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,2,3]
; SSE3-NEXT:    movd %xmm4, %r8d
; SSE3-NEXT:    addl %ecx, %r8d
; SSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1]
; SSE3-NEXT:    movd %xmm4, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %r9d
; SSE3-NEXT:    addl %edx, %r9d
; SSE3-NEXT:    movd %xmm2, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %r10d
; SSE3-NEXT:    addl %esi, %r10d
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edi
; SSE3-NEXT:    addl %esi, %edi
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %r11d
; SSE3-NEXT:    addl %eax, %r11d
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movd %xmm3, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
; SSE3-NEXT:    movd %xmm0, %edx
; SSE3-NEXT:    addl %eax, %edx
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[2,3,0,1]
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,1,2,3]
; SSE3-NEXT:    movd %xmm0, %esi
; SSE3-NEXT:    addl %eax, %esi
; SSE3-NEXT:    movd %edi, %xmm0
; SSE3-NEXT:    movd %r9d, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE3-NEXT:    movd %r10d, %xmm2
; SSE3-NEXT:    movd %r8d, %xmm0
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE3-NEXT:    movd %esi, %xmm1
; SSE3-NEXT:    movd %ecx, %xmm2
; SSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE3-NEXT:    movd %edx, %xmm3
; SSE3-NEXT:    movd %r11d, %xmm1
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: avx2_hadd_d:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddd %xmm2, %xmm0
; SSSE3-NEXT:    phaddd %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: avx2_hadd_d:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vphaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: avx2_hadd_d:
; AVX2:       # BB#0:
; AVX2-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %vecext = extractelement <8 x i32> %a, i32 0
  %vecext1 = extractelement <8 x i32> %a, i32 1
  %add = add i32 %vecext, %vecext1
  %vecinit = insertelement <8 x i32> undef, i32 %add, i32 0
  %vecext2 = extractelement <8 x i32> %a, i32 2
  %vecext3 = extractelement <8 x i32> %a, i32 3
  %add4 = add i32 %vecext2, %vecext3
  %vecinit5 = insertelement <8 x i32> %vecinit, i32 %add4, i32 1
  %vecext6 = extractelement <8 x i32> %b, i32 0
  %vecext7 = extractelement <8 x i32> %b, i32 1
  %add8 = add i32 %vecext6, %vecext7
  %vecinit9 = insertelement <8 x i32> %vecinit5, i32 %add8, i32 2
  %vecext10 = extractelement <8 x i32> %b, i32 2
  %vecext11 = extractelement <8 x i32> %b, i32 3
  %add12 = add i32 %vecext10, %vecext11
  %vecinit13 = insertelement <8 x i32> %vecinit9, i32 %add12, i32 3
  %vecext14 = extractelement <8 x i32> %a, i32 4
  %vecext15 = extractelement <8 x i32> %a, i32 5
  %add16 = add i32 %vecext14, %vecext15
  %vecinit17 = insertelement <8 x i32> %vecinit13, i32 %add16, i32 4
  %vecext18 = extractelement <8 x i32> %a, i32 6
  %vecext19 = extractelement <8 x i32> %a, i32 7
  %add20 = add i32 %vecext18, %vecext19
  %vecinit21 = insertelement <8 x i32> %vecinit17, i32 %add20, i32 5
  %vecext22 = extractelement <8 x i32> %b, i32 4
  %vecext23 = extractelement <8 x i32> %b, i32 5
  %add24 = add i32 %vecext22, %vecext23
  %vecinit25 = insertelement <8 x i32> %vecinit21, i32 %add24, i32 6
  %vecext26 = extractelement <8 x i32> %b, i32 6
  %vecext27 = extractelement <8 x i32> %b, i32 7
  %add28 = add i32 %vecext26, %vecext27
  %vecinit29 = insertelement <8 x i32> %vecinit25, i32 %add28, i32 7
  ret <8 x i32> %vecinit29
}

; avx2_hadd_w - scalarized 256-bit horizontal add of i16 elements.
;
; Same shape as avx2_hadd_d but with sixteen i16 elements. Note that the
; insertelement chain does not fill the result in ascending index order:
;   result[0..3]   = pair sums of %a[0..7]
;   result[8..11]  = pair sums of %a[8..15]
;   result[4..7]   = pair sums of %b[0..7]
;   result[12..15] = pair sums of %b[8..15]
; The checks below expect one 256-bit vphaddw when AVX2 is enabled, two
; 128-bit vphaddw plus a 128-bit extract/insert on AVX1, two phaddw with
; SSSE3, and a scalar pextrw/addl/punpcklwd sequence (with callee-saved
; register pushes and two 4-byte stack spills) when only SSE3 is available.
define <16 x i16> @avx2_hadd_w(<16 x i16> %a, <16 x i16> %b) {
; SSE3-LABEL: avx2_hadd_w:
; SSE3:       # BB#0:
; SSE3-NEXT:    pushq %rbp
; SSE3-NEXT:  .Ltmp12:
; SSE3-NEXT:    .cfi_def_cfa_offset 16
; SSE3-NEXT:    pushq %r15
; SSE3-NEXT:  .Ltmp13:
; SSE3-NEXT:    .cfi_def_cfa_offset 24
; SSE3-NEXT:    pushq %r14
; SSE3-NEXT:  .Ltmp14:
; SSE3-NEXT:    .cfi_def_cfa_offset 32
; SSE3-NEXT:    pushq %r13
; SSE3-NEXT:  .Ltmp15:
; SSE3-NEXT:    .cfi_def_cfa_offset 40
; SSE3-NEXT:    pushq %r12
; SSE3-NEXT:  .Ltmp16:
; SSE3-NEXT:    .cfi_def_cfa_offset 48
; SSE3-NEXT:    pushq %rbx
; SSE3-NEXT:  .Ltmp17:
; SSE3-NEXT:    .cfi_def_cfa_offset 56
; SSE3-NEXT:  .Ltmp18:
; SSE3-NEXT:    .cfi_offset %rbx, -56
; SSE3-NEXT:  .Ltmp19:
; SSE3-NEXT:    .cfi_offset %r12, -48
; SSE3-NEXT:  .Ltmp20:
; SSE3-NEXT:    .cfi_offset %r13, -40
; SSE3-NEXT:  .Ltmp21:
; SSE3-NEXT:    .cfi_offset %r14, -32
; SSE3-NEXT:  .Ltmp22:
; SSE3-NEXT:    .cfi_offset %r15, -24
; SSE3-NEXT:  .Ltmp23:
; SSE3-NEXT:    .cfi_offset %rbp, -16
; SSE3-NEXT:    movd %xmm0, %eax
; SSE3-NEXT:    pextrw $1, %xmm0, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT:    pextrw $2, %xmm0, %eax
; SSE3-NEXT:    pextrw $3, %xmm0, %r15d
; SSE3-NEXT:    addl %eax, %r15d
; SSE3-NEXT:    pextrw $4, %xmm0, %eax
; SSE3-NEXT:    pextrw $5, %xmm0, %r14d
; SSE3-NEXT:    addl %eax, %r14d
; SSE3-NEXT:    pextrw $6, %xmm0, %eax
; SSE3-NEXT:    pextrw $7, %xmm0, %r13d
; SSE3-NEXT:    addl %eax, %r13d
; SSE3-NEXT:    movd %xmm1, %eax
; SSE3-NEXT:    pextrw $1, %xmm1, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
; SSE3-NEXT:    pextrw $2, %xmm1, %eax
; SSE3-NEXT:    pextrw $3, %xmm1, %r11d
; SSE3-NEXT:    addl %eax, %r11d
; SSE3-NEXT:    pextrw $4, %xmm1, %eax
; SSE3-NEXT:    pextrw $5, %xmm1, %r10d
; SSE3-NEXT:    addl %eax, %r10d
; SSE3-NEXT:    pextrw $6, %xmm1, %eax
; SSE3-NEXT:    pextrw $7, %xmm1, %r12d
; SSE3-NEXT:    addl %eax, %r12d
; SSE3-NEXT:    movd %xmm2, %eax
; SSE3-NEXT:    pextrw $1, %xmm2, %ebx
; SSE3-NEXT:    addl %eax, %ebx
; SSE3-NEXT:    pextrw $2, %xmm2, %eax
; SSE3-NEXT:    pextrw $3, %xmm2, %ecx
; SSE3-NEXT:    addl %eax, %ecx
; SSE3-NEXT:    pextrw $4, %xmm2, %esi
; SSE3-NEXT:    pextrw $5, %xmm2, %r8d
; SSE3-NEXT:    addl %esi, %r8d
; SSE3-NEXT:    pextrw $6, %xmm2, %esi
; SSE3-NEXT:    pextrw $7, %xmm2, %edx
; SSE3-NEXT:    addl %esi, %edx
; SSE3-NEXT:    movd %xmm3, %edi
; SSE3-NEXT:    pextrw $1, %xmm3, %r9d
; SSE3-NEXT:    addl %edi, %r9d
; SSE3-NEXT:    pextrw $2, %xmm3, %ebp
; SSE3-NEXT:    pextrw $3, %xmm3, %edi
; SSE3-NEXT:    addl %ebp, %edi
; SSE3-NEXT:    pextrw $4, %xmm3, %eax
; SSE3-NEXT:    pextrw $5, %xmm3, %ebp
; SSE3-NEXT:    addl %eax, %ebp
; SSE3-NEXT:    pextrw $6, %xmm3, %esi
; SSE3-NEXT:    pextrw $7, %xmm3, %eax
; SSE3-NEXT:    addl %esi, %eax
; SSE3-NEXT:    movd %edx, %xmm8
; SSE3-NEXT:    movd %r13d, %xmm3
; SSE3-NEXT:    movd %ecx, %xmm9
; SSE3-NEXT:    movd %r15d, %xmm4
; SSE3-NEXT:    movd %r8d, %xmm10
; SSE3-NEXT:    movd %r14d, %xmm7
; SSE3-NEXT:    movd %ebx, %xmm11
; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload
; SSE3-NEXT:    # xmm0 = mem[0],zero,zero,zero
; SSE3-NEXT:    movd %eax, %xmm12
; SSE3-NEXT:    movd %r12d, %xmm6
; SSE3-NEXT:    movd %edi, %xmm13
; SSE3-NEXT:    movd %r11d, %xmm5
; SSE3-NEXT:    movd %ebp, %xmm14
; SSE3-NEXT:    movd %r10d, %xmm2
; SSE3-NEXT:    movd %r9d, %xmm15
; SSE3-NEXT:    movd -{{[0-9]+}}(%rsp), %xmm1 # 4-byte Folded Reload
; SSE3-NEXT:    # xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm11[0],xmm0[1],xmm11[1],xmm0[2],xmm11[2],xmm0[3],xmm11[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1],xmm5[2],xmm13[2],xmm5[3],xmm13[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm15[0],xmm1[1],xmm15[1],xmm1[2],xmm15[2],xmm1[3],xmm15[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3]
; SSE3-NEXT:    popq %rbx
; SSE3-NEXT:    popq %r12
; SSE3-NEXT:    popq %r13
; SSE3-NEXT:    popq %r14
; SSE3-NEXT:    popq %r15
; SSE3-NEXT:    popq %rbp
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: avx2_hadd_w:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    phaddw %xmm2, %xmm0
; SSSE3-NEXT:    phaddw %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: avx2_hadd_w:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vphaddw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: avx2_hadd_w:
; AVX2:       # BB#0:
; AVX2-NEXT:    vphaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %vecext = extractelement <16 x i16> %a, i32 0
  %vecext1 = extractelement <16 x i16> %a, i32 1
  %add = add i16 %vecext, %vecext1
  %vecinit = insertelement <16 x i16> undef, i16 %add, i32 0
  %vecext4 = extractelement <16 x i16> %a, i32 2
  %vecext6 = extractelement <16 x i16> %a, i32 3
  %add8 = add i16 %vecext4, %vecext6
  %vecinit10 = insertelement <16 x i16> %vecinit, i16 %add8, i32 1
  %vecext11 = extractelement <16 x i16> %a, i32 4
  %vecext13 = extractelement <16 x i16> %a, i32 5
  %add15 = add i16 %vecext11, %vecext13
  %vecinit17 = insertelement <16 x i16> %vecinit10, i16 %add15, i32 2
  %vecext18 = extractelement <16 x i16> %a, i32 6
  %vecext20 = extractelement <16 x i16> %a, i32 7
  %add22 = add i16 %vecext18, %vecext20
  %vecinit24 = insertelement <16 x i16> %vecinit17, i16 %add22, i32 3
  %vecext25 = extractelement <16 x i16> %a, i32 8
  %vecext27 = extractelement <16 x i16> %a, i32 9
  %add29 = add i16 %vecext25, %vecext27
  %vecinit31 = insertelement <16 x i16> %vecinit24, i16 %add29, i32 8
  %vecext32 = extractelement <16 x i16> %a, i32 10
  %vecext34 = extractelement <16 x i16> %a, i32 11
  %add36 = add i16 %vecext32, %vecext34
  %vecinit38 = insertelement <16 x i16> %vecinit31, i16 %add36, i32 9
  %vecext39 = extractelement <16 x i16> %a, i32 12
  %vecext41 = extractelement <16 x i16> %a, i32 13
  %add43 = add i16 %vecext39, %vecext41
  %vecinit45 = insertelement <16 x i16> %vecinit38, i16 %add43, i32 10
  %vecext46 = extractelement <16 x i16> %a, i32 14
  %vecext48 = extractelement <16 x i16> %a, i32 15
  %add50 = add i16 %vecext46, %vecext48
  %vecinit52 = insertelement <16 x i16> %vecinit45, i16 %add50, i32 11
  %vecext53 = extractelement <16 x i16> %b, i32 0
  %vecext55 = extractelement <16 x i16> %b, i32 1
  %add57 = add i16 %vecext53, %vecext55
  %vecinit59 = insertelement <16 x i16> %vecinit52, i16 %add57, i32 4
  %vecext60 = extractelement <16 x i16> %b, i32 2
  %vecext62 = extractelement <16 x i16> %b, i32 3
  %add64 = add i16 %vecext60, %vecext62
  %vecinit66 = insertelement <16 x i16> %vecinit59, i16 %add64, i32 5
  %vecext67 = extractelement <16 x i16> %b, i32 4
  %vecext69 = extractelement <16 x i16> %b, i32 5
  %add71 = add i16 %vecext67, %vecext69
  %vecinit73 = insertelement <16 x i16> %vecinit66, i16 %add71, i32 6
  %vecext74 = extractelement <16 x i16> %b, i32 6
  %vecext76 = extractelement <16 x i16> %b, i32 7
  %add78 = add i16 %vecext74, %vecext76
  %vecinit80 = insertelement <16 x i16> %vecinit73, i16 %add78, i32 7
  %vecext81 = extractelement <16 x i16> %b, i32 8
  %vecext83 = extractelement <16 x i16> %b, i32 9
  %add85 = add i16 %vecext81, %vecext83
  %vecinit87 = insertelement <16 x i16> %vecinit80, i16 %add85, i32 12
  %vecext88 = extractelement <16 x i16> %b, i32 10
  %vecext90 = extractelement <16 x i16> %b, i32 11
  %add92 = add i16 %vecext88, %vecext90
  %vecinit94 = insertelement <16 x i16> %vecinit87, i16 %add92, i32 13
  %vecext95 = extractelement <16 x i16> %b, i32 12
  %vecext97 = extractelement <16 x i16> %b, i32 13
  %add99 = add i16 %vecext95, %vecext97
  %vecinit101 = insertelement <16 x i16> %vecinit94, i16 %add99, i32 14
  %vecext102 = extractelement <16 x i16> %b, i32 14
  %vecext104 = extractelement <16 x i16> %b, i32 15
  %add106 = add i16 %vecext102, %vecext104
  %vecinit108 = insertelement <16 x i16> %vecinit101, i16 %add106, i32 15
  ret <16 x i16> %vecinit108
}