; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512F
;
; Combine tests involving AVX target shuffles
;
; Every function below feeds constant shuffle masks to the vpermilvar
; intrinsics (sometimes mixed with generic shufflevector) and the check lines
; pin the single instruction, or the absence of any instruction, that the
; whole chain is expected to lower to on all three run configurations.

; Immediate-controlled in-lane permutes (declared but not called below).
declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8)
declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8)
declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8)
declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8)

; Vector-controlled in-lane permutes; these are the intrinsics under test.
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)

; 128-bit lane permutes (declared but not called below).
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8)
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8)
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8)

; Two permutes with the same <3,2,1,0> mask; the checks expect the pair to
; fold away completely, leaving only the return.
define <4 x float> @combine_vpermilvar_4f32_identity(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_identity:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %rev = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  %res = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %rev, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  ret <4 x float> %res
}

; Mask <0,1,0,1> on four floats is expected to be emitted as vmovddup.
define <4 x float> @combine_vpermilvar_4f32_movddup(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_movddup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; ALL-NEXT:    retq
  %dup = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
  ret <4 x float> %dup
}

; Same mask as above with the source loaded from memory; the checks expect the
; load to be folded into the vmovddup memory operand.
define <4 x float> @combine_vpermilvar_4f32_movddup_load(<4 x float> *%a0) {
; ALL-LABEL: combine_vpermilvar_4f32_movddup_load:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; ALL-NEXT:    retq
  %vec = load <4 x float>, <4 x float> *%a0
  %dup = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %vec, <4 x i32> <i32 0, i32 1, i32 0, i32 1>)
  ret <4 x float> %dup
}

; Mask <undef,1,3,3>; the checks expect vmovshdup, i.e. the undef element is
; filled in as index 1.
define <4 x float> @combine_vpermilvar_4f32_movshdup(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_movshdup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; ALL-NEXT:    retq
  %odd = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 undef, i32 1, i32 3, i32 3>)
  ret <4 x float> %odd
}

; Mask <0,0,2,undef>; the checks expect vmovsldup, i.e. the undef element is
; filled in as index 2.
define <4 x float> @combine_vpermilvar_4f32_movsldup(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_movsldup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; ALL-NEXT:    retq
  %even = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 undef>)
  ret <4 x float> %even
}

; Mask <2,2,3,3>; despite the test name the checks currently expect an
; immediate vpermilps with that same pattern.
define <4 x float> @combine_vpermilvar_4f32_unpckh(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_unpckh:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
; ALL-NEXT:    retq
  %hi = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 2, i32 2, i32 3, i32 3>)
  ret <4 x float> %hi
}

; Mask <0,0,1,1>; despite the test name the checks currently expect an
; immediate vpermilps with that same pattern.
define <4 x float> @combine_vpermilvar_4f32_unpckl(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_unpckl:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
; ALL-NEXT:    retq
  %lo = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 1, i32 1>)
  ret <4 x float> %lo
}

; Two 256-bit permutes whose masks differ only in one undef element; the
; checks expect the pair to fold away, leaving only the return.
define <8 x float> @combine_vpermilvar_8f32_identity(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_identity:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %first = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 undef>)
  %second = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %first, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 2, i32 3, i32 0, i32 1>)
  ret <8 x float> %second
}

; Two 256-bit permutes with undef mask elements; the checks expect a single
; vpermilps whose pattern [1,0,3,2,6,u,4,u] keeps the undef positions.
define <8 x float> @combine_vpermilvar_8f32_10326u4u(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_10326u4u:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,6,u,4,u]
; ALL-NEXT:    retq
  %first = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 0, i32 1, i32 2, i32 undef>)
  %second = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %first, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 undef>)
  ret <8 x float> %second
}

; permute -> swap of the two 128-bit halves -> same permute; the checks expect
; the in-lane permutes to cancel, leaving one vperm2f128.
define <8 x float> @combine_vpermilvar_vperm2f128_8f32(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_vperm2f128_8f32:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; ALL-NEXT:    retq
  %rev = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  %swapped = shufflevector <8 x float> %rev, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %res = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %swapped, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %res
}

; As above, but the middle shuffle fills the low half from a zero vector; the
; checks expect one vperm2f128 with a zeroed low half.
define <8 x float> @combine_vpermilvar_vperm2f128_zero_8f32(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_vperm2f128_zero_8f32:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT:    retq
  %rev = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  %zlow = shufflevector <8 x float> %rev, <8 x float> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
  %res = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %zlow, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %res
}

; permute -> keep low half, zero high half -> same permute; the checks expect
; a zeroing vxorpd followed by a vblendpd rather than any permute.
define <4 x double> @combine_vperm2f128_vpermilvar_as_vpblendpd(<4 x double> %a0) {
; ALL-LABEL: combine_vperm2f128_vpermilvar_as_vpblendpd:
; ALL:       # BB#0:
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %swap = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
  %zhigh = shufflevector <4 x double> %swap, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %res = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %zhigh, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
  ret <4 x double> %res
}

; 256-bit float permute with mask <0,1,0,1,4,5,4,5>; the checks expect the
; 256-bit vmovddup form.
define <8 x float> @combine_vpermilvar_8f32_movddup(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_movddup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %dup = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
  ret <8 x float> %dup
}

; Same mask as above with the source loaded from memory; the checks expect the
; load to be folded into the vmovddup memory operand.
define <8 x float> @combine_vpermilvar_8f32_movddup_load(<8 x float> *%a0) {
; ALL-LABEL: combine_vpermilvar_8f32_movddup_load:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT:    retq
  %vec = load <8 x float>, <8 x float> *%a0
  %dup = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %vec, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>)
  ret <8 x float> %dup
}

; Mask <1,1,3,3,undef,5,7,7>; the checks expect the 256-bit vmovshdup, with
; the undef element filled in as index 5.
define <8 x float> @combine_vpermilvar_8f32_movshdup(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_movshdup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
; ALL-NEXT:    retq
  %odd = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 5, i32 7, i32 7>)
  ret <8 x float> %odd
}

; Mask <0,0,2,2,4,4,6,6>; the checks expect the 256-bit vmovsldup.
define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_movsldup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
; ALL-NEXT:    retq
  %even = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>)
  ret <8 x float> %even
}

; Two double-precision permutes with the same <2,0> mask; the checks expect
; the pair to fold away, leaving only the return.
define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
; ALL-LABEL: combine_vpermilvar_2f64_identity:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %swap = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
  %res = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %swap, <2 x i64> <i64 2, i64 0>)
  ret <2 x double> %res
}

; Mask <0,0> on two doubles; the checks expect vmovddup.
define <2 x double> @combine_vpermilvar_2f64_movddup(<2 x double> %a0) {
; ALL-LABEL: combine_vpermilvar_2f64_movddup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; ALL-NEXT:    retq
  %dup = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 0, i64 0>)
  ret <2 x double> %dup
}

; Two 256-bit double permutes with the same <2,0,2,0> mask; the checks expect
; the pair to fold away, leaving only the return.
define <4 x double> @combine_vpermilvar_4f64_identity(<4 x double> %a0) {
; ALL-LABEL: combine_vpermilvar_4f64_identity:
; ALL:       # BB#0:
; ALL-NEXT:    retq
  %swap = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
  %res = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %swap, <4 x i64> <i64 2, i64 0, i64 2, i64 0>)
  ret <4 x double> %res
}

; Mask <0,0,4,4> on four doubles; the checks expect the 256-bit vmovddup with
; pattern [0,0,2,2].
define <4 x double> @combine_vpermilvar_4f64_movddup(<4 x double> %a0) {
; ALL-LABEL: combine_vpermilvar_4f64_movddup:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %dup = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 0, i64 0, i64 4, i64 4>)
  ret <4 x double> %dup
}

; A chain of four 128-bit permutes; the checks expect them to merge into one
; vpermilps with pattern [2,0,3,1].
define <4 x float> @combine_vpermilvar_4f32_4stage(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_4stage:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; ALL-NEXT:    retq
  %stage1 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  %stage2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %stage1, <4 x i32> <i32 2, i32 3, i32 0, i32 1>)
  %stage3 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %stage2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>)
  %stage4 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %stage3, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  ret <4 x float> %stage4
}

; A chain of four 256-bit permutes; the checks expect them to merge into one
; vpermilps with pattern [2,0,3,1,6,4,7,5].
define <8 x float> @combine_vpermilvar_8f32_4stage(<8 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_8f32_4stage:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
; ALL-NEXT:    retq
  %stage1 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  %stage2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %stage1, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1>)
  %stage3 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %stage2, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 0, i32 2, i32 1, i32 3>)
  %stage4 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %stage3, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %stage4
}

; A permute followed by a shuffle that pulls two elements from a zero vector;
; the checks expect a single vinsertps producing [a0[1], 0, a0[2], 0].
define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
; ALL-LABEL: combine_vpermilvar_4f32_as_insertps:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[1],zero,xmm0[2],zero
; ALL-NEXT:    retq
  %rev = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  %res = shufflevector <4 x float> %rev, <4 x float> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 1, i32 4>
  ret <4 x float> %res
}