; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

; Target intrinsics exercised by the shuffle-combining tests in this file.
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

; pshufb followed by a shufflevector against zeroinitializer; the expected
; output is a single vxorps, i.e. the pair folds to an all-zero vector.
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

; pshufb followed by a shufflevector against zeroinitializer; the expected
; output is a single vxorps, i.e. the pair folds to an all-zero vector.
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

; permd followed by a byte-level shufflevector; expected to be emitted as a
; single vpshufb.
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; Floating-point variant of the test above: permps followed by a byte-level
; shufflevector; expected to be emitted as a single vpshufb.
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; 64-bit element reversal followed by a pshufb; expected to be emitted as one
; vperm2i128 with a zeroed upper half, followed by the vpaddq for the add.
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

; In-lane 64-bit swap followed by a pshufb; expected to be emitted as a blend
; of the input's low half with a zeroed register (vpxor + vpblendd).
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

; 128-bit pshufb with an all-zero mask; expected to be emitted as vpbroadcastb.
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; Widen to 256 bits, pshufb with an all-zero mask, then permd with an all-zero
; index vector; expected to be emitted as a single xmm->ymm vpbroadcastb.
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; 128-bit pshufb repeating bytes 0,1; expected to be emitted as vpbroadcastw.
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

; Widen to 256 bits, pshufb repeating bytes 0,1, then permd with an all-zero
; index vector; expected to be emitted as a single xmm->ymm vpbroadcastw.
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; 128-bit pshufb repeating bytes 0..3; expected to be emitted as vpbroadcastd,
; followed by the vpaddb for the add.
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

; Widen to 256 bits then permd with an all-zero index vector; expected to be
; emitted as an xmm->ymm vpbroadcastd, followed by the vpaddd for the add.
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; 128-bit pshufb repeating bytes 0..7; expected to be emitted as vpbroadcastq.
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

; Widen to 256 bits then permd repeating indices 0,1; expected to be emitted
; as an xmm->ymm vpbroadcastq, followed by the vpaddd for the add.
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; pshufb repeating bytes 0..3 on a bitcast <4 x float>; expected to be emitted
; as vbroadcastss.
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

; Widen to 256 bits then permps with an all-zero index vector; expected to be
; emitted as an xmm->ymm vbroadcastss.
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

; Widen to 256 bits then permps repeating indices 0,1; expected to be emitted
; as an xmm->ymm vbroadcastsd.
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

; permd whose indices move whole 64-bit pairs; expected to be emitted as an
; immediate vpermq.
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

; permps whose indices move whole 64-bit pairs; expected to be emitted as an
; immediate vpermpd.
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

; pshufb with a byte-shift-left-with-zero-fill style mask; the recorded
; expectation is that it stays a vpshufb.
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

; pshufb with a byte-shift-right-with-zero-fill style mask; the recorded
; expectation is that it stays a vpshufb.
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

; pshufb that only permutes the low four 16-bit words of each 128-bit lane;
; expected to be emitted as vpshuflw.
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

; pshufb that only permutes the high four 16-bit words of each 128-bit lane;
; expected to be emitted as vpshufhw.
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

; The two masks from the pshuflw and pshufhw tests applied back to back;
; expected to be merged into one vpshufb rather than a vpshuflw/vpshufhw pair.
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}