; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Verify that instcombine is able to fold identity shuffles.

; Selector <0,1,2,3> keeps every float in place, so the call folds away to %v.
define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @identity_test_vpermilvar_ps(
; CHECK-NEXT:    ret <4 x float> %v
;
  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
  ret <4 x float> %a
}

; 256-bit variant: selector <0..7> is expected to fold to %v as well.
define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
; CHECK-NEXT:    ret <8 x float> %v
;
  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x float> %a
}

; For the pd forms the identity selector is <0, 2> rather than <0, 1>: the
; expected output shows that selector value 2 picks element 1 of the pair.
define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
; CHECK-LABEL: @identity_test_vpermilvar_pd(
; CHECK-NEXT:    ret <2 x double> %v
;
  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
  ret <2 x double> %a
}

; 256-bit pd variant: the <0, 2> pattern is repeated for the upper two elements.
define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
; CHECK-NEXT:    ret <4 x double> %v
;
  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
  ret <4 x double> %a
}

; Instcombine should be able to fold the following all-zero-selector shuffles to a
; builtin shufflevector that repeats the first element of each 128-bit lane.

; An all-zero selector is expected to become a shufflevector with an all-zero mask.
define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
  ret <4 x float> %a
}

; 256-bit variant: the expected mask is <0,0,0,0,4,4,4,4>, i.e. the upper four
; results come from element 4 rather than element 0.
define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
  ret <8 x float> %a
}

; pd form: an all-zero i64 selector is expected to become an all-zero i32 shuffle mask.
define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
  ret <2 x double> %a
}

; 256-bit pd variant: the expected mask is <0,0,2,2>, i.e. the upper two results
; come from element 2 rather than element 0.
define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
  ret <4 x double> %a
}

; Verify that instcombine is able to fold constant shuffles.

; A reversing selector <3,2,1,0> is expected to become the same shufflevector mask.
define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  ret <4 x float> %a
}

; 256-bit variant: selector <7,...,0> is expected to yield <3,2,1,0,7,6,5,4>, so
; each group of four is reversed separately instead of reversing all eight.
define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; pd form: selector <2, 0> is expected to become the swap mask <1, 0>.
define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
  ret <2 x double> %a
}

; 256-bit pd variant: selector <3,1,2,0> is expected to yield <1,0,3,2>, so
; selector values 3 and 2 pick the second element of a pair and 1 and 0 the first.
define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
  ret <4 x double> %a
}

; Verify that instcombine is able to fold constant shuffles with undef mask elements.

; Undef selector elements are expected to stay undef in the shufflevector mask.
define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @undef_test_vpermilvar_ps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
  ret <4 x float> %a
}

; 256-bit variant: the defined selector values 6 and 5 are expected to map to
; mask values 2 and 1, while the undef elements are preserved.
define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; pd form: an undef i64 selector element is expected to become an undef i32 mask element.
define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
; CHECK-LABEL: @undef_test_vpermilvar_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
  ret <2 x double> %a
}

; 256-bit pd variant: the defined selector values 1 and 2 are expected to map to
; mask values 0 and 3, while the undef elements are preserved.
define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
; CHECK-NEXT:    ret <4 x double> [[TMP1]]
;
  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
  ret <4 x double> %a
}

; Declarations of the intrinsics called by the tests in this file.
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)

declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)