; Cost-model regression test for the masked load/store/gather/scatter
; intrinsics on x86-64.
;
; The same input is analyzed three times by the commands below, once per
; -mcpu value (core-avx2, knl, skx); each invocation is verified with its own
; FileCheck prefix. Every function contains exactly one masked intrinsic call,
; and the check lines pin the "Found an estimated cost of N" line that the
; cost-model analysis prints for that call.
;
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s | FileCheck %s --check-prefix=AVX2
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck %s --check-prefix=KNL
; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=skx -cost-model -analyze < %s | FileCheck %s --check-prefix=SKX


; ---------------------------------------------------------------------------
; Masked loads and stores. These functions are only checked by the core-avx2
; invocation. In each one the mask is computed by comparing %trigger against
; zero.
; ---------------------------------------------------------------------------

; Masked load of <2 x double>.
; AVX2-LABEL: test1
; AVX2: Found an estimated cost of 4 {{.*}}.masked
define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
  ret <2 x double> %res
}

; Masked load of <4 x i32>.
; AVX2-LABEL: test2
; AVX2: Found an estimated cost of 4 {{.*}}.masked
define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
  ret <4 x i32> %res
}

; Masked store of <4 x i32>.
; AVX2-LABEL: test3
; AVX2: Found an estimated cost of 4 {{.*}}.masked
define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
  ret void
}

; Masked load of <8 x float>.
; AVX2-LABEL: test4
; AVX2: Found an estimated cost of 4 {{.*}}.masked
define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
  %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
  ret <8 x float> %res
}

; Masked store of a two-element float vector.
; AVX2-LABEL: test5
; AVX2: Found an estimated cost of 5 {{.*}}.masked
define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2f32.p0v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

; Masked store of a two-element i32 vector.
; AVX2-LABEL: test6
; AVX2: Found an estimated cost of 6 {{.*}}.masked
define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
  ret void
}

; Masked load of a two-element float vector.
; AVX2-LABEL: test7
; AVX2: Found an estimated cost of 5 {{.*}}.masked
define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
  ret <2 x float> %res
}

; Masked load of a two-element i32 vector.
; AVX2-LABEL: test8
; AVX2: Found an estimated cost of 6 {{.*}}.masked
define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
  ret <2 x i32> %res
}

; ---------------------------------------------------------------------------
; Gathers and scatters. These functions are checked by all three invocations;
; the check lines live inside the function bodies.
; ---------------------------------------------------------------------------

; Gather of <2 x double> through a vector of pointers with a variable mask.
define <2 x double> @test_gather_2f64(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %src0) {

; AVX2-LABEL: test_gather_2f64
; AVX2: Found an estimated cost of 7 {{.*}}.gather

; KNL-LABEL: test_gather_2f64
; KNL: Found an estimated cost of 7 {{.*}}.gather

; SKX-LABEL: test_gather_2f64
; SKX: Found an estimated cost of 7 {{.*}}.gather

  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)

; Gather of <4 x i32> through a vector of pointers with a variable mask.
define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {

; AVX2-LABEL: test_gather_4i32
; AVX2: Found an estimated cost of 16 {{.*}}.gather

; KNL-LABEL: test_gather_4i32
; KNL: Found an estimated cost of 16 {{.*}}.gather

; SKX-LABEL: test_gather_4i32
; SKX: Found an estimated cost of 6 {{.*}}.gather

  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
  ret <4 x i32> %res
}

; Same gather as test_gather_4i32, but the mask is the all-true constant.
define <4 x i32> @test_gather_4i32_const_mask(<4 x i32*> %ptrs, <4 x i32> %src0) {

; AVX2-LABEL: test_gather_4i32_const_mask
; AVX2: Found an estimated cost of 8 {{.*}}.gather

; KNL-LABEL: test_gather_4i32_const_mask
; KNL: Found an estimated cost of 8 {{.*}}.gather

; SKX-LABEL: test_gather_4i32_const_mask
; SKX: Found an estimated cost of 6 {{.*}}.gather

  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)

; Gather of <16 x float> addressed as scalar base + sign-extended vector
; index, with an all-true constant mask.
define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {

; AVX2-LABEL: test_gather_16f32_const_mask
; AVX2: Found an estimated cost of 30 {{.*}}.gather

; KNL-LABEL: test_gather_16f32_const_mask
; KNL: Found an estimated cost of 18 {{.*}}.gather

; SKX-LABEL: test_gather_16f32_const_mask
; SKX: Found an estimated cost of 18 {{.*}}.gather

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Same addressing as test_gather_16f32_const_mask, but with a variable mask.
define <16 x float> @test_gather_16f32_var_mask(float* %base, <16 x i32> %ind, <16 x i1>%mask) {

; AVX2-LABEL: test_gather_16f32_var_mask
; AVX2: Found an estimated cost of 62 {{.*}}.gather

; KNL-LABEL: test_gather_16f32_var_mask
; KNL: Found an estimated cost of 18 {{.*}}.gather

; SKX-LABEL: test_gather_16f32_var_mask
; SKX: Found an estimated cost of 18 {{.*}}.gather

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float>%res
}

; Variable-mask gather where the GEP base is itself a vector of pointers
; (%ptrs) rather than a single scalar base.
define <16 x float> @test_gather_16f32_ra_var_mask(<16 x float*> %ptrs, <16 x i32> %ind, <16 x i1>%mask) {

; AVX2-LABEL: test_gather_16f32_ra_var_mask
; AVX2: Found an estimated cost of 62 {{.*}}.gather

; KNL-LABEL: test_gather_16f32_ra_var_mask
; KNL: Found an estimated cost of 20 {{.*}}.gather

; SKX-LABEL: test_gather_16f32_ra_var_mask
; SKX: Found an estimated cost of 20 {{.*}}.gather

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
  ret <16 x float>%res
}

; Constant-mask gather where the scalar base is first splatted into a vector
; of pointers (insertelement + shufflevector) before the GEP.
define <16 x float> @test_gather_16f32_const_mask2(float* %base, <16 x i32> %ind) {

; AVX2-LABEL: test_gather_16f32_const_mask2
; AVX2: Found an estimated cost of 30 {{.*}}.gather

; KNL-LABEL: test_gather_16f32_const_mask2
; KNL: Found an estimated cost of 18 {{.*}}.gather

; SKX-LABEL: test_gather_16f32_const_mask2
; SKX: Found an estimated cost of 18 {{.*}}.gather

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Scatter of <16 x i32> through a splatted base plus a vector index. The mask
; arrives as an i16 and is bitcast to <16 x i1>.
define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; AVX2-LABEL: test_scatter_16i32
; AVX2: Found an estimated cost of 64 {{.*}}.scatter

; KNL-LABEL: test_scatter_16i32
; KNL: Found an estimated cost of 18 {{.*}}.scatter

; SKX-LABEL: test_scatter_16i32
; SKX: Found an estimated cost of 18 {{.*}}.scatter

  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
  %imask = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
  ret void
}

; Scatter of <8 x i32> through a vector of pointers with a variable mask.
define void @test_scatter_8i32(<8 x i32>%a1, <8 x i32*> %ptr, <8 x i1>%mask) {
; AVX2-LABEL: test_scatter_8i32
; AVX2: Found an estimated cost of 32 {{.*}}.scatter

; KNL-LABEL: test_scatter_8i32
; KNL: Found an estimated cost of 10 {{.*}}.scatter

; SKX-LABEL: test_scatter_8i32
; SKX: Found an estimated cost of 10 {{.*}}.scatter

  call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)

; Scatter of <4 x i32> through a vector of pointers with a variable mask.
define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
; AVX2-LABEL: test_scatter_4i32
; AVX2: Found an estimated cost of 16 {{.*}}.scatter

; KNL-LABEL: test_scatter_4i32
; KNL: Found an estimated cost of 16 {{.*}}.scatter

; SKX-LABEL: test_scatter_4i32
; SKX: Found an estimated cost of 6 {{.*}}.scatter

  call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
  ret void
}

; Gather of <4 x float> addressed as scalar base + sign-extended vector index,
; with a variable mask.
define <4 x float> @test_gather_4f32(float* %ptr, <4 x i32> %ind, <4 x i1>%mask) {

; AVX2-LABEL: test_gather_4f32
; AVX2: Found an estimated cost of 15 {{.*}}.gather

; KNL-LABEL: test_gather_4f32
; KNL: Found an estimated cost of 15 {{.*}}.gather

; SKX-LABEL: test_gather_4f32
; SKX: Found an estimated cost of 6 {{.*}}.gather

  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind

  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float>%res
}

; Same gather as test_gather_4f32, but the mask is the all-true constant.
define <4 x float> @test_gather_4f32_const_mask(float* %ptr, <4 x i32> %ind) {

; AVX2-LABEL: test_gather_4f32_const_mask
; AVX2: Found an estimated cost of 7 {{.*}}.gather

; KNL-LABEL: test_gather_4f32_const_mask
; KNL: Found an estimated cost of 7 {{.*}}.gather

; SKX-LABEL: test_gather_4f32_const_mask
; SKX: Found an estimated cost of 6 {{.*}}.gather

  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind

  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
  ret <4 x float>%res
}

; ---------------------------------------------------------------------------
; Intrinsic declarations. Several of these are not called by any function in
; this file; they are kept as in the original test.
; ---------------------------------------------------------------------------

declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)

declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
declare void @llvm.masked.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
declare <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
declare void @llvm.masked.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)