1*67e74705SXin Li// REQUIRES: amdgpu-registered-target 2*67e74705SXin Li// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s 3*67e74705SXin Li 4*67e74705SXin Li#pragma OPENCL EXTENSION cl_khr_fp64 : enable 5*67e74705SXin Li 6*67e74705SXin Litypedef unsigned long ulong; 7*67e74705SXin Li 8*67e74705SXin Li// CHECK-LABEL: @test_div_scale_f64 9*67e74705SXin Li// CHECK: call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) 10*67e74705SXin Li// CHECK-DAG: [[FLAG:%.+]] = extractvalue { double, i1 } %{{.+}}, 1 11*67e74705SXin Li// CHECK-DAG: [[VAL:%.+]] = extractvalue { double, i1 } %{{.+}}, 0 12*67e74705SXin Li// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 13*67e74705SXin Li// CHECK: store i32 [[FLAGEXT]] 14*67e74705SXin Livoid test_div_scale_f64(global double* out, global int* flagout, double a, double b) 15*67e74705SXin Li{ 16*67e74705SXin Li bool flag; 17*67e74705SXin Li *out = __builtin_amdgcn_div_scale(a, b, true, &flag); 18*67e74705SXin Li *flagout = flag; 19*67e74705SXin Li} 20*67e74705SXin Li 21*67e74705SXin Li// CHECK-LABEL: @test_div_scale_f32 22*67e74705SXin Li// CHECK: call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) 23*67e74705SXin Li// CHECK-DAG: [[FLAG:%.+]] = extractvalue { float, i1 } %{{.+}}, 1 24*67e74705SXin Li// CHECK-DAG: [[VAL:%.+]] = extractvalue { float, i1 } %{{.+}}, 0 25*67e74705SXin Li// CHECK: [[FLAGEXT:%.+]] = zext i1 [[FLAG]] to i32 26*67e74705SXin Li// CHECK: store i32 [[FLAGEXT]] 27*67e74705SXin Livoid test_div_scale_f32(global float* out, global int* flagout, float a, float b) 28*67e74705SXin Li{ 29*67e74705SXin Li bool flag; 30*67e74705SXin Li *out = __builtin_amdgcn_div_scalef(a, b, true, &flag); 31*67e74705SXin Li *flagout = flag; 32*67e74705SXin Li} 33*67e74705SXin Li 34*67e74705SXin Li// CHECK-LABEL: @test_div_fmas_f32 35*67e74705SXin Li// CHECK: call float @llvm.amdgcn.div.fmas.f32 36*67e74705SXin Livoid test_div_fmas_f32(global float* out, float a, float b, float c, int d) 37*67e74705SXin Li{ 38*67e74705SXin Li *out = __builtin_amdgcn_div_fmasf(a, b, c, d); 39*67e74705SXin Li} 40*67e74705SXin Li 41*67e74705SXin Li// CHECK-LABEL: @test_div_fmas_f64 42*67e74705SXin Li// CHECK: call double @llvm.amdgcn.div.fmas.f64 43*67e74705SXin Livoid test_div_fmas_f64(global double* out, double a, double b, double c, int d) 44*67e74705SXin Li{ 45*67e74705SXin Li *out = __builtin_amdgcn_div_fmas(a, b, c, d); 46*67e74705SXin Li} 47*67e74705SXin Li 48*67e74705SXin Li// CHECK-LABEL: @test_div_fixup_f32 49*67e74705SXin Li// CHECK: call float @llvm.amdgcn.div.fixup.f32 50*67e74705SXin Livoid test_div_fixup_f32(global float* out, float a, float b, float c) 51*67e74705SXin Li{ 52*67e74705SXin Li *out = __builtin_amdgcn_div_fixupf(a, b, c); 53*67e74705SXin Li} 54*67e74705SXin Li 55*67e74705SXin Li// CHECK-LABEL: @test_div_fixup_f64 56*67e74705SXin Li// CHECK: call double @llvm.amdgcn.div.fixup.f64 57*67e74705SXin Livoid test_div_fixup_f64(global double* out, double a, double b, double c) 58*67e74705SXin Li{ 59*67e74705SXin Li *out = __builtin_amdgcn_div_fixup(a, b, c); 60*67e74705SXin Li} 61*67e74705SXin Li 62*67e74705SXin Li// CHECK-LABEL: @test_trig_preop_f32 63*67e74705SXin Li// CHECK: call float @llvm.amdgcn.trig.preop.f32 64*67e74705SXin Livoid test_trig_preop_f32(global float* out, float a, int b) 65*67e74705SXin Li{ 66*67e74705SXin Li *out = __builtin_amdgcn_trig_preopf(a, b); 67*67e74705SXin Li} 68*67e74705SXin Li 69*67e74705SXin Li// CHECK-LABEL: @test_trig_preop_f64 70*67e74705SXin Li// CHECK: call double @llvm.amdgcn.trig.preop.f64 71*67e74705SXin Livoid test_trig_preop_f64(global double* out, double a, int b) 72*67e74705SXin Li{ 73*67e74705SXin Li *out = __builtin_amdgcn_trig_preop(a, b); 74*67e74705SXin Li} 75*67e74705SXin Li 76*67e74705SXin Li// CHECK-LABEL: @test_rcp_f32 77*67e74705SXin Li// CHECK: call float @llvm.amdgcn.rcp.f32 78*67e74705SXin Livoid test_rcp_f32(global float* out, float a) 79*67e74705SXin Li{ 80*67e74705SXin Li *out = __builtin_amdgcn_rcpf(a); 81*67e74705SXin Li} 82*67e74705SXin Li 83*67e74705SXin Li// CHECK-LABEL: @test_rcp_f64 84*67e74705SXin Li// CHECK: call double @llvm.amdgcn.rcp.f64 85*67e74705SXin Livoid test_rcp_f64(global double* out, double a) 86*67e74705SXin Li{ 87*67e74705SXin Li *out = __builtin_amdgcn_rcp(a); 88*67e74705SXin Li} 89*67e74705SXin Li 90*67e74705SXin Li// CHECK-LABEL: @test_rsq_f32 91*67e74705SXin Li// CHECK: call float @llvm.amdgcn.rsq.f32 92*67e74705SXin Livoid test_rsq_f32(global float* out, float a) 93*67e74705SXin Li{ 94*67e74705SXin Li *out = __builtin_amdgcn_rsqf(a); 95*67e74705SXin Li} 96*67e74705SXin Li 97*67e74705SXin Li// CHECK-LABEL: @test_rsq_f64 98*67e74705SXin Li// CHECK: call double @llvm.amdgcn.rsq.f64 99*67e74705SXin Livoid test_rsq_f64(global double* out, double a) 100*67e74705SXin Li{ 101*67e74705SXin Li *out = __builtin_amdgcn_rsq(a); 102*67e74705SXin Li} 103*67e74705SXin Li 104*67e74705SXin Li// CHECK-LABEL: @test_rsq_clamp_f32 105*67e74705SXin Li// CHECK: call float @llvm.amdgcn.rsq.clamp.f32 106*67e74705SXin Livoid test_rsq_clamp_f32(global float* out, float a) 107*67e74705SXin Li{ 108*67e74705SXin Li *out = __builtin_amdgcn_rsq_clampf(a); 109*67e74705SXin Li} 110*67e74705SXin Li 111*67e74705SXin Li// CHECK-LABEL: @test_rsq_clamp_f64 112*67e74705SXin Li// CHECK: call double @llvm.amdgcn.rsq.clamp.f64 113*67e74705SXin Livoid test_rsq_clamp_f64(global double* out, double a) 114*67e74705SXin Li{ 115*67e74705SXin Li *out = __builtin_amdgcn_rsq_clamp(a); 116*67e74705SXin Li} 117*67e74705SXin Li 118*67e74705SXin Li// CHECK-LABEL: @test_sin_f32 119*67e74705SXin Li// CHECK: call float @llvm.amdgcn.sin.f32 120*67e74705SXin Livoid test_sin_f32(global float* out, float a) 121*67e74705SXin Li{ 122*67e74705SXin Li *out = __builtin_amdgcn_sinf(a); 123*67e74705SXin Li} 124*67e74705SXin Li 125*67e74705SXin Li// CHECK-LABEL: @test_cos_f32 126*67e74705SXin Li// CHECK: call float @llvm.amdgcn.cos.f32 127*67e74705SXin Livoid test_cos_f32(global float* out, float a) 128*67e74705SXin Li{ 129*67e74705SXin Li *out = __builtin_amdgcn_cosf(a); 130*67e74705SXin Li} 131*67e74705SXin Li 132*67e74705SXin Li// CHECK-LABEL: @test_log_clamp_f32 133*67e74705SXin Li// CHECK: call float @llvm.amdgcn.log.clamp.f32 134*67e74705SXin Livoid test_log_clamp_f32(global float* out, float a) 135*67e74705SXin Li{ 136*67e74705SXin Li *out = __builtin_amdgcn_log_clampf(a); 137*67e74705SXin Li} 138*67e74705SXin Li 139*67e74705SXin Li// CHECK-LABEL: @test_ldexp_f32 140*67e74705SXin Li// CHECK: call float @llvm.amdgcn.ldexp.f32 141*67e74705SXin Livoid test_ldexp_f32(global float* out, float a, int b) 142*67e74705SXin Li{ 143*67e74705SXin Li *out = __builtin_amdgcn_ldexpf(a, b); 144*67e74705SXin Li} 145*67e74705SXin Li 146*67e74705SXin Li// CHECK-LABEL: @test_ldexp_f64 147*67e74705SXin Li// CHECK: call double @llvm.amdgcn.ldexp.f64 148*67e74705SXin Livoid test_ldexp_f64(global double* out, double a, int b) 149*67e74705SXin Li{ 150*67e74705SXin Li *out = __builtin_amdgcn_ldexp(a, b); 151*67e74705SXin Li} 152*67e74705SXin Li 153*67e74705SXin Li// CHECK-LABEL: @test_frexp_mant_f32 154*67e74705SXin Li// CHECK: call float @llvm.amdgcn.frexp.mant.f32 155*67e74705SXin Livoid test_frexp_mant_f32(global float* out, float a) 156*67e74705SXin Li{ 157*67e74705SXin Li *out = __builtin_amdgcn_frexp_mantf(a); 158*67e74705SXin Li} 159*67e74705SXin Li 160*67e74705SXin Li// CHECK-LABEL: @test_frexp_mant_f64 161*67e74705SXin Li// CHECK: call double @llvm.amdgcn.frexp.mant.f64 162*67e74705SXin Livoid test_frexp_mant_f64(global double* out, double a) 163*67e74705SXin Li{ 164*67e74705SXin Li *out = __builtin_amdgcn_frexp_mant(a); 165*67e74705SXin Li} 166*67e74705SXin Li 167*67e74705SXin Li// CHECK-LABEL: @test_frexp_exp_f32 168*67e74705SXin Li// CHECK: call i32 @llvm.amdgcn.frexp.exp.f32 169*67e74705SXin Livoid test_frexp_exp_f32(global int* out, float a) 170*67e74705SXin Li{ 171*67e74705SXin Li *out = __builtin_amdgcn_frexp_expf(a); 172*67e74705SXin Li} 173*67e74705SXin Li 174*67e74705SXin Li// CHECK-LABEL: @test_frexp_exp_f64 175*67e74705SXin Li// CHECK: call i32 @llvm.amdgcn.frexp.exp.f64 176*67e74705SXin Livoid test_frexp_exp_f64(global int* out, double a) 177*67e74705SXin Li{ 178*67e74705SXin Li *out = __builtin_amdgcn_frexp_exp(a); 179*67e74705SXin Li} 180*67e74705SXin Li 181*67e74705SXin Li// CHECK-LABEL: @test_fract_f32 182*67e74705SXin Li// CHECK: call float @llvm.amdgcn.fract.f32 183*67e74705SXin Livoid test_fract_f32(global int* out, float a) 184*67e74705SXin Li{ 185*67e74705SXin Li *out = __builtin_amdgcn_fractf(a); 186*67e74705SXin Li} 187*67e74705SXin Li 188*67e74705SXin Li// CHECK-LABEL: @test_fract_f64 189*67e74705SXin Li// CHECK: call double @llvm.amdgcn.fract.f64 190*67e74705SXin Livoid test_fract_f64(global int* out, double a) 191*67e74705SXin Li{ 192*67e74705SXin Li *out = __builtin_amdgcn_fract(a); 193*67e74705SXin Li} 194*67e74705SXin Li 195*67e74705SXin Li// CHECK-LABEL: @test_class_f32 196*67e74705SXin Li// CHECK: call i1 @llvm.amdgcn.class.f32 197*67e74705SXin Livoid test_class_f32(global float* out, float a, int b) 198*67e74705SXin Li{ 199*67e74705SXin Li *out = __builtin_amdgcn_classf(a, b); 200*67e74705SXin Li} 201*67e74705SXin Li 202*67e74705SXin Li// CHECK-LABEL: @test_class_f64 203*67e74705SXin Li// CHECK: call i1 @llvm.amdgcn.class.f64 204*67e74705SXin Livoid test_class_f64(global double* out, double a, int b) 205*67e74705SXin Li{ 206*67e74705SXin Li *out = __builtin_amdgcn_class(a, b); 207*67e74705SXin Li} 208*67e74705SXin Li 209*67e74705SXin Li// CHECK-LABEL: @test_s_barrier 210*67e74705SXin Li// CHECK: call void @llvm.amdgcn.s.barrier( 211*67e74705SXin Livoid test_s_barrier() 212*67e74705SXin Li{ 213*67e74705SXin Li __builtin_amdgcn_s_barrier(); 214*67e74705SXin Li} 215*67e74705SXin Li 216*67e74705SXin Li// CHECK-LABEL: @test_s_memtime 217*67e74705SXin Li// CHECK: call i64 @llvm.amdgcn.s.memtime() 218*67e74705SXin Livoid test_s_memtime(global ulong* out) 219*67e74705SXin Li{ 220*67e74705SXin Li *out = __builtin_amdgcn_s_memtime(); 221*67e74705SXin Li} 222*67e74705SXin Li 223*67e74705SXin Li// CHECK-LABEL: @test_s_sleep 224*67e74705SXin Li// CHECK: call void @llvm.amdgcn.s.sleep(i32 1) 225*67e74705SXin Li// CHECK: call void @llvm.amdgcn.s.sleep(i32 15) 226*67e74705SXin Livoid test_s_sleep() 227*67e74705SXin Li{ 228*67e74705SXin Li __builtin_amdgcn_s_sleep(1); 229*67e74705SXin Li __builtin_amdgcn_s_sleep(15); 230*67e74705SXin Li} 231*67e74705SXin Li 232*67e74705SXin Li// CHECK-LABEL: @test_cubeid( 233*67e74705SXin Li// CHECK: call float @llvm.amdgcn.cubeid(float %a, float %b, float %c) 234*67e74705SXin Livoid test_cubeid(global float* out, float a, float b, float c) { 235*67e74705SXin Li *out = __builtin_amdgcn_cubeid(a, b, c); 236*67e74705SXin Li} 237*67e74705SXin Li 238*67e74705SXin Li// CHECK-LABEL: @test_cubesc( 239*67e74705SXin Li// CHECK: call float @llvm.amdgcn.cubesc(float %a, float %b, float %c) 240*67e74705SXin Livoid test_cubesc(global float* out, float a, float b, float c) { 241*67e74705SXin Li *out = __builtin_amdgcn_cubesc(a, b, c); 242*67e74705SXin Li} 243*67e74705SXin Li 244*67e74705SXin Li// CHECK-LABEL: @test_cubetc( 245*67e74705SXin Li// CHECK: call float @llvm.amdgcn.cubetc(float %a, float %b, float %c) 246*67e74705SXin Livoid test_cubetc(global float* out, float a, float b, float c) { 247*67e74705SXin Li *out = __builtin_amdgcn_cubetc(a, b, c); 248*67e74705SXin Li} 249*67e74705SXin Li 250*67e74705SXin Li// CHECK-LABEL: @test_cubema( 251*67e74705SXin Li// CHECK: call float @llvm.amdgcn.cubema(float %a, float %b, float %c) 252*67e74705SXin Livoid test_cubema(global float* out, float a, float b, float c) { 253*67e74705SXin Li *out = __builtin_amdgcn_cubema(a, b, c); 254*67e74705SXin Li} 255*67e74705SXin Li 256*67e74705SXin Li// CHECK-LABEL: @test_read_exec( 257*67e74705SXin Li// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]] 258*67e74705SXin Livoid test_read_exec(global ulong* out) { 259*67e74705SXin Li *out = __builtin_amdgcn_read_exec(); 260*67e74705SXin Li} 261*67e74705SXin Li 262*67e74705SXin Li// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]] 263*67e74705SXin Li 264*67e74705SXin Li// Legacy intrinsics with AMDGPU prefix 265*67e74705SXin Li 266*67e74705SXin Li// CHECK-LABEL: @test_legacy_rsq_f32 267*67e74705SXin Li// CHECK: call float @llvm.amdgcn.rsq.f32 268*67e74705SXin Livoid test_legacy_rsq_f32(global float* out, float a) 269*67e74705SXin Li{ 270*67e74705SXin Li *out = __builtin_amdgpu_rsqf(a); 271*67e74705SXin Li} 272*67e74705SXin Li 273*67e74705SXin Li// CHECK-LABEL: @test_legacy_rsq_f64 274*67e74705SXin Li// CHECK: call double @llvm.amdgcn.rsq.f64 275*67e74705SXin Livoid test_legacy_rsq_f64(global double* out, double a) 276*67e74705SXin Li{ 277*67e74705SXin Li *out = __builtin_amdgpu_rsq(a); 278*67e74705SXin Li} 279*67e74705SXin Li 280*67e74705SXin Li// CHECK-LABEL: @test_legacy_ldexp_f32 281*67e74705SXin Li// CHECK: call float @llvm.amdgcn.ldexp.f32 282*67e74705SXin Livoid test_legacy_ldexp_f32(global float* out, float a, int b) 283*67e74705SXin Li{ 284*67e74705SXin Li *out = __builtin_amdgpu_ldexpf(a, b); 285*67e74705SXin Li} 286*67e74705SXin Li 287*67e74705SXin Li// CHECK-LABEL: @test_legacy_ldexp_f64 288*67e74705SXin Li// CHECK: call double @llvm.amdgcn.ldexp.f64 289*67e74705SXin Livoid test_legacy_ldexp_f64(global double* out, double a, int b) 290*67e74705SXin Li{ 291*67e74705SXin Li *out = __builtin_amdgpu_ldexp(a, b); 292*67e74705SXin Li} 293*67e74705SXin Li 294*67e74705SXin Li// CHECK-LABEL: @test_kernarg_segment_ptr 295*67e74705SXin Li// CHECK: call i8 addrspace(2)* @llvm.amdgcn.kernarg.segment.ptr() 296*67e74705SXin Livoid test_kernarg_segment_ptr(__attribute__((address_space(2))) unsigned char ** out) 297*67e74705SXin Li{ 298*67e74705SXin Li *out = __builtin_amdgcn_kernarg_segment_ptr(); 299*67e74705SXin Li} 300*67e74705SXin Li 301*67e74705SXin Li// CHECK-LABEL: @test_implicitarg_ptr 302*67e74705SXin Li// CHECK: call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() 303*67e74705SXin Livoid test_implicitarg_ptr(__attribute__((address_space(2))) unsigned char ** out) 304*67e74705SXin Li{ 305*67e74705SXin Li *out = __builtin_amdgcn_implicitarg_ptr(); 306*67e74705SXin Li} 307*67e74705SXin Li 308*67e74705SXin Li// CHECK-LABEL: @test_get_group_id( 309*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.x() 310*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.y() 311*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workgroup.id.z() 312*67e74705SXin Livoid test_get_group_id(int d, global int *out) 313*67e74705SXin Li{ 314*67e74705SXin Li switch (d) { 315*67e74705SXin Li case 0: *out = __builtin_amdgcn_workgroup_id_x(); break; 316*67e74705SXin Li case 1: *out = __builtin_amdgcn_workgroup_id_y(); break; 317*67e74705SXin Li case 2: *out = __builtin_amdgcn_workgroup_id_z(); break; 318*67e74705SXin Li default: *out = 0; 319*67e74705SXin Li } 320*67e74705SXin Li} 321*67e74705SXin Li 322*67e74705SXin Li// CHECK-LABEL: @test_get_local_id( 323*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[WI_RANGE:![0-9]*]] 324*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workitem.id.y(), !range [[WI_RANGE]] 325*67e74705SXin Li// CHECK: tail call i32 @llvm.amdgcn.workitem.id.z(), !range [[WI_RANGE]] 326*67e74705SXin Livoid test_get_local_id(int d, global int *out) 327*67e74705SXin Li{ 328*67e74705SXin Li switch (d) { 329*67e74705SXin Li case 0: *out = __builtin_amdgcn_workitem_id_x(); break; 330*67e74705SXin Li case 1: *out = __builtin_amdgcn_workitem_id_y(); break; 331*67e74705SXin Li case 2: *out = __builtin_amdgcn_workitem_id_z(); break; 332*67e74705SXin Li default: *out = 0; 333*67e74705SXin Li } 334*67e74705SXin Li} 335*67e74705SXin Li 336*67e74705SXin Li// CHECK-DAG: [[WI_RANGE]] = !{i32 0, i32 1024} 337*67e74705SXin Li// CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } 338*67e74705SXin Li// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } 339*67e74705SXin Li// CHECK-DAG: ![[EXEC]] = !{!"exec"} 340