// xref: /aosp_15_r20/external/clang/test/CodeGen/builtins-nvptx.c (revision 67e74705e28f6214e480b399dd47ea732279e315)
// REQUIRES: nvptx-registered-target
// RUN: %clang_cc1 -triple nvptx-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \
// RUN:   FileCheck -check-prefix=CHECK -check-prefix=LP32 %s
// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -fcuda-is-device -S -emit-llvm -o - -x cuda %s | \
// RUN:   FileCheck -check-prefix=CHECK -check-prefix=LP64 %s
6*67e74705SXin Li 
// Minimal stand-ins for the CUDA qualifier keywords, expressed through the
// Clang attributes of the same names, so this test does not depend on any
// CUDA header.
#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))
11*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_tid_{x,y,z,w} builtins. The FileCheck
// directives expect one call to the matching llvm.nvvm.read.ptx.sreg.tid.*
// intrinsic per builtin, in source order.
__device__ int read_tid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.tid.w()

  int x = __nvvm_read_ptx_sreg_tid_x();
  int y = __nvvm_read_ptx_sreg_tid_y();
  int z = __nvvm_read_ptx_sreg_tid_z();
  int w = __nvvm_read_ptx_sreg_tid_w();

  // Return the sum so that every builtin result is used.
  return x + y + z + w;

}
27*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_ntid_{x,y,z,w} builtins. The FileCheck
// directives expect one call to the matching llvm.nvvm.read.ptx.sreg.ntid.*
// intrinsic per builtin, in source order.
__device__ int read_ntid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ntid.w()

  int x = __nvvm_read_ptx_sreg_ntid_x();
  int y = __nvvm_read_ptx_sreg_ntid_y();
  int z = __nvvm_read_ptx_sreg_ntid_z();
  int w = __nvvm_read_ptx_sreg_ntid_w();

  // Return the sum so that every builtin result is used.
  return x + y + z + w;

}
43*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_ctaid_{x,y,z,w} builtins. The FileCheck
// directives expect one call to the matching llvm.nvvm.read.ptx.sreg.ctaid.*
// intrinsic per builtin, in source order.
__device__ int read_ctaid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.w()

  int x = __nvvm_read_ptx_sreg_ctaid_x();
  int y = __nvvm_read_ptx_sreg_ctaid_y();
  int z = __nvvm_read_ptx_sreg_ctaid_z();
  int w = __nvvm_read_ptx_sreg_ctaid_w();

  // Return the sum so that every builtin result is used.
  return x + y + z + w;

}
59*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_nctaid_{x,y,z,w} builtins. The FileCheck
// directives expect one call to the matching llvm.nvvm.read.ptx.sreg.nctaid.*
// intrinsic per builtin, in source order.
__device__ int read_nctaid() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nctaid.w()

  int x = __nvvm_read_ptx_sreg_nctaid_x();
  int y = __nvvm_read_ptx_sreg_nctaid_y();
  int z = __nvvm_read_ptx_sreg_nctaid_z();
  int w = __nvvm_read_ptx_sreg_nctaid_w();

  // Return the sum so that every builtin result is used.
  return x + y + z + w;

}
75*67e74705SXin Li 
// Exercises the laneid / warpid / nwarpid / smid / nsmid / gridid
// special-register builtins. The FileCheck directives expect one call to the
// matching llvm.nvvm.read.ptx.sreg.* intrinsic per builtin, in source order.
__device__ int read_ids() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.laneid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid()

  int a = __nvvm_read_ptx_sreg_laneid();
  int b = __nvvm_read_ptx_sreg_warpid();
  int c = __nvvm_read_ptx_sreg_nwarpid();
  int d = __nvvm_read_ptx_sreg_smid();
  int e = __nvvm_read_ptx_sreg_nsmid();
  int f = __nvvm_read_ptx_sreg_gridid();

  // Return the sum so that every builtin result is used.
  return a + b + c + d + e + f;

}
95*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_lanemask_{eq,le,lt,ge,gt} builtins. The
// FileCheck directives expect one call to the matching
// llvm.nvvm.read.ptx.sreg.lanemask.* intrinsic per builtin, in source order.
__device__ int read_lanemasks() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt()

  int a = __nvvm_read_ptx_sreg_lanemask_eq();
  int b = __nvvm_read_ptx_sreg_lanemask_le();
  int c = __nvvm_read_ptx_sreg_lanemask_lt();
  int d = __nvvm_read_ptx_sreg_lanemask_ge();
  int e = __nvvm_read_ptx_sreg_lanemask_gt();

  // Return the sum so that every builtin result is used.
  return a + b + c + d + e;

}
113*67e74705SXin Li 
// Exercises the clock and clock64 special-register builtins. The FileCheck
// directives expect the 32-bit variant to produce an i32 intrinsic call and
// the 64-bit variant to produce an i64 intrinsic call.
__device__ long long read_clocks() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()

  int a = __nvvm_read_ptx_sreg_clock();
  long long b = __nvvm_read_ptx_sreg_clock64();

  // Return the sum so that both builtin results are used.
  return a + b;
}
124*67e74705SXin Li 
// Exercises the __nvvm_read_ptx_sreg_pm{0,1,2,3} builtins. The FileCheck
// directives expect one call to the matching llvm.nvvm.read.ptx.sreg.pm*
// intrinsic per builtin, in source order.
__device__ int read_pms() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm0()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm1()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm2()
// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.pm3()

  int a = __nvvm_read_ptx_sreg_pm0();
  int b = __nvvm_read_ptx_sreg_pm1();
  int c = __nvvm_read_ptx_sreg_pm2();
  int d = __nvvm_read_ptx_sreg_pm3();

  // Return the sum so that every builtin result is used.
  return a + b + c + d;

}
140*67e74705SXin Li 
// Exercises __nvvm_bar_sync with a constant argument of 0. The FileCheck
// directive expects a call to llvm.nvvm.bar.sync carrying that same i32 0.
__device__ void sync() {

// CHECK: call void @llvm.nvvm.bar.sync(i32 0)

  __nvvm_bar_sync(0);

}
148*67e74705SXin Li 
149*67e74705SXin Li 
// NVVM intrinsics

// The idea is not to test all intrinsics, just that Clang recognizes the
// builtins defined in BuiltinsNVPTX.def.
// Exercises a sample of NVVM math, memory-barrier and barrier builtins.
//
// f1, f2: operands for the single-precision builtins.
// d1, d2: operands for the double-precision builtins.
//
// Each builtin call is preceded by the FileCheck directive naming the
// llvm.nvvm.* intrinsic it is expected to lower to. The results are stored in
// otherwise unused locals.
__device__ void nvvm_math(float f1, float f2, double d1, double d2) {
// CHECK: call float @llvm.nvvm.fmax.f
  float t1 = __nvvm_fmax_f(f1, f2);
// CHECK: call float @llvm.nvvm.fmin.f
  float t2 = __nvvm_fmin_f(f1, f2);
// CHECK: call float @llvm.nvvm.sqrt.rn.f
  float t3 = __nvvm_sqrt_rn_f(f1);
// CHECK: call float @llvm.nvvm.rcp.rn.f
  float t4 = __nvvm_rcp_rn_f(f2);
// CHECK: call float @llvm.nvvm.add.rn.f
  float t5 = __nvvm_add_rn_f(f1, f2);

// CHECK: call double @llvm.nvvm.fmax.d
  double td1 = __nvvm_fmax_d(d1, d2);
// CHECK: call double @llvm.nvvm.fmin.d
  double td2 = __nvvm_fmin_d(d1, d2);
// CHECK: call double @llvm.nvvm.sqrt.rn.d
  double td3 = __nvvm_sqrt_rn_d(d1);
// CHECK: call double @llvm.nvvm.rcp.rn.d
  double td4 = __nvvm_rcp_rn_d(d2);

// CHECK: call void @llvm.nvvm.membar.cta()
  __nvvm_membar_cta();
// CHECK: call void @llvm.nvvm.membar.gl()
  __nvvm_membar_gl();
// CHECK: call void @llvm.nvvm.membar.sys()
  __nvvm_membar_sys();
// __syncthreads() is expected to lower to the barrier0 intrinsic.
// CHECK: call void @llvm.nvvm.barrier0()
  __syncthreads();
}
184*67e74705SXin Li 
// Device- and shared-qualified globals of each integer width. Within the
// visible part of this file only dl and sll are referenced (as operands of
// the atomic builtins in nvvm_atom).
__device__ int di;
__shared__ int si;
__device__ long dl;
__shared__ long sl;
__device__ long long dll;
__shared__ long long sll;
191*67e74705SXin Li 
// Check for atomic intrinsics
// CHECK-LABEL: nvvm_atom
// Exercises the generic-address-space (_gen_) atomic builtins.
//
// fp/f:   float pointer and operand for the floating-point add.
// ip/i:   int pointer and operand for the 32-bit integer operations.
// uip/ui: unsigned pointer and operand for the inc/dec operations.
// l, ll:  operands for the long and long long operations; these target the
//         globals dl and sll rather than the lp/llp parameters, which are
//         unused in this function.
//
// Per the FileCheck directives, the integer add/sub/and/or/xor/xchg/max/min
// builtins are expected to lower to atomicrmw instructions, the cas builtins
// to cmpxchg followed by an extractvalue of element 0, and the float add and
// unsigned inc/dec builtins to llvm.nvvm.atomic.load.* intrinsic calls.
// The directives for the long variants leave the operand width open
// (presumably because long differs in size between the two tested triples).
__device__ void nvvm_atom(float *fp, float f, int *ip, int i, unsigned int *uip, unsigned ui, long *lp, long l,
                          long long *llp, long long ll) {
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_i(ip, i);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_l(&dl, l);
  // CHECK: atomicrmw add
  __nvvm_atom_add_gen_ll(&sll, ll);

  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_i(ip, i);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_l(&dl, l);
  // CHECK: atomicrmw sub
  __nvvm_atom_sub_gen_ll(&sll, ll);

  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_i(ip, i);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_l(&dl, l);
  // CHECK: atomicrmw and
  __nvvm_atom_and_gen_ll(&sll, ll);

  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_i(ip, i);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_l(&dl, l);
  // CHECK: atomicrmw or
  __nvvm_atom_or_gen_ll(&sll, ll);

  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_i(ip, i);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_l(&dl, l);
  // CHECK: atomicrmw xor
  __nvvm_atom_xor_gen_ll(&sll, ll);

  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_i(ip, i);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_l(&dl, l);
  // CHECK: atomicrmw xchg
  __nvvm_atom_xchg_gen_ll(&sll, ll);

  // Signed variants are expected to produce max/min, unsigned variants
  // umax/umin.
  // CHECK: atomicrmw max i32*
  __nvvm_atom_max_gen_i(ip, i);
  // CHECK: atomicrmw umax i32*
  __nvvm_atom_max_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw max
  __nvvm_atom_max_gen_l(&dl, l);
  // CHECK: atomicrmw umax
  __nvvm_atom_max_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw max i64*
  __nvvm_atom_max_gen_ll(&sll, ll);
  // CHECK: atomicrmw umax i64*
  __nvvm_atom_max_gen_ull((unsigned long long *)&sll, ll);

  // CHECK: atomicrmw min i32*
  __nvvm_atom_min_gen_i(ip, i);
  // CHECK: atomicrmw umin i32*
  __nvvm_atom_min_gen_ui((unsigned int *)ip, i);
  // CHECK: atomicrmw min
  __nvvm_atom_min_gen_l(&dl, l);
  // CHECK: atomicrmw umin
  __nvvm_atom_min_gen_ul((unsigned long *)&dl, l);
  // CHECK: atomicrmw min i64*
  __nvvm_atom_min_gen_ll(&sll, ll);
  // CHECK: atomicrmw umin i64*
  __nvvm_atom_min_gen_ull((unsigned long long *)&sll, ll);

  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i32, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_i(ip, 0, i);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { {{i32|i64}}, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_l(&dl, 0, l);
  // CHECK: cmpxchg
  // CHECK-NEXT: extractvalue { i64, i1 } {{%[0-9]+}}, 0
  __nvvm_atom_cas_gen_ll(&sll, 0, ll);

  // CHECK: call float @llvm.nvvm.atomic.load.add.f32.p0f32
  __nvvm_atom_add_gen_f(fp, f);

  // CHECK: call i32 @llvm.nvvm.atomic.load.inc.32.p0i32
  __nvvm_atom_inc_gen_ui(uip, ui);

  // CHECK: call i32 @llvm.nvvm.atomic.load.dec.32.p0i32
  __nvvm_atom_dec_gen_ui(uip, ui);

  // CHECK: ret
}
285*67e74705SXin Li 
// CHECK-LABEL: nvvm_ldg
// Exercises the __nvvm_ldg_* builtins for scalar and vector element types.
//
// p: untyped pointer that is cast to each element type in turn; the loaded
//    values are discarded.
//
// Each FileCheck directive expects a call to the llvm.nvvm.ldg.global.{i,f}
// intrinsic overloaded on the pointee type. Signed and unsigned variants are
// expected to produce the same intrinsic call, hence the duplicated
// directives. The trailing i32 operand matches the alignment discussed in the
// comment inside the function. The LP32 and LP64 prefixes cover the long
// variants, whose expected width is i32 under one prefix and i64 under the
// other.
__device__ void nvvm_ldg(const void *p) {
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0i8(i8* {{%[0-9]+}}, i32 1)
  __nvvm_ldg_c((const char *)p);
  __nvvm_ldg_uc((const unsigned char *)p);

  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0i16(i16* {{%[0-9]+}}, i32 2)
  __nvvm_ldg_s((const short *)p);
  __nvvm_ldg_us((const unsigned short *)p);

  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_i((const int *)p);
  __nvvm_ldg_ui((const unsigned int *)p);

  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* {{%[0-9]+}}, i32 4)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0i64(i64* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_l((const long *)p);
  __nvvm_ldg_ul((const unsigned long *)p);

  // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0f32(float* {{%[0-9]+}}, i32 4)
  __nvvm_ldg_f((const float *)p);
  // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0f64(double* {{%[0-9]+}}, i32 8)
  __nvvm_ldg_d((const double *)p);

  // In practice, the pointers we pass to __ldg will be aligned as appropriate
  // for the CUDA <type>N vector types (e.g. short4), which are not the same as
  // the LLVM vector types.  However, each LLVM vector type has an alignment
  // less than or equal to its corresponding CUDA type, so we're OK.
  //
  // PTX Interoperability section 2.2: "For a vector with an even number of
  // elements, its alignment is set to number of elements times the alignment of
  // its member: n*alignof(t)."

  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0v2i8(<2 x i8>* {{%[0-9]+}}, i32 2)
  typedef char char2 __attribute__((ext_vector_type(2)));
  typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_c2((const char2 *)p);
  __nvvm_ldg_uc2((const uchar2 *)p);

  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0v4i8(<4 x i8>* {{%[0-9]+}}, i32 4)
  typedef char char4 __attribute__((ext_vector_type(4)));
  typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_c4((const char4 *)p);
  __nvvm_ldg_uc4((const uchar4 *)p);

  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0v2i16(<2 x i16>* {{%[0-9]+}}, i32 4)
  typedef short short2 __attribute__((ext_vector_type(2)));
  typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_s2((const short2 *)p);
  __nvvm_ldg_us2((const ushort2 *)p);

  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0v4i16(<4 x i16>* {{%[0-9]+}}, i32 8)
  typedef short short4 __attribute__((ext_vector_type(4)));
  typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_s4((const short4 *)p);
  __nvvm_ldg_us4((const ushort4 *)p);

  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0v2i32(<2 x i32>* {{%[0-9]+}}, i32 8)
  typedef int int2 __attribute__((ext_vector_type(2)));
  typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_i2((const int2 *)p);
  __nvvm_ldg_ui2((const uint2 *)p);

  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0v4i32(<4 x i32>* {{%[0-9]+}}, i32 16)
  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_i4((const int4 *)p);
  __nvvm_ldg_ui4((const uint4 *)p);

  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0v2i64(<2 x i64>* {{%[0-9]+}}, i32 16)
  typedef long long longlong2 __attribute__((ext_vector_type(2)));
  typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_ll2((const longlong2 *)p);
  __nvvm_ldg_ull2((const ulonglong2 *)p);

  // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0v2f32(<2 x float>* {{%[0-9]+}}, i32 8)
  typedef float float2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_f2((const float2 *)p);

  // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0v4f32(<4 x float>* {{%[0-9]+}}, i32 16)
  typedef float float4 __attribute__((ext_vector_type(4)));
  __nvvm_ldg_f4((const float4 *)p);

  // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0v2f64(<2 x double>* {{%[0-9]+}}, i32 16)
  typedef double double2 __attribute__((ext_vector_type(2)));
  __nvvm_ldg_d2((const double2 *)p);
}
385