; xref: /aosp_15_r20/external/llvm/test/CodeGen/AMDGPU/large-work-group-registers.ll (revision 9880d6810fe72a1726cb53787c6711e909410d58)
;
; Codegen test: compiles @main for the amdgcn target (tonga CPU) with the
; post-RA scheduler disabled and verifies the VGPR count that llc reports.
; RUN: llc -march=amdgcn -mcpu=tonga -post-RA-scheduler=0 < %s | FileCheck %s

; The llc output must contain a reported VGPR count of exactly 64.
; NOTE(review): presumably 64 is the per-work-item VGPR budget implied by the
; "amdgpu-max-work-group-size"="1024" attribute on @main (attribute group #0)
; -- confirm against the AMDGPU backend's register-limit logic.
; CHECK: NumVgprs: 64

; @main takes five byval pointers to arrays in address space 2, two inreg
; <3 x i32> values (%5, %6) and one plain <3 x i32> value (%7). Arguments are
; unnamed, so they are numbered %0..%7 and the first instruction result is %8.
; The body builds a <512 x float> value that is written and read with
; dynamic (non-constant) indices on either side of a barrier.
; NOTE(review): the 512-element vector is presumably what creates the register
; pressure this test relies on -- confirm.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, <3 x i32> inreg, <3 x i32> inreg, <3 x i32>) #0 {
main_body:
  ; Load element 8 of the [16 x <4 x i32>] array passed as the fifth argument
  ; (%4); the loaded value is later the <4 x i32> operand of the buffer store.
  %8 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 8
  %9 = load <4 x i32>, <4 x i32> addrspace(2)* %8, align 16, !tbaa !0
  ; Dynamic insert index: product of components 0 and 1 of %7.
  %10 = extractelement <3 x i32> %7, i32 0
  %11 = extractelement <3 x i32> %7, i32 1
  %12 = mul i32 %10, %11
  ; Reinterpret %7 as floats and place component 1 into the 512-wide vector at
  ; the dynamic index computed above; all other lanes stay undef.
  %bc = bitcast <3 x i32> %7 to <3 x float>
  %13 = extractelement <3 x float> %bc, i32 1
  %14 = insertelement <512 x float> undef, float %13, i32 %12
  call void @llvm.amdgcn.s.barrier()
  ; %19 = ((%6[0] << 5) + %7[0]) << 4, passed as the fourth operand of the
  ; buffer store below.
  %15 = extractelement <3 x i32> %6, i32 0
  %16 = extractelement <3 x i32> %7, i32 0
  %17 = shl i32 %15, 5
  %18 = add i32 %17, %16
  %19 = shl i32 %18, 4
  ; Read an i32 from address space 3 at byte offset (%7[1] << 2) from the null
  ; pointer; the loaded value is the dynamic extract index.
  %20 = extractelement <3 x i32> %7, i32 1
  %21 = shl i32 %20, 2
  %22 = sext i32 %21 to i64
  %23 = getelementptr i8, i8 addrspace(3)* null, i64 %22
  %24 = bitcast i8 addrspace(3)* %23 to i32 addrspace(3)*
  %25 = load i32, i32 addrspace(3)* %24, align 4
  ; Dynamically index the 512-wide vector, put the result in lane 0 of a
  ; <4 x float> (remaining lanes undef) and store it through the buffer
  ; intrinsic so the computation is not dead.
  %26 = extractelement <512 x float> %14, i32 %25
  %27 = insertelement <4 x float> undef, float %26, i32 0
  call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %27, <4 x i32> %9, i32 0, i32 %19, i1 false, i1 false)
  ret void
}

; Barrier intrinsic; declared convergent and nounwind via attribute group #1.
declare void @llvm.amdgcn.s.barrier() #1

; Formatted buffer store intrinsic; declared nounwind via attribute group #2.
declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2

; #0 is applied to @main and carries the maximum work-group size setting.
attributes #0 = { "amdgpu-max-work-group-size"="1024" }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind }

; TBAA access tag (!0) and its type node (!1) referenced by the load of %9.
!0 = !{!1, !1, i64 0, i32 1}
!1 = !{!"const", null}